/work/svt-av1/Source/Lib/Codec/full_loop.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright(c) 2019 Intel Corporation |
3 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
4 | | * |
5 | | * This source code is subject to the terms of the BSD 3-Clause Clear License and |
6 | | * the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear License |
7 | | * was not distributed with this source code in the LICENSE file, you can |
8 | | * obtain it at https://www.aomedia.org/license. If the Alliance for Open |
9 | | * Media Patent License 1.0 was not distributed with this source code in the |
10 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
11 | | */ |
12 | | |
13 | | #include "definitions.h" |
14 | | #include "full_loop.h" |
15 | | #include "pcs.h" |
16 | | #include "rd_cost.h" |
17 | | #include "aom_dsp_rtcd.h" |
18 | | #include "sequence_control_set.h" |
19 | | #include "utility.h" |
20 | | #include "ac_bias.h" |
21 | | |
22 | | const int av1_get_tx_scale_tab[TX_SIZES_ALL] = {0, 0, 0, 1, 2, 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 0, 0, 1, 1}; |
23 | | |
// Forward declarations of kernels that are defined outside this translation unit.

// Residual kernel: takes an input buffer and a prediction buffer (each with offset and stride)
// and an int16_t residual destination covering an area_width x area_height region.
void svt_aom_residual_kernel(uint8_t* input, uint32_t input_offset, uint32_t input_stride,
                             uint8_t* pred, uint32_t pred_offset, uint32_t pred_stride,
                             int16_t* residual, uint32_t residual_offset, uint32_t residual_stride,
                             bool hbd, uint32_t area_width, uint32_t area_height);

// SSIM-flavoured spatial distortion kernel: takes an input buffer and a reconstruction buffer
// (each with offset and stride), the area dimensions, a high-bit-depth flag and an ac_bias weight.
uint64_t svt_spatial_full_distortion_ssim_kernel(uint8_t* input, uint32_t input_offset, uint32_t input_stride,
                                                 uint8_t* recon, int32_t recon_offset, uint32_t recon_stride,
                                                 uint32_t area_width, uint32_t area_height,
                                                 bool hbd, double ac_bias);
30 | | |
31 | | void svt_aom_quantize_b_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr, |
32 | | const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr, |
33 | | TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr, uint16_t* eob_ptr, |
34 | | const int16_t* scan, const int16_t* iscan, const QmVal* qm_ptr, const QmVal* iqm_ptr, |
35 | 1.20M | const int32_t log_scale) { |
36 | 1.20M | const int32_t zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale), ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale)}; |
37 | 1.20M | const int32_t nzbins[2] = {zbins[0] * -1, zbins[1] * -1}; |
38 | 1.20M | intptr_t non_zero_count = n_coeffs, eob = -1; |
39 | 1.20M | (void)iscan; |
40 | | |
41 | 1.20M | memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); |
42 | 1.20M | memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); |
43 | | |
44 | | // Pre-scan pass |
45 | 23.5M | for (intptr_t i = n_coeffs - 1; i >= 0; i--) { |
46 | 22.5M | const int32_t rc = scan[i]; |
47 | 22.5M | const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS); |
48 | 22.5M | const int32_t coeff = coeff_ptr[rc] * wt; |
49 | | |
50 | 22.5M | if (coeff < (zbins[rc != 0] * (1 << AOM_QM_BITS)) && coeff > (nzbins[rc != 0] * (1 << AOM_QM_BITS))) { |
51 | 22.3M | non_zero_count--; |
52 | 22.3M | } else { |
53 | 143k | break; |
54 | 143k | } |
55 | 22.5M | } |
56 | | |
57 | | // Quantization pass: All coefficients with index >= zero_flag are |
58 | | // skippable. Note: zero_flag can be zero. |
59 | 1.21M | for (intptr_t i = 0; i < non_zero_count; i++) { |
60 | 11.2k | const int32_t rc = scan[i]; |
61 | 11.2k | const int32_t coeff = coeff_ptr[rc]; |
62 | 11.2k | const int coeff_sign = coeff < 0 ? -1 : 0; |
63 | 11.2k | const int32_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign; |
64 | | |
65 | 11.2k | const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS); |
66 | 11.2k | if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) { |
67 | 11.2k | int64_t tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale), INT16_MIN, INT16_MAX); |
68 | 11.2k | tmp *= wt; |
69 | 11.2k | int32_t tmp32 = (int32_t)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) * quant_shift_ptr[rc != 0]) >> |
70 | 11.2k | (16 - log_scale + AOM_QM_BITS)); // quantization |
71 | 11.2k | qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; |
72 | 11.2k | const int32_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS); |
73 | 11.2k | const int32_t dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; |
74 | 11.2k | const TranLow abs_dqcoeff = (tmp32 * dequant) >> log_scale; |
75 | 11.2k | dqcoeff_ptr[rc] = (TranLow)((abs_dqcoeff ^ coeff_sign) - coeff_sign); |
76 | | |
77 | 11.2k | if (tmp32) { |
78 | 11.2k | eob = i; |
79 | 11.2k | } |
80 | 11.2k | } |
81 | 11.2k | } |
82 | 1.20M | *eob_ptr = (uint16_t)(eob + 1); |
83 | 1.20M | } |
84 | | |
85 | | void svt_aom_highbd_quantize_b_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr, |
86 | | const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr, |
87 | | TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr, |
88 | | uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan, const QmVal* qm_ptr, |
89 | 0 | const QmVal* iqm_ptr, const int32_t log_scale) { |
90 | 0 | intptr_t eob = -1; |
91 | 0 | (void)iscan; |
92 | |
|
93 | 0 | memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); |
94 | 0 | memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); |
95 | |
|
96 | 0 | const int32_t zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale), ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale)}; |
97 | 0 | const int32_t nzbins[2] = {zbins[0] * -1, zbins[1] * -1}; |
98 | 0 | intptr_t idx_arr[4096]; |
99 | 0 | int idx = 0; |
100 | | // Pre-scan pass |
101 | 0 | for (intptr_t i = 0; i < n_coeffs; i++) { |
102 | 0 | const int32_t rc = scan[i]; |
103 | 0 | const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS); |
104 | 0 | const int32_t coeff = coeff_ptr[rc] * wt; |
105 | | |
106 | | // If the coefficient is out of the base ZBIN range, keep it for |
107 | | // quantization. |
108 | 0 | if (coeff >= (zbins[rc != 0] * (1 << AOM_QM_BITS)) || coeff <= (nzbins[rc != 0] * (1 << AOM_QM_BITS))) { |
109 | 0 | idx_arr[idx++] = i; |
110 | 0 | } |
111 | 0 | } |
112 | | |
113 | | // Quantization pass: only process the coefficients selected in |
114 | | // pre-scan pass. Note: idx can be zero. |
115 | 0 | for (int i = 0; i < idx; i++) { |
116 | 0 | const int32_t rc = scan[idx_arr[i]]; |
117 | 0 | const int32_t coeff = coeff_ptr[rc]; |
118 | 0 | const int coeff_sign = coeff < 0 ? -1 : 0; |
119 | 0 | const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS); |
120 | 0 | const QmVal iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS); |
121 | 0 | const int32_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign; |
122 | 0 | const int64_t tmp1 = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale); |
123 | 0 | const int64_t tmpw = tmp1 * wt; |
124 | 0 | const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw; |
125 | 0 | const int32_t abs_qcoeff = (int32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (16 - log_scale + AOM_QM_BITS)); |
126 | 0 | qcoeff_ptr[rc] = (TranLow)((abs_qcoeff ^ coeff_sign) - coeff_sign); |
127 | 0 | int32_t dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; |
128 | 0 | const TranLow abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale; |
129 | 0 | dqcoeff_ptr[rc] = (TranLow)((abs_dqcoeff ^ coeff_sign) - coeff_sign); |
130 | 0 | if (abs_qcoeff) { |
131 | 0 | eob = idx_arr[i]; |
132 | 0 | } |
133 | 0 | } |
134 | |
|
135 | 0 | *eob_ptr = (uint16_t)(eob + 1); |
136 | 0 | } |
137 | | |
138 | | #if CONFIG_ENABLE_HIGH_BIT_DEPTH |
139 | | void svt_av1_highbd_quantize_b_facade(const TranLow* coeff_ptr, intptr_t n_coeffs, const MacroblockPlane* p, |
140 | | TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, uint16_t* eob_ptr, const ScanOrder* sc, |
141 | 0 | const QuantParam* qparam) { |
142 | 0 | const QmVal* qm_ptr = qparam->qmatrix; |
143 | 0 | const QmVal* iqm_ptr = qparam->iqmatrix; |
144 | 0 | if (qm_ptr || iqm_ptr) { |
145 | 0 | svt_av1_highbd_quantize_b_qm(coeff_ptr, |
146 | 0 | n_coeffs, |
147 | 0 | p->zbin_qtx, |
148 | 0 | p->round_qtx, |
149 | 0 | p->quant_qtx, |
150 | 0 | p->quant_shift_qtx, |
151 | 0 | qcoeff_ptr, |
152 | 0 | dqcoeff_ptr, |
153 | 0 | p->dequant_qtx, |
154 | 0 | eob_ptr, |
155 | 0 | sc->scan, |
156 | 0 | sc->iscan, |
157 | 0 | qm_ptr, |
158 | 0 | iqm_ptr, |
159 | 0 | qparam->log_scale); |
160 | 0 | } else { |
161 | 0 | svt_aom_highbd_quantize_b(coeff_ptr, |
162 | 0 | n_coeffs, |
163 | 0 | p->zbin_qtx, |
164 | 0 | p->round_qtx, |
165 | 0 | p->quant_qtx, |
166 | 0 | p->quant_shift_qtx, |
167 | 0 | qcoeff_ptr, |
168 | 0 | dqcoeff_ptr, |
169 | 0 | p->dequant_qtx, |
170 | 0 | eob_ptr, |
171 | 0 | sc->scan, |
172 | 0 | sc->iscan, |
173 | 0 | NULL, |
174 | 0 | NULL, |
175 | 0 | qparam->log_scale); |
176 | 0 | } |
177 | 0 | assert(qparam->log_scale <= 2); |
178 | 0 | } |
179 | | #endif |
180 | | |
181 | | static void av1_quantize_b_facade_ii(const TranLow* coeff_ptr, intptr_t n_coeffs, const MacroblockPlane* p, |
182 | | TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, uint16_t* eob_ptr, const ScanOrder* sc, |
183 | 1.19M | const QuantParam* qparam) { |
184 | 1.19M | const QmVal* qm_ptr = qparam->qmatrix; |
185 | 1.19M | const QmVal* iqm_ptr = qparam->iqmatrix; |
186 | 1.19M | if (qm_ptr || iqm_ptr) { |
187 | 0 | svt_av1_quantize_b_qm(coeff_ptr, |
188 | 0 | n_coeffs, |
189 | 0 | p->zbin_qtx, |
190 | 0 | p->round_qtx, |
191 | 0 | p->quant_qtx, |
192 | 0 | p->quant_shift_qtx, |
193 | 0 | qcoeff_ptr, |
194 | 0 | dqcoeff_ptr, |
195 | 0 | p->dequant_qtx, |
196 | 0 | eob_ptr, |
197 | 0 | sc->scan, |
198 | 0 | sc->iscan, |
199 | 0 | qm_ptr, |
200 | 0 | iqm_ptr, |
201 | 0 | qparam->log_scale); |
202 | 1.19M | } else { |
203 | 1.19M | svt_aom_quantize_b(coeff_ptr, |
204 | 1.19M | n_coeffs, |
205 | 1.19M | p->zbin_qtx, |
206 | 1.19M | p->round_qtx, |
207 | 1.19M | p->quant_qtx, |
208 | 1.19M | p->quant_shift_qtx, |
209 | 1.19M | qcoeff_ptr, |
210 | 1.19M | dqcoeff_ptr, |
211 | 1.19M | p->dequant_qtx, |
212 | 1.19M | eob_ptr, |
213 | 1.19M | sc->scan, |
214 | 1.19M | sc->iscan, |
215 | 1.19M | NULL, |
216 | 1.19M | NULL, |
217 | 1.19M | qparam->log_scale); |
218 | 1.19M | } |
219 | 1.19M | assert(qparam->log_scale <= 2); |
220 | 1.19M | } |
221 | | |
222 | | static void quantize_fp_helper_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr, |
223 | | const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr, |
224 | | TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr, |
225 | | uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan, const QmVal* qm_ptr, |
226 | 22.6k | const QmVal* iqm_ptr, int log_scale) { |
227 | 22.6k | int i, eob = -1; |
228 | 22.6k | const int rounding[2] = {ROUND_POWER_OF_TWO(round_ptr[0], log_scale), ROUND_POWER_OF_TWO(round_ptr[1], log_scale)}; |
229 | 22.6k | (void)zbin_ptr; |
230 | 22.6k | (void)quant_shift_ptr; |
231 | 22.6k | (void)iscan; |
232 | | |
233 | 22.6k | memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); |
234 | 22.6k | memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); |
235 | | |
236 | 22.6k | if (qm_ptr == NULL && iqm_ptr == NULL) { |
237 | 10.1M | for (i = 0; i < n_coeffs; i++) { |
238 | 10.0M | const int rc = scan[i]; |
239 | 10.0M | const int32_t thresh = (int32_t)(dequant_ptr[rc != 0]); |
240 | 10.0M | const int coeff = coeff_ptr[rc]; |
241 | 10.0M | const int coeff_sign = coeff < 0 ? -1 : 0; |
242 | 10.0M | int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign; |
243 | 10.0M | int tmp32 = 0; |
244 | 10.0M | if ((abs_coeff << (1 + log_scale)) >= thresh) { |
245 | 10.4k | abs_coeff = clamp64(abs_coeff + rounding[rc != 0], INT16_MIN, INT16_MAX); |
246 | 10.4k | tmp32 = (int)((abs_coeff * quant_ptr[rc != 0]) >> (16 - log_scale)); |
247 | 10.4k | if (tmp32) { |
248 | 10.3k | qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; |
249 | 10.3k | const TranLow abs_dqcoeff = (tmp32 * dequant_ptr[rc != 0]) >> log_scale; |
250 | 10.3k | dqcoeff_ptr[rc] = (abs_dqcoeff ^ coeff_sign) - coeff_sign; |
251 | 10.3k | } |
252 | 10.4k | } |
253 | 10.0M | if (tmp32) { |
254 | 10.3k | eob = i; |
255 | 10.3k | } |
256 | 10.0M | } |
257 | 22.6k | } else { |
258 | | // Quantization pass: All coefficients with index >= zero_flag are |
259 | | // skippable. Note: zero_flag can be zero. |
260 | 0 | for (i = 0; i < n_coeffs; i++) { |
261 | 0 | const int rc = scan[i]; |
262 | 0 | const int coeff = coeff_ptr[rc]; |
263 | 0 | const QmVal wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS); |
264 | 0 | const QmVal iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS); |
265 | 0 | const int dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; |
266 | 0 | const int coeff_sign = coeff < 0 ? -1 : 0; |
267 | 0 | int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign; |
268 | 0 | int tmp32 = 0; |
269 | 0 | if (abs_coeff * wt >= (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) { |
270 | 0 | abs_coeff += rounding[rc != 0]; |
271 | 0 | abs_coeff = clamp64(abs_coeff, INT16_MIN, INT16_MAX); |
272 | 0 | tmp32 = (int)((abs_coeff * wt * quant_ptr[rc != 0]) >> (16 - log_scale + AOM_QM_BITS)); |
273 | 0 | qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; |
274 | 0 | const TranLow abs_dqcoeff = (tmp32 * dequant) >> log_scale; |
275 | 0 | dqcoeff_ptr[rc] = (abs_dqcoeff ^ coeff_sign) - coeff_sign; |
276 | 0 | } |
277 | |
|
278 | 0 | if (tmp32) { |
279 | 0 | eob = i; |
280 | 0 | } |
281 | 0 | } |
282 | 0 | } |
283 | 22.6k | *eob_ptr = eob + 1; |
284 | 22.6k | } |
285 | | |
286 | | void svt_av1_quantize_fp_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr, |
287 | | const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr, |
288 | | TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr, uint16_t* eob_ptr, |
289 | 13.7k | const int16_t* scan, const int16_t* iscan) { |
290 | 13.7k | quantize_fp_helper_c(coeff_ptr, |
291 | 13.7k | n_coeffs, |
292 | 13.7k | zbin_ptr, |
293 | 13.7k | round_ptr, |
294 | 13.7k | quant_ptr, |
295 | 13.7k | quant_shift_ptr, |
296 | 13.7k | qcoeff_ptr, |
297 | 13.7k | dqcoeff_ptr, |
298 | 13.7k | dequant_ptr, |
299 | 13.7k | eob_ptr, |
300 | 13.7k | scan, |
301 | 13.7k | iscan, |
302 | 13.7k | NULL, |
303 | 13.7k | NULL, |
304 | 13.7k | 0); |
305 | 13.7k | } |
306 | | |
307 | | void svt_av1_quantize_fp_qm_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr, |
308 | | const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr, |
309 | | TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr, uint16_t* eob_ptr, |
310 | | const int16_t* scan, const int16_t* iscan, const QmVal* qm_ptr, const QmVal* iqm_ptr, |
311 | 0 | int16_t log_scale) { |
312 | 0 | quantize_fp_helper_c(coeff_ptr, |
313 | 0 | n_coeffs, |
314 | 0 | zbin_ptr, |
315 | 0 | round_ptr, |
316 | 0 | quant_ptr, |
317 | 0 | quant_shift_ptr, |
318 | 0 | qcoeff_ptr, |
319 | 0 | dqcoeff_ptr, |
320 | 0 | dequant_ptr, |
321 | 0 | eob_ptr, |
322 | 0 | scan, |
323 | 0 | iscan, |
324 | 0 | qm_ptr, |
325 | 0 | iqm_ptr, |
326 | 0 | log_scale); |
327 | 0 | } |
328 | | |
329 | | static void highbd_quantize_fp_helper_c(const TranLow* coeff_ptr, intptr_t count, const int16_t* zbin_ptr, |
330 | | const int16_t* round_ptr, const int16_t* quant_ptr, |
331 | | const int16_t* quant_shift_ptr, TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, |
332 | | const int16_t* dequant_ptr, uint16_t* eob_ptr, const int16_t* scan, |
333 | | const int16_t* iscan, const QmVal* qm_ptr, const QmVal* iqm_ptr, |
334 | 0 | int16_t log_scale) { |
335 | 0 | int i; |
336 | 0 | int eob = -1; |
337 | 0 | const int shift = 16 - log_scale; |
338 | 0 | (void)zbin_ptr; |
339 | 0 | (void)quant_shift_ptr; |
340 | 0 | (void)iscan; |
341 | |
|
342 | 0 | if (qm_ptr || iqm_ptr) { |
343 | | // Quantization pass: All coefficients with index >= zero_flag are |
344 | | // skippable. Note: zero_flag can be zero. |
345 | 0 | for (i = 0; i < count; i++) { |
346 | 0 | const int rc = scan[i]; |
347 | 0 | const int coeff = coeff_ptr[rc]; |
348 | 0 | const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS); |
349 | 0 | const QmVal iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS); |
350 | 0 | const int dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; |
351 | 0 | const int coeff_sign = coeff < 0 ? -1 : 0; |
352 | 0 | const int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign; |
353 | 0 | if (abs_coeff * wt >= (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) { |
354 | 0 | const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale); |
355 | 0 | const int abs_qcoeff = (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS)); |
356 | 0 | qcoeff_ptr[rc] = (TranLow)((abs_qcoeff ^ coeff_sign) - coeff_sign); |
357 | 0 | const TranLow abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale; |
358 | 0 | dqcoeff_ptr[rc] = (TranLow)((abs_dqcoeff ^ coeff_sign) - coeff_sign); |
359 | 0 | if (abs_qcoeff) { |
360 | 0 | eob = i; |
361 | 0 | } |
362 | 0 | } else { |
363 | 0 | qcoeff_ptr[rc] = 0; |
364 | 0 | dqcoeff_ptr[rc] = 0; |
365 | 0 | } |
366 | 0 | } |
367 | 0 | } else { |
368 | 0 | const int log_scaled_round_arr[2] = { |
369 | 0 | ROUND_POWER_OF_TWO(round_ptr[0], log_scale), |
370 | 0 | ROUND_POWER_OF_TWO(round_ptr[1], log_scale), |
371 | 0 | }; |
372 | 0 | for (i = 0; i < count; i++) { |
373 | 0 | const int rc = scan[i]; |
374 | 0 | const int coeff = coeff_ptr[rc]; |
375 | 0 | const int rc01 = (rc != 0); |
376 | 0 | const int coeff_sign = coeff < 0 ? -1 : 0; |
377 | 0 | const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; |
378 | 0 | const int log_scaled_round = log_scaled_round_arr[rc01]; |
379 | 0 | if ((abs_coeff << (1 + log_scale)) >= dequant_ptr[rc01]) { |
380 | 0 | const int quant = quant_ptr[rc01]; |
381 | 0 | const int dequant = dequant_ptr[rc01]; |
382 | 0 | const int64_t tmp = (int64_t)abs_coeff + log_scaled_round; |
383 | 0 | const int abs_qcoeff = (int)((tmp * quant) >> shift); |
384 | 0 | qcoeff_ptr[rc] = (TranLow)((abs_qcoeff ^ coeff_sign) - coeff_sign); |
385 | 0 | const TranLow abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale; |
386 | 0 | if (abs_qcoeff) { |
387 | 0 | eob = i; |
388 | 0 | } |
389 | 0 | dqcoeff_ptr[rc] = (TranLow)((abs_dqcoeff ^ coeff_sign) - coeff_sign); |
390 | 0 | } else { |
391 | 0 | qcoeff_ptr[rc] = 0; |
392 | 0 | dqcoeff_ptr[rc] = 0; |
393 | 0 | } |
394 | 0 | } |
395 | 0 | } |
396 | 0 | *eob_ptr = eob + 1; |
397 | 0 | } |
398 | | |
399 | | void svt_av1_highbd_quantize_fp_c(const TranLow* coeff_ptr, intptr_t count, const int16_t* zbin_ptr, |
400 | | const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr, |
401 | | TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr, |
402 | 0 | uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan, int16_t log_scale) { |
403 | 0 | highbd_quantize_fp_helper_c(coeff_ptr, |
404 | 0 | count, |
405 | 0 | zbin_ptr, |
406 | 0 | round_ptr, |
407 | 0 | quant_ptr, |
408 | 0 | quant_shift_ptr, |
409 | 0 | qcoeff_ptr, |
410 | 0 | dqcoeff_ptr, |
411 | 0 | dequant_ptr, |
412 | 0 | eob_ptr, |
413 | 0 | scan, |
414 | 0 | iscan, |
415 | 0 | NULL, |
416 | 0 | NULL, |
417 | 0 | log_scale); |
418 | 0 | } |
419 | | |
420 | | void svt_av1_quantize_fp_32x32_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr, |
421 | | const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr, |
422 | | TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr, |
423 | 6.09k | uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan) { |
424 | 6.09k | quantize_fp_helper_c(coeff_ptr, |
425 | 6.09k | n_coeffs, |
426 | 6.09k | zbin_ptr, |
427 | 6.09k | round_ptr, |
428 | 6.09k | quant_ptr, |
429 | 6.09k | quant_shift_ptr, |
430 | 6.09k | qcoeff_ptr, |
431 | 6.09k | dqcoeff_ptr, |
432 | 6.09k | dequant_ptr, |
433 | 6.09k | eob_ptr, |
434 | 6.09k | scan, |
435 | 6.09k | iscan, |
436 | 6.09k | NULL, |
437 | 6.09k | NULL, |
438 | 6.09k | 1); |
439 | 6.09k | } |
440 | | |
441 | | void svt_av1_quantize_fp_64x64_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr, |
442 | | const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr, |
443 | | TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr, |
444 | 2.77k | uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan) { |
445 | 2.77k | quantize_fp_helper_c(coeff_ptr, |
446 | 2.77k | n_coeffs, |
447 | 2.77k | zbin_ptr, |
448 | 2.77k | round_ptr, |
449 | 2.77k | quant_ptr, |
450 | 2.77k | quant_shift_ptr, |
451 | 2.77k | qcoeff_ptr, |
452 | 2.77k | dqcoeff_ptr, |
453 | 2.77k | dequant_ptr, |
454 | 2.77k | eob_ptr, |
455 | 2.77k | scan, |
456 | 2.77k | iscan, |
457 | 2.77k | NULL, |
458 | 2.77k | NULL, |
459 | 2.77k | 2); |
460 | 2.77k | } |
461 | | |
462 | | void svt_av1_quantize_fp_facade(const TranLow* coeff_ptr, intptr_t n_coeffs, const MacroblockPlane* p, |
463 | | TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, uint16_t* eob_ptr, const ScanOrder* sc, |
464 | 22.6k | const QuantParam* qparam) { |
465 | 22.6k | const QmVal* qm_ptr = qparam->qmatrix; |
466 | 22.6k | const QmVal* iqm_ptr = qparam->iqmatrix; |
467 | | |
468 | 22.6k | if (qm_ptr || iqm_ptr) { |
469 | 0 | svt_av1_quantize_fp_qm(coeff_ptr, |
470 | 0 | n_coeffs, |
471 | 0 | p->zbin_qtx, |
472 | 0 | p->round_fp_qtx, |
473 | 0 | p->quant_fp_qtx, |
474 | 0 | p->quant_shift_qtx, |
475 | 0 | qcoeff_ptr, |
476 | 0 | dqcoeff_ptr, |
477 | 0 | p->dequant_qtx, |
478 | 0 | eob_ptr, |
479 | 0 | sc->scan, |
480 | 0 | sc->iscan, |
481 | 0 | qm_ptr, |
482 | 0 | iqm_ptr, |
483 | 0 | qparam->log_scale); |
484 | 22.6k | } else { |
485 | 22.6k | switch (qparam->log_scale) { |
486 | 13.7k | case 0: |
487 | 13.7k | svt_av1_quantize_fp(coeff_ptr, |
488 | 13.7k | n_coeffs, |
489 | 13.7k | p->zbin_qtx, |
490 | 13.7k | p->round_fp_qtx, |
491 | 13.7k | p->quant_fp_qtx, |
492 | 13.7k | p->quant_shift_qtx, |
493 | 13.7k | qcoeff_ptr, |
494 | 13.7k | dqcoeff_ptr, |
495 | 13.7k | p->dequant_qtx, |
496 | 13.7k | eob_ptr, |
497 | 13.7k | sc->scan, |
498 | 13.7k | sc->iscan); |
499 | 13.7k | break; |
500 | 6.09k | case 1: |
501 | 6.09k | svt_av1_quantize_fp_32x32(coeff_ptr, |
502 | 6.09k | n_coeffs, |
503 | 6.09k | p->zbin_qtx, |
504 | 6.09k | p->round_fp_qtx, |
505 | 6.09k | p->quant_fp_qtx, |
506 | 6.09k | p->quant_shift_qtx, |
507 | 6.09k | qcoeff_ptr, |
508 | 6.09k | dqcoeff_ptr, |
509 | 6.09k | p->dequant_qtx, |
510 | 6.09k | eob_ptr, |
511 | 6.09k | sc->scan, |
512 | 6.09k | sc->iscan); |
513 | 6.09k | break; |
514 | 2.77k | case 2: |
515 | 2.77k | svt_av1_quantize_fp_64x64(coeff_ptr, |
516 | 2.77k | n_coeffs, |
517 | 2.77k | p->zbin_qtx, |
518 | 2.77k | p->round_fp_qtx, |
519 | 2.77k | p->quant_fp_qtx, |
520 | 2.77k | p->quant_shift_qtx, |
521 | 2.77k | qcoeff_ptr, |
522 | 2.77k | dqcoeff_ptr, |
523 | 2.77k | p->dequant_qtx, |
524 | 2.77k | eob_ptr, |
525 | 2.77k | sc->scan, |
526 | 2.77k | sc->iscan); |
527 | 2.77k | break; |
528 | 0 | default: |
529 | 0 | assert(0); |
530 | 22.6k | } |
531 | 22.6k | } |
532 | 22.6k | } |
533 | | |
534 | | #if CONFIG_ENABLE_HIGH_BIT_DEPTH |
535 | | void svt_av1_highbd_quantize_fp_facade(const TranLow* coeff_ptr, intptr_t n_coeffs, const MacroblockPlane* p, |
536 | | TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, uint16_t* eob_ptr, |
537 | 0 | const ScanOrder* sc, const QuantParam* qparam) { |
538 | 0 | const QmVal* qm_ptr = qparam->qmatrix; |
539 | 0 | const QmVal* iqm_ptr = qparam->iqmatrix; |
540 | 0 | if (qm_ptr != NULL && iqm_ptr != NULL) { |
541 | 0 | svt_av1_highbd_quantize_fp_qm(coeff_ptr, |
542 | 0 | n_coeffs, |
543 | 0 | p->zbin_qtx, |
544 | 0 | p->round_fp_qtx, |
545 | 0 | p->quant_fp_qtx, |
546 | 0 | p->quant_shift_qtx, |
547 | 0 | qcoeff_ptr, |
548 | 0 | dqcoeff_ptr, |
549 | 0 | p->dequant_qtx, |
550 | 0 | eob_ptr, |
551 | 0 | sc->scan, |
552 | 0 | sc->iscan, |
553 | 0 | qm_ptr, |
554 | 0 | iqm_ptr, |
555 | 0 | qparam->log_scale); |
556 | 0 | } else { |
557 | 0 | svt_av1_highbd_quantize_fp(coeff_ptr, |
558 | 0 | n_coeffs, |
559 | 0 | p->zbin_qtx, |
560 | 0 | p->round_fp_qtx, |
561 | 0 | p->quant_fp_qtx, |
562 | 0 | p->quant_shift_qtx, |
563 | 0 | qcoeff_ptr, |
564 | 0 | dqcoeff_ptr, |
565 | 0 | p->dequant_qtx, |
566 | 0 | eob_ptr, |
567 | 0 | sc->scan, |
568 | 0 | sc->iscan, |
569 | 0 | qparam->log_scale); |
570 | 0 | } |
571 | 0 | } |
572 | | #endif |
573 | | |
574 | | void svt_av1_highbd_quantize_fp_qm_c(const TranLow* coeff_ptr, intptr_t count, const int16_t* zbin_ptr, |
575 | | const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr, |
576 | | TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr, |
577 | | uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan, const QmVal* qm_ptr, |
578 | 0 | const QmVal* iqm_ptr, int16_t log_scale) { |
579 | 0 | highbd_quantize_fp_helper_c(coeff_ptr, |
580 | 0 | count, |
581 | 0 | zbin_ptr, |
582 | 0 | round_ptr, |
583 | 0 | quant_ptr, |
584 | 0 | quant_shift_ptr, |
585 | 0 | qcoeff_ptr, |
586 | 0 | dqcoeff_ptr, |
587 | 0 | dequant_ptr, |
588 | 0 | eob_ptr, |
589 | 0 | scan, |
590 | 0 | iscan, |
591 | 0 | qm_ptr, |
592 | 0 | iqm_ptr, |
593 | 0 | log_scale); |
594 | 0 | } |
595 | | |
596 | | static INLINE int get_lower_levels_ctx_general(int is_last, int scan_idx, int bwl, int height, const uint8_t* levels, |
597 | 8.75k | int coeff_idx, TxSize tx_size, TxClass tx_class) { |
598 | 8.75k | if (is_last) { |
599 | 8.75k | if (scan_idx == 0) { |
600 | 8.74k | return 0; |
601 | 8.74k | } |
602 | 1 | if (scan_idx <= (height << bwl) >> 3) { |
603 | 0 | return 1; |
604 | 0 | } |
605 | 1 | if (scan_idx <= (height << bwl) >> 2) { |
606 | 0 | return 2; |
607 | 0 | } |
608 | 1 | return 3; |
609 | 1 | } |
610 | 0 | return get_lower_levels_ctx(levels, coeff_idx, bwl, tx_size, tx_class); |
611 | 8.75k | } |
612 | | |
613 | 17.4k | static INLINE int32_t get_golomb_cost(int32_t abs_qc) { |
614 | 17.4k | if (abs_qc >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) { |
615 | 15.8k | const int32_t r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS; |
616 | 15.8k | const int32_t length = get_msb(r) + 1; |
617 | 15.8k | return av1_cost_literal(2 * length - 1); |
618 | 15.8k | } |
619 | 1.57k | return 0; |
620 | 17.4k | } |
621 | | |
622 | 17.4k | static INLINE int get_br_cost(TranLow level, const int* coeff_lps) { |
623 | 17.4k | const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE); |
624 | 17.4k | return coeff_lps[base_range] + get_golomb_cost(level); |
625 | 17.4k | } |
626 | | |
627 | | static INLINE int get_coeff_cost_general(int is_last, int ci, TranLow abs_qc, int sign, int coeff_ctx, int dc_sign_ctx, |
628 | | const LvMapCoeffCost* txb_costs, int bwl, TxClass tx_class, |
629 | 17.4k | const uint8_t* levels) { |
630 | 17.4k | int cost = 0; |
631 | 17.4k | if (is_last) { |
632 | 17.4k | cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1]; |
633 | 17.4k | } else { |
634 | 0 | cost += txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)]; |
635 | 0 | } |
636 | 17.4k | if (abs_qc != 0) { |
637 | 17.4k | if (ci == 0) { |
638 | 17.4k | cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign]; |
639 | 17.4k | } else { |
640 | 0 | cost += av1_cost_literal(1); |
641 | 0 | } |
642 | 17.4k | if (abs_qc > NUM_BASE_LEVELS) { |
643 | 17.4k | int br_ctx; |
644 | 17.4k | if (is_last) { |
645 | 17.4k | br_ctx = get_br_ctx_eob(ci, bwl, tx_class); |
646 | 17.4k | } else { |
647 | 0 | br_ctx = get_br_ctx(levels, ci, bwl, tx_class); |
648 | 0 | } |
649 | 17.4k | cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]); |
650 | 17.4k | } |
651 | 17.4k | } |
652 | 17.4k | return cost; |
653 | 17.4k | } |
654 | | |
655 | 29.4k | static INLINE int64_t get_coeff_dist(TranLow tcoeff, TranLow dqcoeff, int shift) { |
656 | 29.4k | return SQR(((int64_t)tcoeff - dqcoeff) * (int64_t)(1lu << shift)); |
657 | 29.4k | } |
658 | | |
659 | 8.75k | static INLINE void get_qc_dqc_low(TranLow abs_qc, int sign, int dqv, int shift, TranLow* qc_low, TranLow* dqc_low) { |
660 | 8.75k | TranLow abs_qc_low = abs_qc - 1; |
661 | 8.75k | *qc_low = (-sign ^ abs_qc_low) + sign; |
662 | 8.75k | assert((sign ? -abs_qc_low : abs_qc_low) == *qc_low); |
663 | 8.75k | TranLow abs_dqc_low = (abs_qc_low * dqv) >> shift; |
664 | 8.75k | *dqc_low = (-sign ^ abs_dqc_low) + sign; |
665 | 8.75k | assert((sign ? -abs_dqc_low : abs_dqc_low) == *dqc_low); |
666 | 8.75k | } |
667 | | |
// Golomb remainder cost for r in [0, 31], in units where one bit costs 512.
// Entry r holds 512 * (bit count): 0 bits for r == 0, then 1, 3, 5, 7 and 9 bits
// for r in [1], [2,3], [4,7], [8,15] and [16,31] respectively.
static const int golomb_bits_cost[32] = {
    0,       512,     512 * 3, 512 * 3, // r = 0..3
    512 * 5, 512 * 5, 512 * 5, 512 * 5, // r = 4..7
    512 * 7, 512 * 7, 512 * 7, 512 * 7, // r = 8..11
    512 * 7, 512 * 7, 512 * 7, 512 * 7, // r = 12..15
    512 * 9, 512 * 9, 512 * 9, 512 * 9, // r = 16..19
    512 * 9, 512 * 9, 512 * 9, 512 * 9, // r = 20..23
    512 * 9, 512 * 9, 512 * 9, 512 * 9, // r = 24..27
    512 * 9, 512 * 9, 512 * 9, 512 * 9, // r = 28..31
};

// golomb_bits_cost[r] - golomb_bits_cost[r - 1] for r in [1, 31] (entry 0 is 0):
// non-zero only where the bit count steps up (r = 1, 2, 4, 8, 16).
static const int golomb_cost_diff[32] = {
    0,       512, 512 * 2, 0, // r = 0..3
    512 * 2, 0,   0,       0, // r = 4..7
    512 * 2, 0,   0,       0, // r = 8..11
    0,       0,   0,       0, // r = 12..15
    512 * 2, 0,   0,       0, // r = 16..19
    0,       0,   0,       0, // r = 20..23
    0,       0,   0,       0, // r = 24..27
    0,       0,   0,       0, // r = 28..31
};
674 | | |
675 | 0 | static INLINE int get_br_cost_with_diff(TranLow level, const int* coeff_lps, int* diff) { |
676 | 0 | const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE); |
677 | 0 | int golomb_bits = 0; |
678 | 0 | if (level <= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS) { |
679 | 0 | *diff += coeff_lps[base_range + COEFF_BASE_RANGE + 1]; |
680 | 0 | } |
681 | |
|
682 | 0 | if (level >= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS) { |
683 | 0 | int r = level - COEFF_BASE_RANGE - NUM_BASE_LEVELS; |
684 | 0 | if (r < 32) { |
685 | 0 | golomb_bits = golomb_bits_cost[r]; |
686 | 0 | *diff += golomb_cost_diff[r]; |
687 | 0 | } else { |
688 | 0 | golomb_bits = get_golomb_cost(level); |
689 | 0 | *diff += (r & (r - 1)) == 0 ? 1024 : 0; |
690 | 0 | } |
691 | 0 | } |
692 | |
|
693 | 0 | return coeff_lps[base_range] + golomb_bits; |
694 | 0 | } |
695 | | |
696 | | static AOM_FORCE_INLINE int get_two_coeff_cost_simple(int ci, TranLow abs_qc, int coeff_ctx, |
697 | | const LvMapCoeffCost* txb_costs, int bwl, TxClass tx_class, |
698 | 0 | const uint8_t* levels, int* cost_low) { |
699 | | // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0) |
700 | | // and not the last (scan_idx != eob - 1) |
701 | 0 | assert(ci > 0); |
702 | | //assert(abs_qc + 4 < 4); |
703 | 0 | int cost = txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)]; |
704 | 0 | int diff = 0; |
705 | 0 | if (abs_qc <= 3) { |
706 | 0 | diff = txb_costs->base_cost[coeff_ctx][abs_qc + 4]; |
707 | 0 | } |
708 | 0 | if (abs_qc) { |
709 | 0 | cost += av1_cost_literal(1); |
710 | 0 | if (abs_qc > NUM_BASE_LEVELS) { |
711 | 0 | const int br_ctx = get_br_ctx(levels, ci, bwl, tx_class); |
712 | 0 | int brcost_diff = 0; |
713 | 0 | cost += get_br_cost_with_diff(abs_qc, txb_costs->lps_cost[br_ctx], &brcost_diff); |
714 | 0 | diff += brcost_diff; |
715 | 0 | } |
716 | 0 | } |
717 | 0 | *cost_low = cost - diff; |
718 | |
|
719 | 0 | return cost; |
720 | 0 | } |
721 | | |
722 | | static INLINE int get_coeff_cost_eob(int ci, TranLow abs_qc, int sign, int coeff_ctx, int dc_sign_ctx, |
723 | 1.57k | const LvMapCoeffCost* txb_costs, int bwl, TxClass tx_class) { |
724 | 1.57k | int cost = 0; |
725 | 1.57k | cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1]; |
726 | 1.57k | if (abs_qc != 0) { |
727 | 1.57k | if (ci == 0) { |
728 | 1.57k | cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign]; |
729 | 1.57k | } else { |
730 | 0 | cost += av1_cost_literal(1); |
731 | 0 | } |
732 | 1.57k | if (abs_qc > NUM_BASE_LEVELS) { |
733 | 0 | int br_ctx; |
734 | 0 | br_ctx = get_br_ctx_eob(ci, bwl, tx_class); |
735 | 0 | cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]); |
736 | 0 | } |
737 | 1.57k | } |
738 | 1.57k | return cost; |
739 | 1.57k | } |
740 | | |
741 | 8.75k | static INLINE int get_dqv(const int16_t* dequant, int coeff_idx, const QmVal* iqm_ptr) { |
742 | 8.75k | int dqv = dequant[!!coeff_idx]; |
743 | 8.75k | if (iqm_ptr != NULL) { |
744 | 0 | dqv = ((iqm_ptr[coeff_idx] * dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; |
745 | 0 | } |
746 | 8.75k | return dqv; |
747 | 8.75k | } |
748 | | |
/*
 * RD decision for the coefficient at scan position `si`, which must not be the
 * current last coefficient (see the assert).
 *
 * Two scenarios are evaluated, each with the level kept or lowered by one:
 *   1. the block keeps its current end-of-block: costs are added on top of the
 *      running *accu_rate / *accu_dist;
 *   2. the end-of-block moves to this coefficient (new eob = si + 1): the cost is
 *      the new eob cost plus this coefficient coded in the eob context, and does
 *      not include the running accumulators.
 *
 * When `sharpness` is 0 and scenario 2 wins, every coefficient recorded in
 * nz_ci[0 .. *nz_num) is zeroed (qcoeff, dqcoeff and levels), *eob becomes
 * si + 1, and the accumulators are replaced by the scenario-2 values.
 * Otherwise the scenario-1 winner is added to the accumulators.
 *
 * In both cases, if the coefficient is still non-zero afterwards, its index is
 * appended to nz_ci and *nz_num is incremented.
 *
 * Distortions are stored relative to the distortion of zeroing the coefficient (dist0).
 */
static AOM_FORCE_INLINE void update_coeff_eob(int* accu_rate, int64_t* accu_dist, uint16_t* eob, int* nz_num,
                                              int* nz_ci, int si, TxSize tx_size, TxClass tx_class, int bwl, int height,
                                              int dc_sign_ctx, int64_t rdmult, int shift, const int16_t* dequant,
                                              const int16_t* scan, const LvMapEobCost* txb_eob_costs,
                                              const LvMapCoeffCost* txb_costs, const TranLow* tcoeff, TranLow* qcoeff,
                                              TranLow* dqcoeff, uint8_t* levels, int sharpness, const QmVal* iqm_ptr) {
    assert(si != *eob - 1);
    const int ci = scan[si];
    const int dqv = get_dqv(dequant, ci, iqm_ptr);
    const TranLow qc = qcoeff[ci];
    const int coeff_ctx = get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);
    if (qc == 0) {
        // Already zero: only the cost of coding a zero level is added.
        *accu_rate += txb_costs->base_cost[coeff_ctx][0];
    } else {
        int lower_level = 0;
        const TranLow abs_qc = abs(qc);
        const TranLow tqc = tcoeff[ci];
        const TranLow dqc = dqcoeff[ci];
        const int sign = (qc < 0) ? 1 : 0;
        const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
        // Scenario 1, level kept.
        int64_t dist = get_coeff_dist(tqc, dqc, shift) - dist0;
        int rate = get_coeff_cost_general(
            0, ci, abs_qc, sign, coeff_ctx, dc_sign_ctx, txb_costs, bwl, tx_class, levels);
        int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist);

        // Scenario 1, level lowered by one.
        TranLow qc_low, dqc_low;
        TranLow abs_qc_low;
        int64_t dist_low, rd_low;
        int rate_low;
        if (abs_qc == 1) {
            // Lowering a level of 1 zeroes the coefficient; relative distortion is 0 by definition.
            abs_qc_low = 0;
            dqc_low = qc_low = 0;
            dist_low = 0;
            rate_low = txb_costs->base_cost[coeff_ctx][0];
            rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist);
        } else {
            get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
            abs_qc_low = abs_qc - 1;
            dist_low = get_coeff_dist(tqc, dqc_low, shift) - dist0;
            rate_low = get_coeff_cost_general(
                0, ci, abs_qc_low, sign, coeff_ctx, dc_sign_ctx, txb_costs, bwl, tx_class, levels);
            rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low);
        }

        // Scenario 2, level kept: this coefficient becomes the last one.
        int lower_level_new_eob = 0;
        const int new_eob = si + 1;
        const int coeff_ctx_new_eob = get_lower_levels_ctx_eob(bwl, height, si);
        const int new_eob_cost = get_eob_cost(new_eob, txb_eob_costs, txb_costs, tx_class);
        int rate_coeff_eob = new_eob_cost +
            get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx_new_eob, dc_sign_ctx, txb_costs, bwl, tx_class);
        int64_t dist_new_eob = dist;
        int64_t rd_new_eob = RDCOST(rdmult, rate_coeff_eob, dist_new_eob);

        // Scenario 2, level lowered: only considered when the lowered level is still non-zero.
        if (abs_qc_low > 0) {
            const int rate_coeff_eob_low = new_eob_cost +
                get_coeff_cost_eob(ci, abs_qc_low, sign, coeff_ctx_new_eob, dc_sign_ctx, txb_costs, bwl, tx_class);
            const int64_t dist_new_eob_low = dist_low;
            const int64_t rd_new_eob_low = RDCOST(rdmult, rate_coeff_eob_low, dist_new_eob_low);
            if (rd_new_eob_low < rd_new_eob) {
                lower_level_new_eob = 1;
                rd_new_eob = rd_new_eob_low;
                rate_coeff_eob = rate_coeff_eob_low;
                dist_new_eob = dist_new_eob_low;
            }
        }

        // Pick the better option within scenario 1.
        if (rd_low < rd) {
            lower_level = 1;
            rd = rd_low;
            rate = rate_low;
            dist = dist_low;
        }

        // Moving the end-of-block is only allowed when sharpness is off.
        if (sharpness == 0 && rd_new_eob < rd) {
            // Drop every coefficient recorded so far; they all lie past the new eob.
            for (int ni = 0; ni < *nz_num; ++ni) {
                int last_ci = nz_ci[ni];
                levels[get_padded_idx(last_ci, bwl)] = 0;
                qcoeff[last_ci] = 0;
                dqcoeff[last_ci] = 0;
            }
            *eob = new_eob;
            *nz_num = 0;
            // Accumulators restart from the scenario-2 cost.
            *accu_rate = rate_coeff_eob;
            *accu_dist = dist_new_eob;
            lower_level = lower_level_new_eob;
        } else {
            *accu_rate += rate;
            *accu_dist += dist;
        }

        if (lower_level) {
            qcoeff[ci] = qc_low;
            dqcoeff[ci] = dqc_low;
            levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
        }
        // Record the coefficient if it survived as non-zero.
        if (qcoeff[ci]) {
            nz_ci[*nz_num] = ci;
            ++*nz_num;
        }
    }
}
850 | | |
851 | | static INLINE void update_coeff_general(int* accu_rate, int64_t* accu_dist, int si, int eob, TxSize tx_size, |
852 | | TxClass tx_class, int bwl, int height, int64_t rdmult, int shift, |
853 | | int dc_sign_ctx, const int16_t* dequant, const int16_t* scan, |
854 | | const LvMapCoeffCost* txb_costs, const TranLow* tcoeff, TranLow* qcoeff, |
855 | 8.75k | TranLow* dqcoeff, uint8_t* levels, const QmVal* iqm_ptr) { |
856 | 8.75k | const int ci = scan[si]; |
857 | 8.75k | const int dqv = get_dqv(dequant, ci, iqm_ptr); |
858 | 8.75k | const TranLow qc = qcoeff[ci]; |
859 | 8.75k | const int is_last = si == (eob - 1); |
860 | 8.75k | const int coeff_ctx = get_lower_levels_ctx_general(is_last, si, bwl, height, levels, ci, tx_size, tx_class); |
861 | 8.75k | if (qc == 0) { |
862 | 0 | *accu_rate += txb_costs->base_cost[coeff_ctx][0]; |
863 | 8.75k | } else { |
864 | 8.75k | const int sign = (qc < 0) ? 1 : 0; |
865 | 8.75k | const TranLow abs_qc = abs(qc); |
866 | 8.75k | const TranLow tqc = tcoeff[ci]; |
867 | 8.75k | const TranLow dqc = dqcoeff[ci]; |
868 | 8.75k | const int64_t dist = get_coeff_dist(tqc, dqc, shift); |
869 | 8.75k | const int64_t dist0 = get_coeff_dist(tqc, 0, shift); |
870 | 8.75k | const int rate = get_coeff_cost_general( |
871 | 8.75k | is_last, ci, abs_qc, sign, coeff_ctx, dc_sign_ctx, txb_costs, bwl, tx_class, levels); |
872 | 8.75k | const int64_t rd = RDCOST(rdmult, rate, dist); |
873 | | |
874 | 8.75k | TranLow qc_low, dqc_low; |
875 | 8.75k | TranLow abs_qc_low; |
876 | 8.75k | int64_t dist_low, rd_low; |
877 | 8.75k | int rate_low; |
878 | 8.75k | if (abs_qc == 1) { |
879 | 0 | abs_qc_low = qc_low = dqc_low = 0; |
880 | 0 | dist_low = dist0; |
881 | 0 | rate_low = txb_costs->base_cost[coeff_ctx][0]; |
882 | 8.75k | } else { |
883 | 8.75k | get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low); |
884 | 8.75k | abs_qc_low = abs_qc - 1; |
885 | 8.75k | dist_low = get_coeff_dist(tqc, dqc_low, shift); |
886 | 8.75k | rate_low = get_coeff_cost_general( |
887 | 8.75k | is_last, ci, abs_qc_low, sign, coeff_ctx, dc_sign_ctx, txb_costs, bwl, tx_class, levels); |
888 | 8.75k | } |
889 | | |
890 | 8.75k | rd_low = RDCOST(rdmult, rate_low, dist_low); |
891 | 8.75k | if (rd_low < rd) { |
892 | 80 | qcoeff[ci] = qc_low; |
893 | 80 | dqcoeff[ci] = dqc_low; |
894 | 80 | levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX); |
895 | 80 | *accu_rate += rate_low; |
896 | 80 | *accu_dist += dist_low - dist0; |
897 | 8.67k | } else { |
898 | 8.67k | *accu_rate += rate; |
899 | 8.67k | *accu_dist += dist - dist0; |
900 | 8.67k | } |
901 | 8.75k | } |
902 | 8.75k | } |
903 | | |
904 | | static AOM_FORCE_INLINE void update_coeff_simple(int* accu_rate, int si, int eob, TxSize tx_size, TxClass tx_class, |
905 | | int bwl, int64_t rdmult, int shift, const int16_t* dequant, |
906 | | const int16_t* scan, const LvMapCoeffCost* txb_costs, |
907 | | const TranLow* tcoeff, TranLow* qcoeff, TranLow* dqcoeff, |
908 | 0 | uint8_t* levels, const QmVal* iqm_ptr) { |
909 | 0 | const int dqv = get_dqv(dequant, scan[si], iqm_ptr); |
910 | 0 | (void)eob; |
911 | | // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0) |
912 | | // and not the last (scan_idx != eob - 1) |
913 | 0 | assert(si != eob - 1); |
914 | 0 | assert(si > 0); |
915 | 0 | const int ci = scan[si]; |
916 | 0 | const TranLow qc = qcoeff[ci]; |
917 | 0 | const int coeff_ctx = get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class); |
918 | 0 | if (qc == 0) { |
919 | 0 | *accu_rate += txb_costs->base_cost[coeff_ctx][0]; |
920 | 0 | } else { |
921 | 0 | const TranLow abs_qc = abs(qc); |
922 | 0 | const TranLow abs_tqc = abs(tcoeff[ci]); |
923 | 0 | const TranLow abs_dqc = abs(dqcoeff[ci]); |
924 | 0 | int rate_low = 0; |
925 | 0 | const int rate = get_two_coeff_cost_simple(ci, abs_qc, coeff_ctx, txb_costs, bwl, tx_class, levels, &rate_low); |
926 | 0 | if (abs_dqc < abs_tqc) { |
927 | 0 | *accu_rate += rate; |
928 | 0 | return; |
929 | 0 | } |
930 | | |
931 | 0 | const int64_t dist = get_coeff_dist(abs_tqc, abs_dqc, shift); |
932 | 0 | const int64_t rd = RDCOST(rdmult, rate, dist); |
933 | |
|
934 | 0 | const TranLow abs_qc_low = abs_qc - 1; |
935 | 0 | const TranLow abs_dqc_low = (abs_qc_low * dqv) >> shift; |
936 | 0 | const int64_t dist_low = get_coeff_dist(abs_tqc, abs_dqc_low, shift); |
937 | 0 | const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low); |
938 | |
|
939 | 0 | if (rd_low < rd) { |
940 | 0 | const int sign = (qc < 0) ? 1 : 0; |
941 | 0 | qcoeff[ci] = (-sign ^ abs_qc_low) + sign; |
942 | 0 | dqcoeff[ci] = (-sign ^ abs_dqc_low) + sign; |
943 | 0 | levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX); |
944 | 0 | *accu_rate += rate_low; |
945 | 0 | } else { |
946 | 0 | *accu_rate += rate; |
947 | 0 | } |
948 | 0 | } |
949 | 0 | } |
950 | | |
951 | | static INLINE void update_skip(int* accu_rate, int64_t accu_dist, uint16_t* eob, int nz_num, int* nz_ci, int64_t rdmult, |
952 | 10.3k | int skip_cost, int non_skip_cost, TranLow* qcoeff, TranLow* dqcoeff, int sharpness) { |
953 | 10.3k | const int64_t rd = RDCOST(rdmult, *accu_rate + non_skip_cost, accu_dist); |
954 | 10.3k | const int64_t rd_new_eob = RDCOST(rdmult, skip_cost, 0); |
955 | 10.3k | if (sharpness == 0 && rd_new_eob < rd) { |
956 | 0 | for (int i = 0; i < nz_num; ++i) { |
957 | 0 | const int ci = nz_ci[i]; |
958 | 0 | qcoeff[ci] = 0; |
959 | 0 | dqcoeff[ci] = 0; |
960 | | // no need to set up levels because this is the last step |
961 | | // levels[get_padded_idx(ci, bwl)] = 0; |
962 | 0 | } |
963 | 0 | *accu_rate = 0; |
964 | 0 | *eob = 0; |
965 | 0 | } |
966 | 10.3k | } |
967 | | |
// Adaptive-quantization mode identifiers; values are consecutive starting at 0.
enum {
    NO_AQ, // 0
    VARIANCE_AQ, // 1
    COMPLEXITY_AQ, // 2
    CYCLIC_REFRESH_AQ, // 3
    AQ_MODE_COUNT // number of modes; must stay the last enumerator
} UENUM1BYTE(AQ_MODE);
975 | | |
// Delta-Q mode identifiers; values are consecutive starting at 0.
enum {
    NO_DELTA_Q, // 0
    DELTA_Q_ONLY, // 1
    DELTA_Q_LF, // 2
    DELTAQ_MODE_COUNT // number of modes; must stay the last enumerator
} UENUM1BYTE(DELTAQ_MODE);
982 | | |
983 | | // These numbers are empirically obtained. |
984 | | #if TUNE_CHROMA_SSIM |
985 | | static const int plane_rd_mult[2][REF_TYPES][PLANE_TYPES] = {{ |
986 | | {17, 13}, |
987 | | {16, 10}, |
988 | | }, |
989 | | { |
990 | | {17, 13}, |
991 | | {16, 10}, |
992 | | }}; |
993 | | #else |
994 | | static const int plane_rd_mult[2][REF_TYPES][PLANE_TYPES] = {{{17, 20}, {16, 20}}, |
995 | | { |
996 | | {17, 13}, |
997 | | {16, 10}, |
998 | | }}; |
999 | | #endif |
1000 | | |
1001 | | /* |
1002 | | * Reduce the number of non-zero quantized coefficients before getting to the main/complex RDOQ stage |
1003 | | * (it performs an early check of whether to zero out each of the non-zero quantized coefficients, |
1004 | | * and updates the quantized coeffs if it is determined it can be zeroed out). |
1005 | | */ |
1006 | | static INLINE void update_coeff_eob_fast(uint16_t* eob, int shift, const int16_t* dequant_ptr, const int16_t* scan, |
1007 | 0 | const TranLow* coeff_ptr, TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr) { |
1008 | 0 | int eob_out = *eob; |
1009 | 0 | const int zbin[2] = {dequant_ptr[0] + ROUND_POWER_OF_TWO(dequant_ptr[0] * 70, 7), |
1010 | 0 | dequant_ptr[1] + ROUND_POWER_OF_TWO(dequant_ptr[1] * 70, 7)}; |
1011 | 0 | for (int i = *eob - 1; i >= 0; i--) { |
1012 | 0 | const int rc = scan[i]; |
1013 | 0 | const int qcoeff = qcoeff_ptr[rc]; |
1014 | 0 | const int coeff = coeff_ptr[rc]; |
1015 | 0 | const int coeff_sign = -(coeff < 0); |
1016 | 0 | int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign; |
1017 | 0 | if (((abs_coeff << (1 + shift)) < zbin[rc != 0]) || (qcoeff == 0)) { |
1018 | 0 | eob_out--; |
1019 | 0 | qcoeff_ptr[rc] = 0; |
1020 | 0 | dqcoeff_ptr[rc] = 0; |
1021 | 0 | } else { |
1022 | 0 | break; |
1023 | 0 | } |
1024 | 0 | } |
1025 | 0 | *eob = eob_out; |
1026 | 0 | } |
1027 | | |
1028 | | static void svt_fast_optimize_b(const TranLow* coeff_ptr, const MacroblockPlane* p, TranLow* qcoeff_ptr, |
1029 | | TranLow* dqcoeff_ptr, uint16_t* eob, TxSize tx_size, TxType tx_type) |
1030 | | |
1031 | 0 | { |
1032 | 0 | const ScanOrder* const scan_order = get_scan_order(tx_size, tx_type); |
1033 | 0 | const int16_t* scan = scan_order->scan; |
1034 | 0 | const int shift = av1_get_tx_scale_tab[tx_size]; |
1035 | 0 | update_coeff_eob_fast(eob, shift, p->dequant_qtx, scan, coeff_ptr, qcoeff_ptr, dqcoeff_ptr); |
1036 | 0 | } |
1037 | | |
/*
 * Rate-distortion optimized quantization (RDOQ) of one transform block.
 *
 * Walks the quantized coefficients from the last one (scan position *eob - 1) down
 * to DC and, per coefficient, decides whether lowering the level by one (or moving
 * the end-of-block, or skipping the whole block) gives a lower RD cost. qcoeff_ptr,
 * dqcoeff_ptr and *eob are updated in place.
 *
 * The walk has five stages:
 *   1. the last coefficient (general update if |level| >= 2, otherwise cost only);
 *   2. update_coeff_eob() while at most max_nz_num non-zero coefficients are tracked,
 *      which may pull the end-of-block in;
 *   3. update_skip() if stage 2 consumed every position;
 *   4. update_coeff_simple() down to si_end (optionally cut short by rdoq_ctrls);
 *   5. update_coeff_general() for DC if the walk reached position 0.
 *
 * NOTE(review): scan[*eob - 1] is read unconditionally, so callers presumably
 * guarantee *eob >= 1 - confirm at the call sites.
 */
static void svt_av1_optimize_b(PictureControlSet* pcs, ModeDecisionContext* ctx, int16_t txb_skip_context,
                               int16_t dc_sign_context, const TranLow* coeff_ptr, const MacroblockPlane* p,
                               TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, uint16_t* eob, const QuantParam* qparam,
                               TxSize tx_size, TxType tx_type, bool is_inter, uint8_t use_sharpness,
                               uint8_t delta_q_present, uint8_t picture_qp, uint32_t lambda, int plane) {
    SequenceControlSet* scs = pcs->scs;
    bool allintra = scs->allintra;
    bool rtc = scs->static_config.rtc;
    int sharpness = 0; // No Sharpness
    const ScanOrder* const scan_order = get_scan_order(tx_size, tx_type);
    const int16_t* scan = scan_order->scan;
    const int shift = av1_get_tx_scale_tab[tx_size];
    const PlaneType plane_type = plane;
    const TxSize txs_ctx = get_txsize_entropy_ctx(tx_size);
    const TxClass tx_class = tx_type_to_class[tx_type];
    const int bwl = get_txb_bwl(tx_size);
    const int width = get_txb_wide(tx_size);
    const int height = get_txb_high(tx_size);
    assert(width == (1 << bwl));
    assert(txs_ctx < TX_SIZES);
    const LvMapCoeffCost* txb_costs = &ctx->md_rate_est_ctx->coeff_fac_bits[txs_ctx][plane_type];
    const int eob_multi_size = txsize_log2_minus4[tx_size];
    const LvMapEobCost* txb_eob_costs = &ctx->md_rate_est_ctx->eob_frac_bits[eob_multi_size][plane_type];
    const int non_skip_cost = txb_costs->txb_skip_cost[txb_skip_context][0];
    const int skip_cost = txb_costs->txb_skip_cost[txb_skip_context][1];
    const int eob_cost = get_eob_cost(*eob, txb_eob_costs, txb_costs, tx_class);
    int rweight = 100;
    const int32_t sharpness_val = CLIP3(0, 7, pcs->scs->static_config.sharpness);
    const int rshift = MAX(2, (int)sharpness_val);
    // Luma blocks whose superblock qindex is below the picture-level qindex switch to
    // "sharpness" mode: end-of-block moves and block skipping are disabled, and
    // rweight = 0 makes the rdmult below evaluate to 2 >> rshift, i.e. 0.
    if (use_sharpness && delta_q_present && plane == 0) {
        int diff = ctx->sb_ptr->qindex - quantizer_to_qindex[picture_qp];
        if (diff < 0) {
            sharpness = 1;
            rweight = 0;
        }
    }
    // rdmult = ((lambda * plane multiplier * rweight / 100) + 2) >> rshift
    const int64_t rdmult =
        (((((int64_t)lambda * plane_rd_mult[allintra || rtc][is_inter][plane_type]) * rweight) / 100) + 2) >> rshift;
    uint8_t levels_buf[TX_PAD_2D];
    uint8_t* const levels = set_levels(levels_buf, width);

    // With a single coefficient the levels buffer is not initialized.
    if (*eob > 1) {
        svt_av1_txb_init_levels(qcoeff_ptr, width, height, levels);
    }
    int accu_rate = eob_cost;

    int64_t accu_dist = 0;
    int si = *eob - 1;
    const int ci = scan[si];
    const TranLow qc = qcoeff_ptr[ci];
    const TranLow abs_qc = abs(qc);
    const int sign = qc < 0;
    const int max_nz_num = 4;
    // nz_ci tracks the non-zero coefficients visited so far; it starts with the last one.
    // Stage 2 appends only while nz_num <= max_nz_num, so at most 5 entries are written.
    int nz_num = 1;
    int nz_ci[5] = {ci, 0, 0, 0, 0};
    // Stage 1: the last coefficient.
    if (abs_qc >= 2) {
        update_coeff_general(&accu_rate,
                             &accu_dist,
                             si,
                             *eob,
                             tx_size,
                             tx_class,
                             bwl,
                             height,
                             rdmult,
                             shift,
                             dc_sign_context,
                             p->dequant_qtx,
                             scan,
                             txb_costs,
                             coeff_ptr,
                             qcoeff_ptr,
                             dqcoeff_ptr,
                             levels,
                             qparam->iqmatrix);
        --si;
    } else {
        // |level| == 1: lowering it is not evaluated here; only rate and distortion are accumulated.
        assert(abs_qc == 1);
        const int coeff_ctx = get_lower_levels_ctx_eob(bwl, height, si);
        accu_rate += get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx, dc_sign_context, txb_costs, bwl, tx_class);

        const TranLow tqc = coeff_ptr[ci];
        const TranLow dqc = dqcoeff_ptr[ci];
        const int64_t dist = get_coeff_dist(tqc, dqc, shift);
        const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
        accu_dist += dist - dist0;
        --si;
    }
    // Stage 2: one loop per transform class so that tx_class reaches the
    // force-inlined helper as a compile-time literal.
#define UPDATE_COEFF_EOB_CASE(tx_class_literal)            \
    case tx_class_literal:                                 \
        for (; si >= 0 && nz_num <= max_nz_num; --si) {    \
            update_coeff_eob(&accu_rate,                   \
                             &accu_dist,                   \
                             eob,                          \
                             &nz_num,                      \
                             nz_ci,                        \
                             si,                           \
                             tx_size,                      \
                             tx_class_literal,             \
                             bwl,                          \
                             height,                       \
                             dc_sign_context,              \
                             rdmult,                       \
                             shift,                        \
                             p->dequant_qtx,               \
                             scan,                         \
                             txb_eob_costs,                \
                             txb_costs,                    \
                             coeff_ptr,                    \
                             qcoeff_ptr,                   \
                             dqcoeff_ptr,                  \
                             levels,                       \
                             sharpness,                    \
                             qparam->iqmatrix);            \
        }                                                  \
        break;
    switch (tx_class) {
        UPDATE_COEFF_EOB_CASE(TX_CLASS_2D);
        UPDATE_COEFF_EOB_CASE(TX_CLASS_HORIZ);
        UPDATE_COEFF_EOB_CASE(TX_CLASS_VERT);
#undef UPDATE_COEFF_EOB_CASE
    default:
        assert(false);
    }

    // Stage 3: every position was visited while still tracking all non-zero
    // coefficients, so the option of skipping the whole block can be evaluated.
    if (si == -1 && nz_num <= max_nz_num) {
        update_skip(&accu_rate,
                    accu_dist,
                    eob,
                    nz_num,
                    nz_ci,
                    rdmult,
                    skip_cost,
                    non_skip_cost,
                    qcoeff_ptr,
                    dqcoeff_ptr,
                    sharpness);
    }

    int si_end = 1; // default: full RDOQ
    // Optional cut-off: only the cut_off_coeff positions closest to the end-of-block
    // are processed by the simple stage.
    if (ctx->rdoq_ctrls.cut_off_num) {
        const int cut_off_coeff = AOMMAX((width * height) >> 7,
                                         (*eob * ctx->rdoq_ctrls.cut_off_num) / ctx->rdoq_ctrls.cut_off_denum);
        si_end = AOMMAX(1, *eob - cut_off_coeff);
    }
    // Stage 4: rate-only update for the remaining non-DC positions.
#define UPDATE_COEFF_SIMPLE_CASE(tx_class_literal)         \
    case tx_class_literal:                                 \
        for (; si >= si_end; --si) {                       \
            update_coeff_simple(&accu_rate,                \
                                si,                        \
                                *eob,                      \
                                tx_size,                   \
                                tx_class_literal,          \
                                bwl,                       \
                                rdmult,                    \
                                shift,                     \
                                p->dequant_qtx,            \
                                scan,                      \
                                txb_costs,                 \
                                coeff_ptr,                 \
                                qcoeff_ptr,                \
                                dqcoeff_ptr,               \
                                levels,                    \
                                qparam->iqmatrix);         \
        }                                                  \
        break;
    switch (tx_class) {
        UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_2D);
        UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_HORIZ);
        UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_VERT);
#undef UPDATE_COEFF_SIMPLE_CASE
    default:
        assert(false);
    }

    // DC position
    if (si == 0) {
        // no need to update accu_dist because it's not used after this point
        int64_t dummy_dist = 0;
        update_coeff_general(&accu_rate,
                             &dummy_dist,
                             si,
                             *eob,
                             tx_size,
                             tx_class,
                             bwl,
                             height,
                             rdmult,
                             shift,
                             dc_sign_context,
                             p->dequant_qtx,
                             scan,
                             txb_costs,
                             coeff_ptr,
                             qcoeff_ptr,
                             dqcoeff_ptr,
                             levels,
                             qparam->iqmatrix);
    }
}
1238 | | |
1239 | 1.22M | static INLINE TxSize aom_av1_get_adjusted_tx_size(TxSize tx_size) { |
1240 | 1.22M | switch (tx_size) { |
1241 | 3.21k | case TX_64X64: |
1242 | 5.55k | case TX_64X32: |
1243 | 5.55k | case TX_32X64: |
1244 | 5.55k | return TX_32X32; |
1245 | 0 | case TX_64X16: |
1246 | 0 | return TX_32X16; |
1247 | 0 | case TX_16X64: |
1248 | 0 | return TX_16X32; |
1249 | 1.22M | default: |
1250 | 1.22M | return tx_size; |
1251 | 1.22M | } |
1252 | 1.22M | } |
1253 | | |
1254 | | void svt_aom_quantize_inv_quantize_light(PictureControlSet* pcs, int32_t* coeff, int32_t* quant_coeff, |
1255 | | int32_t* recon_coeff, uint32_t qindex, TxSize txsize, uint16_t* eob, |
1256 | 7.54k | uint32_t bit_depth, TxType tx_type) { |
1257 | 7.54k | EncodeContext* enc_ctx = pcs->scs->enc_ctx; |
1258 | | |
1259 | 7.54k | uint32_t q_index = qindex; |
1260 | | |
1261 | 7.54k | const ScanOrder* const scan_order = get_scan_order(txsize, tx_type); |
1262 | | |
1263 | 7.54k | const int32_t n_coeffs = av1_get_max_eob(txsize); |
1264 | | |
1265 | 7.54k | int32_t qmatrix_level = (IS_2D_TRANSFORM(tx_type) && pcs->ppcs->frm_hdr.quantization_params.using_qmatrix) |
1266 | | |
1267 | 7.54k | ? pcs->ppcs->frm_hdr.quantization_params.qm[PLANE_Y] |
1268 | | |
1269 | 7.54k | : NUM_QM_LEVELS - 1; |
1270 | | |
1271 | 7.54k | TxSize adjusted_tx_size = aom_av1_get_adjusted_tx_size(txsize); |
1272 | | |
1273 | 7.54k | const QmVal* q_matrix = pcs->ppcs->gqmatrix[qmatrix_level][PLANE_Y][adjusted_tx_size]; |
1274 | | |
1275 | 7.54k | const QmVal* iq_matrix = pcs->ppcs->giqmatrix[qmatrix_level][PLANE_Y][adjusted_tx_size]; |
1276 | | |
1277 | 7.54k | if (q_matrix == NULL && iq_matrix == NULL) { |
1278 | 7.54k | #if CONFIG_ENABLE_HIGH_BIT_DEPTH |
1279 | 7.54k | if (bit_depth > EB_EIGHT_BIT) { |
1280 | 0 | svt_aom_highbd_quantize_b((TranLow*)coeff, |
1281 | 0 | n_coeffs, |
1282 | 0 | enc_ctx->quants_bd.y_zbin[q_index], |
1283 | 0 | enc_ctx->quants_bd.y_round[q_index], |
1284 | 0 | enc_ctx->quants_bd.y_quant[q_index], |
1285 | 0 | enc_ctx->quants_bd.y_quant_shift[q_index], |
1286 | 0 | quant_coeff, |
1287 | 0 | (TranLow*)recon_coeff, |
1288 | 0 | enc_ctx->deq_bd.v_dequant_qtx[q_index], |
1289 | 0 | eob, |
1290 | 0 | scan_order->scan, |
1291 | 0 | scan_order->iscan, |
1292 | 0 | q_matrix, |
1293 | 0 | iq_matrix, |
1294 | 0 | av1_get_tx_scale_tab[txsize]); |
1295 | 0 | } else |
1296 | | #else |
1297 | | UNUSED(bit_depth); |
1298 | | #endif |
1299 | 7.54k | { |
1300 | 7.54k | svt_aom_quantize_b((TranLow*)coeff, |
1301 | 7.54k | n_coeffs, |
1302 | 7.54k | enc_ctx->quants_8bit.v_zbin[q_index], |
1303 | 7.54k | enc_ctx->quants_8bit.v_round[q_index], |
1304 | 7.54k | enc_ctx->quants_8bit.v_quant[q_index], |
1305 | 7.54k | enc_ctx->quants_8bit.v_quant_shift[q_index], |
1306 | 7.54k | quant_coeff, |
1307 | 7.54k | (TranLow*)recon_coeff, |
1308 | 7.54k | enc_ctx->deq_8bit.y_dequant_qtx[q_index], |
1309 | 7.54k | eob, |
1310 | 7.54k | scan_order->scan, |
1311 | 7.54k | scan_order->iscan, |
1312 | 7.54k | q_matrix, |
1313 | 7.54k | iq_matrix, |
1314 | 7.54k | av1_get_tx_scale_tab[txsize]); |
1315 | 7.54k | } |
1316 | 7.54k | } else { |
1317 | 0 | #if CONFIG_ENABLE_HIGH_BIT_DEPTH |
1318 | 0 | if (bit_depth > EB_EIGHT_BIT) { |
1319 | 0 | svt_av1_highbd_quantize_b_qm((TranLow*)coeff, |
1320 | 0 | n_coeffs, |
1321 | 0 | enc_ctx->quants_bd.y_zbin[q_index], |
1322 | 0 | enc_ctx->quants_bd.y_round[q_index], |
1323 | 0 | enc_ctx->quants_bd.y_quant[q_index], |
1324 | 0 | enc_ctx->quants_bd.y_quant_shift[q_index], |
1325 | 0 | quant_coeff, |
1326 | 0 | (TranLow*)recon_coeff, |
1327 | 0 | enc_ctx->deq_bd.v_dequant_qtx[q_index], |
1328 | 0 | eob, |
1329 | 0 | scan_order->scan, |
1330 | 0 | scan_order->iscan, |
1331 | 0 | q_matrix, |
1332 | 0 | iq_matrix, |
1333 | 0 | av1_get_tx_scale_tab[txsize]); |
1334 | 0 | } else |
1335 | 0 | #endif |
1336 | 0 | { |
1337 | 0 | svt_av1_quantize_b_qm((TranLow*)coeff, |
1338 | 0 | n_coeffs, |
1339 | 0 | enc_ctx->quants_8bit.v_zbin[q_index], |
1340 | 0 | enc_ctx->quants_8bit.v_round[q_index], |
1341 | 0 | enc_ctx->quants_8bit.v_quant[q_index], |
1342 | 0 | enc_ctx->quants_8bit.v_quant_shift[q_index], |
1343 | 0 | quant_coeff, |
1344 | 0 | (TranLow*)recon_coeff, |
1345 | 0 | enc_ctx->deq_8bit.y_dequant_qtx[q_index], |
1346 | 0 | eob, |
1347 | 0 | scan_order->scan, |
1348 | 0 | scan_order->iscan, |
1349 | 0 | q_matrix, |
1350 | 0 | iq_matrix, |
1351 | 0 | av1_get_tx_scale_tab[txsize]); |
1352 | 0 | } |
1353 | 0 | } |
1354 | 7.54k | } |
1355 | | |
1356 | | // See av1_get_txb_entropy_context in libaom |
1357 | 0 | uint8_t svt_av1_compute_cul_level_c(const int16_t* const scan, const int32_t* const quant_coeff, uint16_t* eob) { |
1358 | 0 | int32_t cul_level = 0; |
1359 | 0 | for (int32_t c = 0; c < *eob; ++c) { |
1360 | 0 | const int16_t pos = scan[c]; |
1361 | 0 | const int32_t v = quant_coeff[pos]; |
1362 | 0 | int32_t level = ABS(v); |
1363 | 0 | cul_level += level; |
1364 | | // Early exit the loop if cul_level reaches COEFF_CONTEXT_MASK |
1365 | 0 | if (cul_level >= COEFF_CONTEXT_MASK) { |
1366 | 0 | break; |
1367 | 0 | } |
1368 | 0 | } |
1369 | |
|
1370 | 0 | cul_level = AOMMIN(COEFF_CONTEXT_MASK, cul_level); |
1371 | | // DC value |
1372 | 0 | set_dc_sign(&cul_level, quant_coeff[0]); |
1373 | 0 | return (uint8_t)cul_level; |
1374 | 0 | } |
1375 | | |
1376 | | #if OPT_COEFF_SHAVING |
1377 | | |
1378 | | // Retract EOB by removing trailing low-magnitude coefficients separated by zero gaps, |
1379 | | // then compute energy on the reduced block and optionally zero it entirely if energy is low. |
1380 | | // Returns the updated EOB (0 = block became skip). |
1381 | | static INLINE uint16_t shave_coeff(int32_t* quant_buf, int32_t* recon_buf, uint16_t eob, TxSize tx_size, TxType tx_type, |
1382 | 0 | const CoeffShavingCtrls* ctrls) { |
1383 | 0 | const int16_t* const scan = get_scan_order(tx_size, tx_type)->scan; |
1384 | |
|
1385 | 0 | const int level_th = ctrls->level_threshold; |
1386 | 0 | const int gap_th = ctrls->zero_gap_threshold; |
1387 | |
|
1388 | 0 | int updated_eob = (int)eob; |
1389 | | |
1390 | | // ------------------------- |
1391 | | // Phase 1: EOB retraction |
1392 | | // ------------------------- |
1393 | 0 | while (updated_eob > 1) { |
1394 | 0 | const int last_pos = scan[updated_eob - 1]; |
1395 | 0 | const int32_t val = quant_buf[last_pos]; |
1396 | 0 | const int32_t abs_val = (val >= 0) ? val : -val; |
1397 | |
|
1398 | 0 | if (abs_val > level_th) { |
1399 | 0 | break; |
1400 | 0 | } |
1401 | | |
1402 | | // Find previous non-zero coefficient |
1403 | 0 | int next_nz = updated_eob - 2; |
1404 | 0 | while (next_nz >= 0 && quant_buf[scan[next_nz]] == 0) { |
1405 | 0 | --next_nz; |
1406 | 0 | } |
1407 | |
|
1408 | 0 | if (next_nz < 0) { |
1409 | 0 | break; |
1410 | 0 | } |
1411 | | |
1412 | | // Gap check |
1413 | 0 | const int gap = (updated_eob - 1) - next_nz - 1; |
1414 | 0 | if (gap < gap_th) { |
1415 | 0 | break; |
1416 | 0 | } |
1417 | | |
1418 | | // Zero trailing coefficient |
1419 | 0 | quant_buf[last_pos] = 0; |
1420 | 0 | recon_buf[last_pos] = 0; |
1421 | |
|
1422 | 0 | updated_eob = next_nz + 1; |
1423 | 0 | } |
1424 | | |
1425 | | // ------------------------- |
1426 | | // Phase 2: energy check (post-shaving) |
1427 | | // ------------------------- |
1428 | 0 | const int skip_th = ctrls->skip_energy_threshold; |
1429 | 0 | if (skip_th > 0 && updated_eob > 0) { |
1430 | 0 | int32_t total_energy = 0; |
1431 | |
|
1432 | 0 | for (int c = 0; c < updated_eob; ++c) { |
1433 | 0 | const int32_t v = quant_buf[scan[c]]; |
1434 | 0 | total_energy += (v >= 0) ? v : -v; |
1435 | |
|
1436 | 0 | if (total_energy > skip_th) { |
1437 | 0 | break; |
1438 | 0 | } |
1439 | 0 | } |
1440 | |
|
1441 | 0 | if (total_energy <= skip_th) { |
1442 | | // Zero entire block |
1443 | 0 | for (int c = 0; c < updated_eob; ++c) { |
1444 | 0 | const int pos = scan[c]; |
1445 | 0 | quant_buf[pos] = 0; |
1446 | 0 | recon_buf[pos] = 0; |
1447 | 0 | } |
1448 | 0 | return 0; |
1449 | 0 | } |
1450 | 0 | } |
1451 | | |
1452 | 0 | return (uint16_t)updated_eob; |
1453 | 0 | } |
1454 | | |
1455 | | #endif |
1456 | | |
1457 | | uint8_t svt_aom_quantize_inv_quantize(PictureControlSet* pcs, ModeDecisionContext* ctx, int32_t* coeff, |
1458 | | int32_t* quant_coeff, int32_t* recon_coeff, uint32_t qindex, |
1459 | | int32_t segmentation_qp_offset, TxSize txsize, uint16_t* eob, |
1460 | | uint32_t component_type, uint32_t bit_depth, TxType tx_type, |
1461 | | int16_t txb_skip_context, int16_t dc_sign_context, PredictionMode pred_mode, |
1462 | 1.21M | uint32_t lambda, bool is_encode_pass) { |
1463 | 1.21M | SequenceControlSet* scs = pcs->scs; |
1464 | 1.21M | EncodeContext* enc_ctx = scs->enc_ctx; |
1465 | 1.21M | int32_t plane = component_type == COMPONENT_LUMA ? PLANE_Y |
1466 | 1.21M | : (component_type == COMPONENT_CHROMA_CB ? PLANE_U : PLANE_V); |
1467 | | |
1468 | 1.21M | int32_t qmatrix_level = (IS_2D_TRANSFORM(tx_type) && pcs->ppcs->frm_hdr.quantization_params.using_qmatrix) |
1469 | 1.21M | ? pcs->ppcs->frm_hdr.quantization_params.qm[plane] |
1470 | 1.21M | : NUM_QM_LEVELS - 1; |
1471 | | |
1472 | 1.21M | TxSize adjusted_tx_size = aom_av1_get_adjusted_tx_size(txsize); |
1473 | 1.21M | MacroblockPlane candidate_plane; |
1474 | 1.21M | const QmVal* q_matrix = pcs->ppcs->gqmatrix[qmatrix_level][plane][adjusted_tx_size]; |
1475 | 1.21M | const QmVal* iq_matrix = pcs->ppcs->giqmatrix[qmatrix_level][plane][adjusted_tx_size]; |
1476 | 1.21M | int32_t q_index = pcs->ppcs->frm_hdr.delta_q_params.delta_q_present |
1477 | 1.21M | ? qindex |
1478 | 1.21M | : pcs->ppcs->frm_hdr.quantization_params.base_q_idx; |
1479 | 1.21M | if (segmentation_qp_offset != 0) { |
1480 | 0 | q_index = CLIP3(0, 255, q_index + segmentation_qp_offset); |
1481 | 0 | } |
1482 | 1.21M | if (component_type != COMPONENT_LUMA) { |
1483 | 256k | const int8_t offset = (component_type == COMPONENT_CHROMA_CB) |
1484 | 256k | ? pcs->ppcs->frm_hdr.quantization_params.delta_q_dc[1] // we are assuming delta_q_ac == delta_q_dc |
1485 | 256k | : pcs->ppcs->frm_hdr.quantization_params.delta_q_dc[2]; |
1486 | 256k | q_index += offset; |
1487 | 256k | q_index = (uint32_t)CLIP3(0, 255, (int32_t)q_index); |
1488 | 256k | } |
1489 | 1.22M | if (bit_depth == EB_EIGHT_BIT) { |
1490 | 1.22M | if (component_type == COMPONENT_LUMA) { |
1491 | 965k | candidate_plane.quant_qtx = enc_ctx->quants_8bit.y_quant[q_index]; |
1492 | 965k | candidate_plane.quant_fp_qtx = enc_ctx->quants_8bit.y_quant_fp[q_index]; |
1493 | 965k | candidate_plane.round_fp_qtx = enc_ctx->quants_8bit.y_round_fp[q_index]; |
1494 | 965k | candidate_plane.quant_shift_qtx = enc_ctx->quants_8bit.y_quant_shift[q_index]; |
1495 | 965k | candidate_plane.zbin_qtx = enc_ctx->quants_8bit.y_zbin[q_index]; |
1496 | 965k | candidate_plane.round_qtx = enc_ctx->quants_8bit.y_round[q_index]; |
1497 | 965k | candidate_plane.dequant_qtx = enc_ctx->deq_8bit.y_dequant_qtx[q_index]; |
1498 | 965k | } else if (component_type == COMPONENT_CHROMA_CB) { |
1499 | 128k | candidate_plane.quant_qtx = enc_ctx->quants_8bit.u_quant[q_index]; |
1500 | 128k | candidate_plane.quant_fp_qtx = enc_ctx->quants_8bit.u_quant_fp[q_index]; |
1501 | 128k | candidate_plane.round_fp_qtx = enc_ctx->quants_8bit.u_round_fp[q_index]; |
1502 | 128k | candidate_plane.quant_shift_qtx = enc_ctx->quants_8bit.u_quant_shift[q_index]; |
1503 | 128k | candidate_plane.zbin_qtx = enc_ctx->quants_8bit.u_zbin[q_index]; |
1504 | 128k | candidate_plane.round_qtx = enc_ctx->quants_8bit.u_round[q_index]; |
1505 | 128k | candidate_plane.dequant_qtx = enc_ctx->deq_8bit.u_dequant_qtx[q_index]; |
1506 | 128k | } |
1507 | | |
1508 | 127k | else { |
1509 | 127k | candidate_plane.quant_qtx = enc_ctx->quants_8bit.v_quant[q_index]; |
1510 | 127k | candidate_plane.quant_fp_qtx = enc_ctx->quants_8bit.v_quant_fp[q_index]; |
1511 | 127k | candidate_plane.round_fp_qtx = enc_ctx->quants_8bit.v_round_fp[q_index]; |
1512 | 127k | candidate_plane.quant_shift_qtx = enc_ctx->quants_8bit.v_quant_shift[q_index]; |
1513 | 127k | candidate_plane.zbin_qtx = enc_ctx->quants_8bit.v_zbin[q_index]; |
1514 | 127k | candidate_plane.round_qtx = enc_ctx->quants_8bit.v_round[q_index]; |
1515 | 127k | candidate_plane.dequant_qtx = enc_ctx->deq_8bit.v_dequant_qtx[q_index]; |
1516 | 127k | } |
1517 | 18.4E | } else { |
1518 | 18.4E | if (component_type == COMPONENT_LUMA) { |
1519 | 0 | candidate_plane.quant_qtx = enc_ctx->quants_bd.y_quant[q_index]; |
1520 | 0 | candidate_plane.quant_fp_qtx = enc_ctx->quants_bd.y_quant_fp[q_index]; |
1521 | 0 | candidate_plane.round_fp_qtx = enc_ctx->quants_bd.y_round_fp[q_index]; |
1522 | 0 | candidate_plane.quant_shift_qtx = enc_ctx->quants_bd.y_quant_shift[q_index]; |
1523 | 0 | candidate_plane.zbin_qtx = enc_ctx->quants_bd.y_zbin[q_index]; |
1524 | 0 | candidate_plane.round_qtx = enc_ctx->quants_bd.y_round[q_index]; |
1525 | 0 | candidate_plane.dequant_qtx = enc_ctx->deq_bd.y_dequant_qtx[q_index]; |
1526 | 0 | } |
1527 | | |
1528 | 18.4E | else if (component_type == COMPONENT_CHROMA_CB) { |
1529 | 0 | candidate_plane.quant_qtx = enc_ctx->quants_bd.u_quant[q_index]; |
1530 | 0 | candidate_plane.quant_fp_qtx = enc_ctx->quants_bd.u_quant_fp[q_index]; |
1531 | 0 | candidate_plane.round_fp_qtx = enc_ctx->quants_bd.u_round_fp[q_index]; |
1532 | 0 | candidate_plane.quant_shift_qtx = enc_ctx->quants_bd.u_quant_shift[q_index]; |
1533 | 0 | candidate_plane.zbin_qtx = enc_ctx->quants_bd.u_zbin[q_index]; |
1534 | 0 | candidate_plane.round_qtx = enc_ctx->quants_bd.u_round[q_index]; |
1535 | 0 | candidate_plane.dequant_qtx = enc_ctx->deq_bd.u_dequant_qtx[q_index]; |
1536 | 0 | } |
1537 | | |
1538 | 18.4E | else { |
1539 | 18.4E | candidate_plane.quant_qtx = enc_ctx->quants_bd.v_quant[q_index]; |
1540 | 18.4E | candidate_plane.quant_fp_qtx = enc_ctx->quants_bd.v_quant_fp[q_index]; |
1541 | 18.4E | candidate_plane.round_fp_qtx = enc_ctx->quants_bd.v_round_fp[q_index]; |
1542 | 18.4E | candidate_plane.quant_shift_qtx = enc_ctx->quants_bd.v_quant_shift[q_index]; |
1543 | 18.4E | candidate_plane.zbin_qtx = enc_ctx->quants_bd.v_zbin[q_index]; |
1544 | 18.4E | candidate_plane.round_qtx = enc_ctx->quants_bd.v_round[q_index]; |
1545 | 18.4E | candidate_plane.dequant_qtx = enc_ctx->deq_bd.v_dequant_qtx[q_index]; |
1546 | 18.4E | } |
1547 | 18.4E | } |
1548 | | |
1549 | 1.21M | const ScanOrder* const scan_order = get_scan_order(txsize, tx_type); |
1550 | | |
1551 | 1.21M | const int32_t n_coeffs = av1_get_max_eob(txsize); |
1552 | | |
1553 | 1.21M | QuantParam qparam; |
1554 | | |
1555 | 1.21M | qparam.log_scale = av1_get_tx_scale_tab[txsize]; |
1556 | 1.21M | qparam.tx_size = txsize; |
1557 | 1.21M | qparam.qmatrix = q_matrix; |
1558 | 1.21M | qparam.iqmatrix = iq_matrix; |
1559 | | |
1560 | 1.21M | bool is_inter = (pred_mode >= NEARESTMV); |
1561 | 1.21M | bool perform_rdoq; |
1562 | | |
1563 | | // If rdoq_level is specified in the command line instruction, set perform_rdoq accordingly. |
1564 | 1.21M | perform_rdoq = !svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) && |
1565 | 22.6k | ((ctx->mds_do_rdoq || is_encode_pass) && ctx->rdoq_ctrls.enabled); |
1566 | 1.22M | if (!is_encode_pass) { |
1567 | 1.22M | if ((ctx->rdoq_ctrls.dct_dct_only && tx_type != DCT_DCT) || |
1568 | 1.22M | (ctx->rdoq_ctrls.skip_uv && component_type != COMPONENT_LUMA)) { |
1569 | 0 | perform_rdoq = 0; |
1570 | 0 | } |
1571 | 1.22M | } |
1572 | 1.21M | if (perform_rdoq) { |
1573 | 22.6k | #if CONFIG_ENABLE_HIGH_BIT_DEPTH |
1574 | 22.6k | if ((bit_depth > EB_EIGHT_BIT) || (is_encode_pass && scs->is_16bit_pipeline)) { |
1575 | 0 | svt_av1_highbd_quantize_fp_facade((TranLow*)coeff, |
1576 | 0 | n_coeffs, |
1577 | 0 | &candidate_plane, |
1578 | 0 | quant_coeff, |
1579 | 0 | (TranLow*)recon_coeff, |
1580 | 0 | eob, |
1581 | 0 | scan_order, |
1582 | 0 | &qparam); |
1583 | 0 | } else |
1584 | 22.6k | #endif |
1585 | 22.6k | { |
1586 | 22.6k | svt_av1_quantize_fp_facade((TranLow*)coeff, |
1587 | 22.6k | n_coeffs, |
1588 | 22.6k | &candidate_plane, |
1589 | 22.6k | quant_coeff, |
1590 | 22.6k | (TranLow*)recon_coeff, |
1591 | 22.6k | eob, |
1592 | 22.6k | scan_order, |
1593 | 22.6k | &qparam); |
1594 | 22.6k | } |
1595 | 1.19M | } else { |
1596 | 1.19M | #if CONFIG_ENABLE_HIGH_BIT_DEPTH |
1597 | 1.19M | if ((bit_depth > EB_EIGHT_BIT) || (is_encode_pass && scs->is_16bit_pipeline)) { |
1598 | 0 | svt_av1_highbd_quantize_b_facade((TranLow*)coeff, |
1599 | 0 | n_coeffs, |
1600 | 0 | &candidate_plane, |
1601 | 0 | quant_coeff, |
1602 | 0 | (TranLow*)recon_coeff, |
1603 | 0 | eob, |
1604 | 0 | scan_order, |
1605 | 0 | &qparam); |
1606 | 0 | } else |
1607 | 1.19M | #endif |
1608 | 1.19M | { |
1609 | 1.19M | av1_quantize_b_facade_ii((TranLow*)coeff, |
1610 | 1.19M | n_coeffs, |
1611 | 1.19M | &candidate_plane, |
1612 | 1.19M | quant_coeff, |
1613 | 1.19M | (TranLow*)recon_coeff, |
1614 | 1.19M | eob, |
1615 | 1.19M | scan_order, |
1616 | 1.19M | &qparam); |
1617 | 1.19M | } |
1618 | 1.19M | } |
1619 | 1.21M | if (perform_rdoq && *eob != 0) { |
1620 | 10.3k | int width = tx_size_wide[txsize]; |
1621 | 10.3k | int height = tx_size_high[txsize]; |
1622 | 10.3k | int eob_perc = (*eob) * 100 / (width * height); |
1623 | 10.3k | if (eob_perc >= ctx->rdoq_ctrls.eob_th) { |
1624 | 0 | perform_rdoq = 0; |
1625 | 0 | } |
1626 | 10.3k | if (perform_rdoq && (eob_perc >= ctx->rdoq_ctrls.eob_fast_th)) { |
1627 | 0 | svt_fast_optimize_b( |
1628 | 0 | (TranLow*)coeff, &candidate_plane, quant_coeff, (TranLow*)recon_coeff, eob, txsize, tx_type); |
1629 | 0 | } |
1630 | 10.3k | if (perform_rdoq == 0) { |
1631 | 0 | #if CONFIG_ENABLE_HIGH_BIT_DEPTH |
1632 | 0 | if ((bit_depth > EB_EIGHT_BIT) || (is_encode_pass && scs->is_16bit_pipeline)) { |
1633 | 0 | svt_av1_highbd_quantize_b_facade((TranLow*)coeff, |
1634 | 0 | n_coeffs, |
1635 | 0 | &candidate_plane, |
1636 | 0 | quant_coeff, |
1637 | 0 | (TranLow*)recon_coeff, |
1638 | 0 | eob, |
1639 | 0 | scan_order, |
1640 | 0 | &qparam); |
1641 | 0 | } else |
1642 | 0 | #endif |
1643 | 0 | { |
1644 | 0 | av1_quantize_b_facade_ii((TranLow*)coeff, |
1645 | 0 | n_coeffs, |
1646 | 0 | &candidate_plane, |
1647 | 0 | quant_coeff, |
1648 | 0 | (TranLow*)recon_coeff, |
1649 | 0 | eob, |
1650 | 0 | scan_order, |
1651 | 0 | &qparam); |
1652 | 0 | } |
1653 | 0 | } |
1654 | 10.3k | } |
1655 | 1.21M | if (perform_rdoq && *eob != 0) { |
1656 | | // Perform rdoq |
1657 | 10.3k | svt_av1_optimize_b(pcs, |
1658 | 10.3k | ctx, |
1659 | 10.3k | txb_skip_context, |
1660 | 10.3k | dc_sign_context, |
1661 | 10.3k | (TranLow*)coeff, |
1662 | 10.3k | &candidate_plane, |
1663 | 10.3k | quant_coeff, |
1664 | 10.3k | (TranLow*)recon_coeff, |
1665 | 10.3k | eob, |
1666 | 10.3k | &qparam, |
1667 | 10.3k | txsize, |
1668 | 10.3k | tx_type, |
1669 | 10.3k | is_inter, |
1670 | 10.3k | scs->vq_ctrls.sharpness_ctrls.rdoq, |
1671 | 10.3k | pcs->ppcs->frm_hdr.delta_q_params.delta_q_present, |
1672 | 10.3k | pcs->ppcs->picture_qp, |
1673 | 10.3k | lambda, |
1674 | 10.3k | (component_type == COMPONENT_LUMA) ? 0 : 1); |
1675 | 10.3k | } |
1676 | | |
1677 | 1.21M | #if OPT_COEFF_SHAVING |
1678 | | // Apply coefficient shaving for luma after all quantization/RDOQ is complete. |
1679 | | // This catches all luma quantize paths (light PD1, regular TX, encode pass) |
1680 | | // in a single place. |
1681 | 1.21M | if (component_type == COMPONENT_LUMA && ctx->coeff_shaving_ctrls.enabled && *eob > 1) { |
1682 | 0 | *eob = shave_coeff(quant_coeff, recon_coeff, *eob, txsize, tx_type, &ctx->coeff_shaving_ctrls); |
1683 | 0 | } |
1684 | 1.21M | #endif |
1685 | | |
1686 | 1.21M | if (!ctx->rate_est_ctrls.update_skip_ctx_dc_sign_ctx) { |
1687 | 1.19M | return 0; |
1688 | 1.19M | } |
1689 | | |
1690 | | // Derive cul_level |
1691 | 20.4k | return svt_av1_compute_cul_level(scan_order->scan, quant_coeff, eob); |
1692 | 1.21M | } |
1693 | | |
1694 | | void svt_aom_inv_transform_recon_wrapper(PictureControlSet* pcs, ModeDecisionContext* ctx, uint8_t* pred_buffer, |
1695 | | uint32_t pred_offset, uint32_t pred_stride, uint8_t* rec_buffer, |
1696 | | uint32_t rec_offset, uint32_t rec_stride, int32_t* rec_coeff_buffer, |
1697 | | uint32_t coeff_offset, bool hbd, TxSize txsize, TxType transform_type, |
1698 | 37.2k | PlaneType component_type, uint32_t eob) { |
1699 | 37.2k | if (hbd) { |
1700 | 0 | svt_aom_inv_transform_recon(rec_coeff_buffer + coeff_offset, |
1701 | 0 | CONVERT_TO_BYTEPTR(((uint16_t*)pred_buffer) + pred_offset), |
1702 | 0 | pred_stride, |
1703 | 0 | CONVERT_TO_BYTEPTR(((uint16_t*)rec_buffer) + rec_offset), |
1704 | 0 | rec_stride, |
1705 | 0 | txsize, |
1706 | 0 | EB_TEN_BIT, |
1707 | 0 | transform_type, |
1708 | 0 | component_type, |
1709 | 0 | eob, |
1710 | 0 | svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id)); |
1711 | 37.2k | } else { |
1712 | 37.2k | svt_aom_inv_transform_recon8bit(rec_coeff_buffer + coeff_offset, |
1713 | 37.2k | pred_buffer + pred_offset, |
1714 | 37.2k | pred_stride, |
1715 | 37.2k | rec_buffer + rec_offset, |
1716 | 37.2k | rec_stride, |
1717 | 37.2k | txsize, |
1718 | 37.2k | transform_type, |
1719 | 37.2k | component_type, |
1720 | 37.2k | eob, |
1721 | 37.2k | svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id)); |
1722 | 37.2k | } |
1723 | 37.2k | } |
1724 | | |
1725 | | /* |
1726 | | tx path for light PD1 chroma |
1727 | | */ |
1728 | | void svt_aom_full_loop_chroma_light_pd1(PictureControlSet* pcs, ModeDecisionContext* ctx, |
1729 | | ModeDecisionCandidateBuffer* cand_bf, EbPictureBufferDesc* input_pic, |
1730 | | uint32_t input_cb_origin_in_index, uint32_t blk_chroma_origin_index, |
1731 | | COMPONENT_TYPE component_type, uint32_t chroma_qindex, |
1732 | | uint64_t cb_full_distortion[DIST_CALC_TOTAL], |
1733 | | uint64_t cr_full_distortion[DIST_CALC_TOTAL], uint64_t* cb_coeff_bits, |
1734 | 0 | uint64_t* cr_coeff_bits) { |
1735 | 0 | uint32_t full_lambda = ctx->hbd_md ? ctx->full_lambda_md[EB_10_BIT_MD] : ctx->full_lambda_md[EB_8_BIT_MD]; |
1736 | 0 | const TxSize tx_size_uv = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1); |
1737 | 0 | const int tx_width_uv = tx_size_wide[tx_size_uv]; |
1738 | 0 | const int tx_height_uv = tx_size_high[tx_size_uv]; |
1739 | |
|
1740 | 0 | TxCoeffShape pf_shape = ctx->pf_ctrls.pf_shape; |
1741 | | // If Cb component not detected as complex, can use TX shortcuts |
1742 | 0 | if (ctx->use_tx_shortcuts_mds3 && |
1743 | 0 | (ctx->chroma_complexity == COMPONENT_LUMA || ctx->chroma_complexity == COMPONENT_CHROMA_CR)) { |
1744 | 0 | pf_shape = N4_SHAPE; |
1745 | 0 | } else { |
1746 | 0 | uint8_t use_pfn4_cond = 0; |
1747 | 0 | if (ctx->lpd1_tx_ctrls.use_uv_shortcuts_on_y_coeffs && |
1748 | 0 | (ctx->chroma_complexity == COMPONENT_LUMA || ctx->chroma_complexity == COMPONENT_CHROMA_CR)) { |
1749 | 0 | const uint16_t th = ((tx_width_uv >> 4) * (tx_height_uv >> 4)); |
1750 | 0 | use_pfn4_cond = (cand_bf->cnt_nz_coeff < th) || !cand_bf->block_has_coeff ? 1 : 0; |
1751 | 0 | } |
1752 | 0 | if (use_pfn4_cond) { |
1753 | 0 | pf_shape = N4_SHAPE; |
1754 | 0 | } |
1755 | 0 | } |
1756 | 0 | assert(tx_size_uv < TX_SIZES_ALL); |
1757 | 0 | const int32_t chroma_shift = (MAX_TX_SCALE - av1_get_tx_scale_tab[tx_size_uv]) * 2; |
1758 | 0 | uint32_t bwidth = tx_width_uv; |
1759 | 0 | uint32_t bheight = tx_height_uv; |
1760 | 0 | if (pf_shape) { |
1761 | 0 | bwidth = MAX((bwidth >> pf_shape), 4); |
1762 | 0 | bheight = (bheight >> pf_shape); |
1763 | 0 | } |
1764 | 0 | if (component_type == COMPONENT_CHROMA || component_type == COMPONENT_CHROMA_CB) { |
1765 | 0 | svt_aom_residual_kernel(input_pic->u_buffer, |
1766 | 0 | input_cb_origin_in_index, |
1767 | 0 | input_pic->u_stride, |
1768 | 0 | cand_bf->pred->u_buffer, |
1769 | 0 | blk_chroma_origin_index, |
1770 | 0 | cand_bf->pred->u_stride, |
1771 | 0 | (int16_t*)cand_bf->residual->u_buffer, |
1772 | 0 | blk_chroma_origin_index, |
1773 | 0 | cand_bf->residual->u_stride, |
1774 | 0 | ctx->hbd_md, |
1775 | 0 | ctx->blk_geom->bwidth_uv, |
1776 | 0 | ctx->blk_geom->bheight_uv); |
1777 | | |
1778 | | // Cb Transform |
1779 | 0 | svt_aom_estimate_transform(pcs, |
1780 | 0 | ctx, |
1781 | 0 | &(((int16_t*)cand_bf->residual->u_buffer)[blk_chroma_origin_index]), |
1782 | 0 | cand_bf->residual->u_stride, |
1783 | 0 | &(((int32_t*)ctx->tx_coeffs->u_buffer)[0]), |
1784 | 0 | NOT_USED_VALUE, |
1785 | 0 | tx_size_uv, |
1786 | 0 | &ctx->three_quad_energy, |
1787 | 0 | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT, |
1788 | 0 | cand_bf->cand->transform_type_uv, |
1789 | 0 | PLANE_TYPE_UV, |
1790 | 0 | pf_shape); |
1791 | 0 | cand_bf->quant_dc.u[0] = svt_aom_quantize_inv_quantize(pcs, |
1792 | 0 | ctx, |
1793 | 0 | &(((int32_t*)ctx->tx_coeffs->u_buffer)[0]), |
1794 | 0 | &(((int32_t*)cand_bf->quant->u_buffer)[0]), |
1795 | 0 | &(((int32_t*)cand_bf->rec_coeff->u_buffer)[0]), |
1796 | 0 | chroma_qindex, |
1797 | 0 | 0, |
1798 | 0 | tx_size_uv, |
1799 | 0 | &cand_bf->eob.u[0], |
1800 | 0 | COMPONENT_CHROMA_CB, |
1801 | 0 | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT, |
1802 | 0 | cand_bf->cand->transform_type_uv, |
1803 | 0 | 0, |
1804 | 0 | 0, |
1805 | 0 | cand_bf->cand->block_mi.mode, |
1806 | 0 | full_lambda, |
1807 | 0 | false); |
1808 | |
|
1809 | 0 | svt_aom_picture_full_distortion32_bits_single(&(((int32_t*)ctx->tx_coeffs->u_buffer)[0]), |
1810 | 0 | &(((int32_t*)cand_bf->rec_coeff->u_buffer)[0]), |
1811 | 0 | tx_width_uv, |
1812 | 0 | bwidth, |
1813 | 0 | bheight, |
1814 | 0 | cb_full_distortion, |
1815 | 0 | cand_bf->eob.u[0]); |
1816 | 0 | cb_full_distortion[DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(cb_full_distortion[DIST_CALC_RESIDUAL], |
1817 | 0 | chroma_shift); |
1818 | 0 | cb_full_distortion[DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(cb_full_distortion[DIST_CALC_PREDICTION], |
1819 | 0 | chroma_shift); |
1820 | 0 | cand_bf->u_has_coeff = (cand_bf->eob.u[0] > 0); |
1821 | 0 | } |
1822 | |
|
1823 | 0 | pf_shape = ctx->pf_ctrls.pf_shape; |
1824 | | // If Cr component not detected as complex, can use TX shortcuts |
1825 | 0 | if (ctx->use_tx_shortcuts_mds3 && |
1826 | 0 | (ctx->chroma_complexity == COMPONENT_LUMA || ctx->chroma_complexity == COMPONENT_CHROMA_CB)) { |
1827 | 0 | pf_shape = N4_SHAPE; |
1828 | 0 | } else { |
1829 | 0 | uint8_t use_pfn4_cond = 0; |
1830 | 0 | if (ctx->lpd1_tx_ctrls.use_uv_shortcuts_on_y_coeffs && |
1831 | 0 | (ctx->chroma_complexity == COMPONENT_LUMA || ctx->chroma_complexity == COMPONENT_CHROMA_CB)) { |
1832 | 0 | const uint16_t th = ((tx_width_uv >> 4) * (tx_height_uv >> 4)); |
1833 | 0 | use_pfn4_cond = (cand_bf->cnt_nz_coeff < th) || !cand_bf->block_has_coeff ? 1 : 0; |
1834 | 0 | } |
1835 | 0 | if (use_pfn4_cond) { |
1836 | 0 | pf_shape = N4_SHAPE; |
1837 | 0 | } |
1838 | 0 | } |
1839 | 0 | bwidth = tx_width_uv; |
1840 | 0 | bheight = tx_height_uv; |
1841 | 0 | if (pf_shape) { |
1842 | 0 | bwidth = MAX((bwidth >> pf_shape), 4); |
1843 | 0 | bheight = (bheight >> pf_shape); |
1844 | 0 | } |
1845 | |
|
1846 | 0 | if (component_type == COMPONENT_CHROMA || component_type == COMPONENT_CHROMA_CR) { |
1847 | | //Cr Residual |
1848 | 0 | svt_aom_residual_kernel(input_pic->v_buffer, |
1849 | 0 | input_cb_origin_in_index, |
1850 | 0 | input_pic->v_stride, |
1851 | 0 | cand_bf->pred->v_buffer, |
1852 | 0 | blk_chroma_origin_index, |
1853 | 0 | cand_bf->pred->v_stride, |
1854 | 0 | (int16_t*)cand_bf->residual->v_buffer, |
1855 | 0 | blk_chroma_origin_index, |
1856 | 0 | cand_bf->residual->v_stride, |
1857 | 0 | ctx->hbd_md, |
1858 | 0 | ctx->blk_geom->bwidth_uv, |
1859 | 0 | ctx->blk_geom->bheight_uv); |
1860 | | // Cr Transform |
1861 | 0 | svt_aom_estimate_transform(pcs, |
1862 | 0 | ctx, |
1863 | 0 | &(((int16_t*)cand_bf->residual->v_buffer)[blk_chroma_origin_index]), |
1864 | 0 | cand_bf->residual->v_stride, |
1865 | 0 | &(((int32_t*)ctx->tx_coeffs->v_buffer)[0]), |
1866 | 0 | NOT_USED_VALUE, |
1867 | 0 | tx_size_uv, |
1868 | 0 | &ctx->three_quad_energy, |
1869 | 0 | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT, |
1870 | 0 | cand_bf->cand->transform_type_uv, |
1871 | 0 | PLANE_TYPE_UV, |
1872 | 0 | pf_shape); |
1873 | 0 | cand_bf->quant_dc.v[0] = svt_aom_quantize_inv_quantize(pcs, |
1874 | 0 | ctx, |
1875 | 0 | &(((int32_t*)ctx->tx_coeffs->v_buffer)[0]), |
1876 | 0 | &(((int32_t*)cand_bf->quant->v_buffer)[0]), |
1877 | 0 | &(((int32_t*)cand_bf->rec_coeff->v_buffer)[0]), |
1878 | 0 | chroma_qindex, |
1879 | 0 | 0, |
1880 | 0 | tx_size_uv, |
1881 | 0 | &cand_bf->eob.v[0], |
1882 | 0 | COMPONENT_CHROMA_CR, |
1883 | 0 | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT, |
1884 | 0 | cand_bf->cand->transform_type_uv, |
1885 | 0 | 0, |
1886 | 0 | 0, |
1887 | 0 | cand_bf->cand->block_mi.mode, |
1888 | 0 | full_lambda, |
1889 | 0 | false); |
1890 | |
|
1891 | 0 | svt_aom_picture_full_distortion32_bits_single(&(((int32_t*)ctx->tx_coeffs->v_buffer)[0]), |
1892 | 0 | &(((int32_t*)cand_bf->rec_coeff->v_buffer)[0]), |
1893 | 0 | tx_width_uv, |
1894 | 0 | bwidth, |
1895 | 0 | bheight, |
1896 | 0 | cr_full_distortion, |
1897 | 0 | cand_bf->eob.v[0]); |
1898 | |
|
1899 | 0 | cr_full_distortion[DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(cr_full_distortion[DIST_CALC_RESIDUAL], |
1900 | 0 | chroma_shift); |
1901 | 0 | cr_full_distortion[DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(cr_full_distortion[DIST_CALC_PREDICTION], |
1902 | 0 | chroma_shift); |
1903 | 0 | cand_bf->v_has_coeff = (cand_bf->eob.v[0] > 0); |
1904 | 0 | } |
1905 | | |
1906 | | //CHROMA-ONLY |
1907 | 0 | svt_aom_txb_estimate_coeff_bits(ctx, |
1908 | 0 | 0, |
1909 | 0 | NULL, |
1910 | 0 | pcs, |
1911 | 0 | cand_bf, |
1912 | 0 | NOT_USED_VALUE, |
1913 | 0 | 0, |
1914 | 0 | cand_bf->quant, |
1915 | 0 | NOT_USED_VALUE, |
1916 | 0 | cand_bf->eob.u[0], |
1917 | 0 | cand_bf->eob.v[0], |
1918 | 0 | NOT_USED_VALUE, |
1919 | 0 | cb_coeff_bits, |
1920 | 0 | cr_coeff_bits, |
1921 | 0 | NOT_USED_VALUE, |
1922 | 0 | tx_size_uv, |
1923 | 0 | NOT_USED_VALUE, |
1924 | 0 | cand_bf->cand->transform_type_uv, |
1925 | 0 | component_type); |
1926 | 0 | } |
1927 | | |
1928 | | /**************************************** |
1929 | | ************ Full loop **************** |
1930 | | ****************************************/ |
1931 | | void svt_aom_full_loop_uv(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf, |
1932 | | EbPictureBufferDesc* input_pic, COMPONENT_TYPE component_type, uint32_t chroma_qindex, |
1933 | | uint64_t cb_full_distortion[DIST_TOTAL][DIST_CALC_TOTAL], |
1934 | | uint64_t cr_full_distortion[DIST_TOTAL][DIST_CALC_TOTAL], uint64_t* cb_coeff_bits, |
1935 | 127k | uint64_t* cr_coeff_bits, bool is_full_loop) { |
1936 | 127k | EbSpatialFullDistType spatial_full_dist_type_fun = ctx->hbd_md ? svt_full_distortion_kernel16_bits |
1937 | 127k | : svt_spatial_full_distortion_kernel; |
1938 | 127k | EB_ALIGN(16) uint64_t txb_full_distortion[DIST_TOTAL][3][DIST_CALC_TOTAL]; |
1939 | 127k | const SsimLevel ssim_level = ctx->tune_ssim_level; |
1940 | 127k | if (ssim_level > SSIM_LVL_0) { |
1941 | 0 | assert(ctx->pd_pass == PD_PASS_1); |
1942 | 0 | assert(ctx->md_stage == MD_STAGE_3); |
1943 | 0 | } |
1944 | 127k | cand_bf->u_has_coeff = 0; |
1945 | 127k | cand_bf->v_has_coeff = 0; |
1946 | 127k | int16_t* chroma_residual_ptr; |
1947 | 127k | uint32_t full_lambda = ctx->hbd_md ? ctx->full_lambda_md[EB_10_BIT_MD] : ctx->full_lambda_md[EB_8_BIT_MD]; |
1948 | | |
1949 | 127k | ctx->three_quad_energy = 0; |
1950 | | |
1951 | 127k | const double effective_ac_bias = get_effective_ac_bias( |
1952 | 127k | pcs->scs->static_config.ac_bias, pcs->slice_type == I_SLICE, pcs->temporal_layer_index); |
1953 | 127k | const uint8_t tx_depth = cand_bf->cand->block_mi.tx_depth; |
1954 | 127k | const TxSize tx_size = av1_get_tx_size(ctx->blk_geom->bsize, tx_depth, PLANE_TYPE_Y); |
1955 | 127k | const TxSize tx_size_uv = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1); |
1956 | 127k | const int tx_width_uv = tx_size_wide[tx_size_uv]; |
1957 | 127k | const int tx_height_uv = tx_size_high[tx_size_uv]; |
1958 | 127k | const bool is_inter = (is_inter_mode(cand_bf->cand->block_mi.mode) || cand_bf->cand->block_mi.use_intrabc) ? true |
1959 | 127k | : false; |
1960 | 127k | const int tu_count = tx_depth ? 1 : tx_blocks_per_depth[ctx->blk_geom->bsize][tx_depth]; //NM: 128x128 exeption |
1961 | 127k | uint32_t txb_1d_offset = 0; |
1962 | | |
1963 | 127k | int txb_itr = 0; |
1964 | 127k | do { |
1965 | 127k | const uint32_t txb_origin_x = tx_org[ctx->blk_geom->bsize][is_inter][tx_depth][txb_itr].x; |
1966 | 127k | const uint32_t txb_origin_y = tx_org[ctx->blk_geom->bsize][is_inter][tx_depth][txb_itr].y; |
1967 | 127k | int32_t cropped_tx_width_uv = MIN( |
1968 | 127k | (uint32_t)tx_width_uv, (pcs->ppcs->aligned_width >> 1) - ((ROUND_UV(ctx->blk_org_x + txb_origin_x)) >> 1)); |
1969 | 127k | int32_t cropped_tx_height_uv = MIN( |
1970 | 127k | (uint32_t)tx_height_uv, |
1971 | 127k | (pcs->ppcs->aligned_height >> 1) - ((ROUND_UV(ctx->blk_org_y + txb_origin_y)) >> 1)); |
1972 | 127k | uint32_t tu_cb_origin_index = (ROUND_UV(txb_origin_x) + |
1973 | 127k | (ROUND_UV(txb_origin_y) * cand_bf->residual->u_stride)) >> |
1974 | 127k | 1; |
1975 | 127k | uint32_t tu_cr_origin_index = (ROUND_UV(txb_origin_x) + |
1976 | 127k | (ROUND_UV(txb_origin_y) * cand_bf->residual->v_stride)) >> |
1977 | 127k | 1; |
1978 | 127k | TxCoeffShape pf_shape = ctx->pf_ctrls.pf_shape; |
1979 | 127k | if (ctx->md_stage == MD_STAGE_3 && ctx->use_tx_shortcuts_mds3 && ctx->chroma_complexity == COMPONENT_LUMA) { |
1980 | 0 | pf_shape = N4_SHAPE; |
1981 | 0 | } |
1982 | | // for chroma path, use luma coeff info to make shortcut decisions (available even if MDS1 is skipped) |
1983 | 127k | else if (ctx->tx_shortcut_ctrls.apply_pf_on_coeffs && ctx->md_stage == MD_STAGE_3 && |
1984 | 0 | ctx->chroma_complexity == COMPONENT_LUMA) { |
1985 | 0 | uint8_t use_pfn4_cond = 0; |
1986 | |
|
1987 | 0 | const uint16_t th = (tx_width_uv >> 4) * (tx_height_uv >> 4); |
1988 | 0 | use_pfn4_cond = (cand_bf->cnt_nz_coeff < th) || !cand_bf->block_has_coeff ? 1 : 0; |
1989 | |
|
1990 | 0 | if (use_pfn4_cond) { |
1991 | 0 | pf_shape = N4_SHAPE; |
1992 | 0 | } |
1993 | 0 | } |
1994 | | // This function replaces the previous Intra Chroma mode if the LM fast |
1995 | | // cost is better. |
1996 | | // *Note - this might require that we have inv transform in the loop |
1997 | 127k | if (component_type == COMPONENT_CHROMA_CB || component_type == COMPONENT_CHROMA || |
1998 | 127k | component_type == COMPONENT_ALL) { |
1999 | 127k | ctx->cb_txb_skip_context = 0; |
2000 | 127k | ctx->cb_dc_sign_context = 0; |
2001 | 127k | if (ctx->rate_est_ctrls.update_skip_ctx_dc_sign_ctx) { |
2002 | 0 | svt_aom_get_txb_ctx(pcs, |
2003 | 0 | COMPONENT_CHROMA, |
2004 | 0 | ctx->cb_dc_sign_level_coeff_na, |
2005 | 0 | ROUND_UV(ctx->blk_org_x + txb_origin_x) >> 1, |
2006 | 0 | ROUND_UV(ctx->blk_org_y + txb_origin_y) >> 1, |
2007 | 0 | ctx->blk_geom->bsize_uv, |
2008 | 0 | tx_size_uv, |
2009 | 0 | &ctx->cb_txb_skip_context, |
2010 | 0 | &ctx->cb_dc_sign_context); |
2011 | 0 | } |
2012 | | // Configure the Chroma Residual Ptr |
2013 | | |
2014 | 127k | chroma_residual_ptr = &(((int16_t*)cand_bf->residual->u_buffer)[tu_cb_origin_index]); |
2015 | | |
2016 | | // Cb Transform |
2017 | 127k | svt_aom_estimate_transform(pcs, |
2018 | 127k | ctx, |
2019 | 127k | chroma_residual_ptr, |
2020 | 127k | cand_bf->residual->u_stride, |
2021 | 127k | &(((int32_t*)ctx->tx_coeffs->u_buffer)[txb_1d_offset]), |
2022 | 127k | NOT_USED_VALUE, |
2023 | 127k | tx_size_uv, |
2024 | 127k | &ctx->three_quad_energy, |
2025 | 127k | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT, |
2026 | 127k | cand_bf->cand->transform_type_uv, |
2027 | 127k | PLANE_TYPE_UV, |
2028 | 127k | pf_shape); |
2029 | | |
2030 | 127k | int32_t seg_qp = pcs->ppcs->frm_hdr.segmentation_params.segmentation_enabled |
2031 | 127k | ? pcs->ppcs->frm_hdr.segmentation_params.feature_data[ctx->blk_ptr->segment_id][SEG_LVL_ALT_Q] |
2032 | 127k | : 0; |
2033 | 127k | cand_bf->quant_dc.u[txb_itr] = svt_aom_quantize_inv_quantize( |
2034 | 127k | pcs, |
2035 | 127k | ctx, |
2036 | 127k | &(((int32_t*)ctx->tx_coeffs->u_buffer)[txb_1d_offset]), |
2037 | 127k | &(((int32_t*)cand_bf->quant->u_buffer)[txb_1d_offset]), |
2038 | 127k | &(((int32_t*)cand_bf->rec_coeff->u_buffer)[txb_1d_offset]), |
2039 | 127k | chroma_qindex, |
2040 | 127k | seg_qp, |
2041 | 127k | tx_size_uv, |
2042 | 127k | &cand_bf->eob.u[txb_itr], |
2043 | 127k | COMPONENT_CHROMA_CB, |
2044 | 127k | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT, |
2045 | 127k | cand_bf->cand->transform_type_uv, |
2046 | 127k | ctx->cb_txb_skip_context, |
2047 | 127k | ctx->cb_dc_sign_context, |
2048 | 127k | cand_bf->cand->block_mi.mode, |
2049 | 127k | full_lambda, |
2050 | 127k | false); |
2051 | | |
2052 | 127k | if (is_full_loop && ctx->mds_do_spatial_sse) { |
2053 | 127k | uint32_t cb_has_coeff = cand_bf->eob.u[txb_itr] > 0; |
2054 | | |
2055 | 127k | if (cb_has_coeff) { |
2056 | 5.47k | svt_aom_inv_transform_recon_wrapper(pcs, |
2057 | 5.47k | ctx, |
2058 | 5.47k | cand_bf->pred->u_buffer, |
2059 | 5.47k | tu_cb_origin_index, |
2060 | 5.47k | cand_bf->pred->u_stride, |
2061 | 5.47k | cand_bf->recon->u_buffer, |
2062 | 5.47k | tu_cb_origin_index, |
2063 | 5.47k | cand_bf->recon->u_stride, |
2064 | 5.47k | (int32_t*)cand_bf->rec_coeff->u_buffer, |
2065 | 5.47k | txb_1d_offset, |
2066 | 5.47k | ctx->hbd_md, |
2067 | 5.47k | tx_size_uv, |
2068 | 5.47k | cand_bf->cand->transform_type_uv, |
2069 | 5.47k | PLANE_TYPE_UV, |
2070 | 5.47k | (uint32_t)cand_bf->eob.u[txb_itr]); |
2071 | 121k | } else { |
2072 | 121k | svt_av1_picture_copy_cb(cand_bf->pred, |
2073 | 121k | tu_cb_origin_index, |
2074 | 121k | cand_bf->recon, |
2075 | 121k | tu_cb_origin_index, |
2076 | 121k | tx_width_uv, |
2077 | 121k | tx_height_uv, |
2078 | 121k | ctx->hbd_md); |
2079 | 121k | } |
2080 | | |
2081 | 127k | const uint32_t input_chroma_txb_origin_index = ((ROUND_UV(ctx->blk_org_x + txb_origin_x)) >> 1) + |
2082 | 127k | ((ROUND_UV(ctx->blk_org_y + txb_origin_y)) >> 1) * input_pic->u_stride; |
2083 | 127k | const int32_t txb_uv_origin_index = (ROUND_UV(txb_origin_x) + |
2084 | 127k | (ROUND_UV(txb_origin_y) * cand_bf->quant->u_stride)) >> |
2085 | 127k | 1; |
2086 | | |
2087 | 128k | if (ssim_level == SSIM_LVL_1 || ssim_level == SSIM_LVL_3) { |
2088 | 0 | txb_full_distortion[DIST_SSIM][1][DIST_CALC_PREDICTION] = svt_spatial_full_distortion_ssim_kernel( |
2089 | 0 | input_pic->u_buffer, |
2090 | 0 | input_chroma_txb_origin_index, |
2091 | 0 | input_pic->u_stride, |
2092 | 0 | cand_bf->pred->u_buffer, |
2093 | 0 | txb_uv_origin_index, |
2094 | 0 | cand_bf->pred->u_stride, |
2095 | 0 | cropped_tx_width_uv, |
2096 | 0 | cropped_tx_height_uv, |
2097 | 0 | ctx->hbd_md, |
2098 | 0 | effective_ac_bias); |
2099 | |
|
2100 | 0 | txb_full_distortion[DIST_SSIM][1][DIST_CALC_RESIDUAL] = svt_spatial_full_distortion_ssim_kernel( |
2101 | 0 | input_pic->u_buffer, |
2102 | 0 | input_chroma_txb_origin_index, |
2103 | 0 | input_pic->u_stride, |
2104 | 0 | cand_bf->recon->u_buffer, |
2105 | 0 | txb_uv_origin_index, |
2106 | 0 | cand_bf->recon->u_stride, |
2107 | 0 | cropped_tx_width_uv, |
2108 | 0 | cropped_tx_height_uv, |
2109 | 0 | ctx->hbd_md, |
2110 | 0 | effective_ac_bias); |
2111 | |
|
2112 | 0 | txb_full_distortion[DIST_SSIM][1][DIST_CALC_PREDICTION] <<= 4; |
2113 | 0 | txb_full_distortion[DIST_SSIM][1][DIST_CALC_RESIDUAL] <<= 4; |
2114 | 0 | } |
2115 | 127k | txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun( |
2116 | 127k | input_pic->u_buffer, |
2117 | 127k | input_chroma_txb_origin_index, |
2118 | 127k | input_pic->u_stride, |
2119 | 127k | cand_bf->pred->u_buffer, |
2120 | 127k | txb_uv_origin_index, |
2121 | 127k | cand_bf->pred->u_stride, |
2122 | 127k | cropped_tx_width_uv, |
2123 | 127k | cropped_tx_height_uv); |
2124 | 127k | if (effective_ac_bias) { |
2125 | 0 | txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION] += get_svt_psy_full_dist( |
2126 | 0 | input_pic->u_buffer, |
2127 | 0 | input_chroma_txb_origin_index, |
2128 | 0 | input_pic->u_stride, |
2129 | 0 | cand_bf->pred->u_buffer, |
2130 | 0 | txb_uv_origin_index, |
2131 | 0 | cand_bf->pred->u_stride, |
2132 | 0 | cropped_tx_width_uv, |
2133 | 0 | cropped_tx_height_uv, |
2134 | 0 | ctx->hbd_md, |
2135 | 0 | effective_ac_bias); |
2136 | 0 | } |
2137 | | |
2138 | 127k | txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun( |
2139 | 127k | input_pic->u_buffer, |
2140 | 127k | input_chroma_txb_origin_index, |
2141 | 127k | input_pic->u_stride, |
2142 | 127k | cand_bf->recon->u_buffer, |
2143 | 127k | txb_uv_origin_index, |
2144 | 127k | cand_bf->recon->u_stride, |
2145 | 127k | cropped_tx_width_uv, |
2146 | 127k | cropped_tx_height_uv); |
2147 | 127k | if (effective_ac_bias) { |
2148 | 0 | txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL] += get_svt_psy_full_dist( |
2149 | 0 | input_pic->u_buffer, |
2150 | 0 | input_chroma_txb_origin_index, |
2151 | 0 | input_pic->u_stride, |
2152 | 0 | cand_bf->recon->u_buffer, |
2153 | 0 | txb_uv_origin_index, |
2154 | 0 | cand_bf->recon->u_stride, |
2155 | 0 | cropped_tx_width_uv, |
2156 | 0 | cropped_tx_height_uv, |
2157 | 0 | ctx->hbd_md, |
2158 | 0 | effective_ac_bias); |
2159 | 0 | } |
2160 | | |
2161 | 127k | txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION] <<= 4; |
2162 | 127k | txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL] <<= 4; |
2163 | 127k | } else { |
2164 | | // *Full Distortion (SSE) |
2165 | | // *Note - there are known issues with how this distortion metric is currently |
2166 | | // calculated. The amount of scaling between the two arrays is not |
2167 | | // equivalent. |
2168 | 169 | uint32_t bwidth = tx_width_uv; |
2169 | 169 | uint32_t bheight = tx_height_uv; |
2170 | 169 | if (pf_shape) { |
2171 | 0 | bwidth = MAX((bwidth >> pf_shape), 4); |
2172 | 0 | bheight = (bheight >> pf_shape); |
2173 | 0 | } |
2174 | 169 | svt_aom_picture_full_distortion32_bits_single( |
2175 | 169 | &(((int32_t*)ctx->tx_coeffs->u_buffer)[txb_1d_offset]), |
2176 | 169 | &(((int32_t*)cand_bf->rec_coeff->u_buffer)[txb_1d_offset]), |
2177 | 169 | tx_width_uv, |
2178 | 169 | bwidth, |
2179 | 169 | bheight, |
2180 | 169 | txb_full_distortion[DIST_SSD][1], |
2181 | 169 | cand_bf->eob.u[txb_itr]); |
2182 | | |
2183 | 169 | const int32_t chroma_shift = (MAX_TX_SCALE - av1_get_tx_scale_tab[tx_size_uv]) * 2; |
2184 | 169 | txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT( |
2185 | 169 | txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL], chroma_shift); |
2186 | 169 | txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT( |
2187 | 169 | txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION], chroma_shift); |
2188 | 169 | } |
2189 | 127k | cand_bf->u_has_coeff |= ((cand_bf->eob.u[txb_itr] != 0) << txb_itr); |
2190 | 127k | cb_full_distortion[DIST_SSIM][DIST_CALC_RESIDUAL] += txb_full_distortion[DIST_SSIM][1][DIST_CALC_RESIDUAL]; |
2191 | 127k | cb_full_distortion[DIST_SSIM][DIST_CALC_PREDICTION] += |
2192 | 127k | txb_full_distortion[DIST_SSIM][1][DIST_CALC_PREDICTION]; |
2193 | | |
2194 | 127k | cb_full_distortion[DIST_SSD][DIST_CALC_RESIDUAL] += txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL]; |
2195 | 127k | cb_full_distortion[DIST_SSD][DIST_CALC_PREDICTION] += |
2196 | 127k | txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION]; |
2197 | 127k | } |
2198 | | |
2199 | 128k | if (component_type == COMPONENT_CHROMA_CR || component_type == COMPONENT_CHROMA || |
2200 | 128k | component_type == COMPONENT_ALL) { |
2201 | 128k | ctx->cr_txb_skip_context = 0; |
2202 | 128k | ctx->cr_dc_sign_context = 0; |
2203 | 128k | if (ctx->rate_est_ctrls.update_skip_ctx_dc_sign_ctx) { |
2204 | 0 | svt_aom_get_txb_ctx(pcs, |
2205 | 0 | COMPONENT_CHROMA, |
2206 | 0 | ctx->cr_dc_sign_level_coeff_na, |
2207 | 0 | ROUND_UV(ctx->blk_org_x + txb_origin_x) >> 1, |
2208 | 0 | ROUND_UV(ctx->blk_org_y + txb_origin_y) >> 1, |
2209 | 0 | ctx->blk_geom->bsize_uv, |
2210 | 0 | tx_size_uv, |
2211 | 0 | &ctx->cr_txb_skip_context, |
2212 | 0 | &ctx->cr_dc_sign_context); |
2213 | 0 | } |
2214 | | // Configure the Chroma Residual Ptr |
2215 | | |
2216 | 128k | chroma_residual_ptr = &(((int16_t*)cand_bf->residual->v_buffer)[tu_cr_origin_index]); |
2217 | | |
2218 | | // Cr Transform |
2219 | 128k | svt_aom_estimate_transform(pcs, |
2220 | 128k | ctx, |
2221 | 128k | chroma_residual_ptr, |
2222 | 128k | cand_bf->residual->v_stride, |
2223 | 128k | &(((int32_t*)ctx->tx_coeffs->v_buffer)[txb_1d_offset]), |
2224 | 128k | NOT_USED_VALUE, |
2225 | 128k | tx_size_uv, |
2226 | 128k | &ctx->three_quad_energy, |
2227 | 128k | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT, |
2228 | 128k | cand_bf->cand->transform_type_uv, |
2229 | 128k | PLANE_TYPE_UV, |
2230 | 128k | pf_shape); |
2231 | 128k | int32_t seg_qp = pcs->ppcs->frm_hdr.segmentation_params.segmentation_enabled |
2232 | 128k | ? pcs->ppcs->frm_hdr.segmentation_params.feature_data[ctx->blk_ptr->segment_id][SEG_LVL_ALT_Q] |
2233 | 128k | : 0; |
2234 | 128k | cand_bf->quant_dc.v[txb_itr] = svt_aom_quantize_inv_quantize( |
2235 | 128k | pcs, |
2236 | 128k | ctx, |
2237 | 128k | &(((int32_t*)ctx->tx_coeffs->v_buffer)[txb_1d_offset]), |
2238 | 128k | &(((int32_t*)cand_bf->quant->v_buffer)[txb_1d_offset]), |
2239 | 128k | &(((int32_t*)cand_bf->rec_coeff->v_buffer)[txb_1d_offset]), |
2240 | 128k | chroma_qindex, |
2241 | 128k | seg_qp, |
2242 | 128k | tx_size_uv, |
2243 | 128k | &cand_bf->eob.v[txb_itr], |
2244 | 128k | COMPONENT_CHROMA_CR, |
2245 | 128k | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT, |
2246 | 128k | cand_bf->cand->transform_type_uv, |
2247 | 128k | ctx->cr_txb_skip_context, |
2248 | 128k | ctx->cr_dc_sign_context, |
2249 | 128k | cand_bf->cand->block_mi.mode, |
2250 | 128k | full_lambda, |
2251 | 128k | false); |
2252 | 128k | if (is_full_loop && ctx->mds_do_spatial_sse) { |
2253 | 127k | uint32_t cr_has_coeff = cand_bf->eob.v[txb_itr] > 0; |
2254 | | |
2255 | 127k | if (cr_has_coeff) { |
2256 | 5.46k | svt_aom_inv_transform_recon_wrapper(pcs, |
2257 | 5.46k | ctx, |
2258 | 5.46k | cand_bf->pred->v_buffer, |
2259 | 5.46k | tu_cr_origin_index, |
2260 | 5.46k | cand_bf->pred->v_stride, |
2261 | 5.46k | cand_bf->recon->v_buffer, |
2262 | 5.46k | tu_cr_origin_index, |
2263 | 5.46k | cand_bf->recon->v_stride, |
2264 | 5.46k | (int32_t*)cand_bf->rec_coeff->v_buffer, |
2265 | 5.46k | txb_1d_offset, |
2266 | 5.46k | ctx->hbd_md, |
2267 | 5.46k | tx_size_uv, |
2268 | 5.46k | cand_bf->cand->transform_type_uv, |
2269 | 5.46k | PLANE_TYPE_UV, |
2270 | 5.46k | (uint32_t)cand_bf->eob.v[txb_itr]); |
2271 | 121k | } else { |
2272 | 121k | svt_av1_picture_copy_cr(cand_bf->pred, |
2273 | 121k | tu_cb_origin_index, |
2274 | 121k | cand_bf->recon, |
2275 | 121k | tu_cb_origin_index, |
2276 | 121k | tx_width_uv, |
2277 | 121k | tx_height_uv, |
2278 | 121k | ctx->hbd_md); |
2279 | 121k | } |
2280 | 127k | const uint32_t input_chroma_txb_origin_index = ((ROUND_UV(ctx->blk_org_x + txb_origin_x)) >> 1) + |
2281 | 127k | ((ROUND_UV(ctx->blk_org_y + txb_origin_y)) >> 1) * input_pic->v_stride; |
2282 | 127k | const int32_t txb_uv_origin_index = (ROUND_UV(txb_origin_x) + |
2283 | 127k | (ROUND_UV(txb_origin_y) * cand_bf->quant->v_stride)) >> |
2284 | 127k | 1; |
2285 | | |
2286 | 128k | if (ssim_level == SSIM_LVL_1 || ssim_level == SSIM_LVL_3) { |
2287 | 0 | txb_full_distortion[DIST_SSIM][2][DIST_CALC_PREDICTION] = svt_spatial_full_distortion_ssim_kernel( |
2288 | 0 | input_pic->v_buffer, |
2289 | 0 | input_chroma_txb_origin_index, |
2290 | 0 | input_pic->v_stride, |
2291 | 0 | cand_bf->pred->v_buffer, |
2292 | 0 | txb_uv_origin_index, |
2293 | 0 | cand_bf->pred->v_stride, |
2294 | 0 | cropped_tx_width_uv, |
2295 | 0 | cropped_tx_height_uv, |
2296 | 0 | ctx->hbd_md, |
2297 | 0 | effective_ac_bias); |
2298 | | |
2299 | 0 | txb_full_distortion[DIST_SSIM][2][DIST_CALC_RESIDUAL] = svt_spatial_full_distortion_ssim_kernel( |
2300 | 0 | input_pic->v_buffer, |
2301 | 0 | input_chroma_txb_origin_index, |
2302 | 0 | input_pic->v_stride, |
2303 | 0 | cand_bf->recon->v_buffer, |
2304 | 0 | txb_uv_origin_index, |
2305 | 0 | cand_bf->recon->v_stride, |
2306 | 0 | cropped_tx_width_uv, |
2307 | 0 | cropped_tx_height_uv, |
2308 | 0 | ctx->hbd_md, |
2309 | 0 | effective_ac_bias); |
2310 | | |
2311 | 0 | txb_full_distortion[DIST_SSIM][2][DIST_CALC_PREDICTION] <<= 4; |
2312 | 0 | txb_full_distortion[DIST_SSIM][2][DIST_CALC_RESIDUAL] <<= 4; |
2313 | 0 | } |
2314 | 127k | txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun( |
2315 | 127k | input_pic->v_buffer, |
2316 | 127k | input_chroma_txb_origin_index, |
2317 | 127k | input_pic->v_stride, |
2318 | 127k | cand_bf->pred->v_buffer, |
2319 | 127k | txb_uv_origin_index, |
2320 | 127k | cand_bf->pred->v_stride, |
2321 | 127k | cropped_tx_width_uv, |
2322 | 127k | cropped_tx_height_uv); |
2323 | 127k | if (effective_ac_bias) { |
2324 | 0 | txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION] += get_svt_psy_full_dist( |
2325 | 0 | input_pic->v_buffer, |
2326 | 0 | input_chroma_txb_origin_index, |
2327 | 0 | input_pic->v_stride, |
2328 | 0 | cand_bf->pred->v_buffer, |
2329 | 0 | txb_uv_origin_index, |
2330 | 0 | cand_bf->pred->v_stride, |
2331 | 0 | cropped_tx_width_uv, |
2332 | 0 | cropped_tx_height_uv, |
2333 | 0 | ctx->hbd_md, |
2334 | 0 | effective_ac_bias); |
2335 | 0 | } |
2336 | | |
2337 | 127k | txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun( |
2338 | 127k | input_pic->v_buffer, |
2339 | 127k | input_chroma_txb_origin_index, |
2340 | 127k | input_pic->v_stride, |
2341 | 127k | cand_bf->recon->v_buffer, |
2342 | 127k | txb_uv_origin_index, |
2343 | 127k | cand_bf->recon->v_stride, |
2344 | 127k | cropped_tx_width_uv, |
2345 | 127k | cropped_tx_height_uv); |
2346 | 127k | if (effective_ac_bias) { |
2347 | 0 | txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL] += get_svt_psy_full_dist( |
2348 | 0 | input_pic->v_buffer, |
2349 | 0 | input_chroma_txb_origin_index, |
2350 | 0 | input_pic->v_stride, |
2351 | 0 | cand_bf->recon->v_buffer, |
2352 | 0 | txb_uv_origin_index, |
2353 | 0 | cand_bf->recon->v_stride, |
2354 | 0 | cropped_tx_width_uv, |
2355 | 0 | cropped_tx_height_uv, |
2356 | 0 | ctx->hbd_md, |
2357 | 0 | effective_ac_bias); |
2358 | 0 | } |
2359 | | |
2360 | 127k | txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION] <<= 4; |
2361 | 127k | txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL] <<= 4; |
2362 | 127k | } else { |
2363 | | // *Full Distortion (SSE) |
2364 | | // *Note - there are known issues with how this distortion metric is currently |
2365 | | // calculated. The amount of scaling between the two arrays is not |
2366 | | // equivalent. |
2367 | 913 | uint32_t bwidth = tx_width_uv; |
2368 | 913 | uint32_t bheight = tx_height_uv; |
2369 | 913 | if (pf_shape) { |
2370 | 0 | bwidth = MAX((bwidth >> pf_shape), 4); |
2371 | 0 | bheight = (bheight >> pf_shape); |
2372 | 0 | } |
2373 | 913 | svt_aom_picture_full_distortion32_bits_single( |
2374 | 913 | &(((int32_t*)ctx->tx_coeffs->v_buffer)[txb_1d_offset]), |
2375 | 913 | &(((int32_t*)cand_bf->rec_coeff->v_buffer)[txb_1d_offset]), |
2376 | 913 | tx_width_uv, |
2377 | 913 | bwidth, |
2378 | 913 | bheight, |
2379 | 913 | txb_full_distortion[DIST_SSD][2], |
2380 | 913 | cand_bf->eob.v[txb_itr]); |
2381 | | |
2382 | 913 | const int32_t chroma_shift = (MAX_TX_SCALE - av1_get_tx_scale_tab[tx_size_uv]) * 2; |
2383 | 913 | txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT( |
2384 | 913 | txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL], chroma_shift); |
2385 | 913 | txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT( |
2386 | 913 | txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION], chroma_shift); |
2387 | 913 | } |
2388 | 128k | cand_bf->v_has_coeff |= ((cand_bf->eob.v[txb_itr] != 0) << txb_itr); |
2389 | 128k | cr_full_distortion[DIST_SSIM][DIST_CALC_RESIDUAL] += txb_full_distortion[DIST_SSIM][2][DIST_CALC_RESIDUAL]; |
2390 | 128k | cr_full_distortion[DIST_SSIM][DIST_CALC_PREDICTION] += |
2391 | 128k | txb_full_distortion[DIST_SSIM][2][DIST_CALC_PREDICTION]; |
2392 | | |
2393 | 128k | cr_full_distortion[DIST_SSD][DIST_CALC_RESIDUAL] += txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL]; |
2394 | 128k | cr_full_distortion[DIST_SSD][DIST_CALC_PREDICTION] += |
2395 | 128k | txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION]; |
2396 | 128k | } |
2397 | | |
2398 | 127k | const uint32_t txb_origin_index = txb_origin_x + txb_origin_y * cand_bf->quant->y_stride; |
2399 | | |
2400 | | // Reset the Bit Costs |
2401 | 127k | uint64_t y_txb_coeff_bits = 0; |
2402 | 127k | uint64_t cb_txb_coeff_bits = 0; |
2403 | 127k | uint64_t cr_txb_coeff_bits = 0; |
2404 | | |
2405 | | //CHROMA-ONLY |
2406 | 127k | svt_aom_txb_estimate_coeff_bits(ctx, |
2407 | 127k | 0, |
2408 | 127k | NULL, |
2409 | 127k | pcs, |
2410 | 127k | cand_bf, |
2411 | 127k | txb_origin_index, |
2412 | 127k | txb_1d_offset, |
2413 | 127k | cand_bf->quant, |
2414 | 127k | cand_bf->eob.y[txb_itr], |
2415 | 127k | cand_bf->eob.u[txb_itr], |
2416 | 127k | cand_bf->eob.v[txb_itr], |
2417 | 127k | &y_txb_coeff_bits, |
2418 | 127k | &cb_txb_coeff_bits, |
2419 | 127k | &cr_txb_coeff_bits, |
2420 | 127k | tx_size, |
2421 | 127k | tx_size_uv, |
2422 | 127k | cand_bf->cand->transform_type[txb_itr], |
2423 | 127k | cand_bf->cand->transform_type_uv, |
2424 | 127k | component_type); |
2425 | | |
2426 | 127k | *cb_coeff_bits += cb_txb_coeff_bits; |
2427 | 127k | *cr_coeff_bits += cr_txb_coeff_bits; |
2428 | 127k | txb_1d_offset += tx_width_uv * tx_height_uv; |
2429 | | |
2430 | 127k | ++txb_itr; |
2431 | 127k | } while (txb_itr < tu_count); |
2432 | 127k | } |
2433 | | |
2434 | | /* |
2435 | | Check whether MD needs to perform the inverse transform and reconstruction |
2436 | | */ |
2437 | 248k | uint8_t svt_aom_do_md_recon(PictureParentControlSet* pcs, ModeDecisionContext* ctx) { |
2438 | 248k | const uint8_t encdec_bypass = ctx->bypass_encdec && |
2439 | 248k | (ctx->pd_pass == PD_PASS_1); // if enc dec is bypassed MD has to produce the final recon |
2440 | 248k | const uint8_t need_md_rec_for_intra_pred = !ctx->skip_intra || |
2441 | 0 | ctx->inter_intra_comp_ctrls.enabled; // for intra prediction of current frame |
2442 | 248k | const uint8_t need_md_rec_for_ref = (pcs->is_ref || pcs->scs->static_config.recon_enabled) && |
2443 | 0 | encdec_bypass; // for inter prediction of future frame or if recon is being output |
2444 | 248k | const uint8_t need_md_rec_for_dlf_search = pcs->dlf_ctrls.enabled; // for DLF levels |
2445 | 248k | const uint8_t need_md_rec_for_cdef_search = pcs->cdef_search_ctrls.enabled && |
2446 | 248k | !pcs->cdef_search_ctrls.use_qp_strength && |
2447 | 0 | !pcs->cdef_search_ctrls.use_reference_cdef_fs; // CDEF search levels needing the recon samples |
2448 | 248k | const uint8_t need_md_rec_for_restoration_search = pcs->enable_restoration; // any restoration search level |
2449 | 248k | const uint8_t need_md_rec_for_quality = (pcs->compute_psnr || pcs->compute_ssim) && |
2450 | 0 | (ctx->pd_pass == PD_PASS_1); // stat report needs recon samples for metrics |
2451 | 248k | uint8_t do_recon; |
2452 | 248k | if (need_md_rec_for_intra_pred || need_md_rec_for_ref || need_md_rec_for_dlf_search || |
2453 | 248k | need_md_rec_for_cdef_search || need_md_rec_for_restoration_search || need_md_rec_for_quality) { |
2454 | 248k | do_recon = 1; |
2455 | 248k | } else { |
2456 | 55 | do_recon = 0; |
2457 | 55 | } |
2458 | | |
2459 | 248k | return do_recon; |
2460 | 248k | } |