/src/libavif/ext/aom/av1/encoder/av1_quantize.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include <math.h> |
13 | | |
14 | | #include "config/aom_dsp_rtcd.h" |
15 | | |
16 | | #include "aom/aomcx.h" |
17 | | #include "aom_dsp/quantize.h" |
18 | | #include "aom_mem/aom_mem.h" |
19 | | #include "aom_ports/bitops.h" |
20 | | #include "aom_ports/mem.h" |
21 | | |
22 | | #include "av1/common/idct.h" |
23 | | #include "av1/common/quant_common.h" |
24 | | #include "av1/common/scan.h" |
25 | | #include "av1/common/seg_common.h" |
26 | | |
27 | | #include "av1/encoder/av1_quantize.h" |
28 | | #include "av1/encoder/encoder.h" |
29 | | #include "av1/encoder/rd.h" |
30 | | |
31 | | void av1_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr, |
32 | 0 | tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) { |
33 | 0 | memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); |
34 | 0 | memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); |
35 | 0 | *eob_ptr = 0; |
36 | 0 | } |
37 | | |
38 | | int av1_quantize_fp_no_qmatrix(const int16_t quant_ptr[2], |
39 | | const int16_t dequant_ptr[2], |
40 | | const int16_t round_ptr[2], int log_scale, |
41 | | const int16_t *scan, int coeff_count, |
42 | | const tran_low_t *coeff_ptr, |
43 | | tran_low_t *qcoeff_ptr, |
44 | 0 | tran_low_t *dqcoeff_ptr) { |
45 | 0 | memset(qcoeff_ptr, 0, coeff_count * sizeof(*qcoeff_ptr)); |
46 | 0 | memset(dqcoeff_ptr, 0, coeff_count * sizeof(*dqcoeff_ptr)); |
47 | 0 | const int rounding[2] = { ROUND_POWER_OF_TWO(round_ptr[0], log_scale), |
48 | 0 | ROUND_POWER_OF_TWO(round_ptr[1], log_scale) }; |
49 | 0 | int eob = 0; |
50 | 0 | for (int i = 0; i < coeff_count; i++) { |
51 | 0 | const int rc = scan[i]; |
52 | 0 | const int32_t thresh = (int32_t)(dequant_ptr[rc != 0]); |
53 | 0 | const int coeff = coeff_ptr[rc]; |
54 | 0 | const int coeff_sign = AOMSIGN(coeff); |
55 | 0 | int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign; |
56 | 0 | int tmp32 = 0; |
57 | 0 | if ((abs_coeff << (1 + log_scale)) >= thresh) { |
58 | 0 | abs_coeff = clamp64(abs_coeff + rounding[rc != 0], INT16_MIN, INT16_MAX); |
59 | 0 | tmp32 = (int)((abs_coeff * quant_ptr[rc != 0]) >> (16 - log_scale)); |
60 | 0 | if (tmp32) { |
61 | 0 | qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; |
62 | 0 | const tran_low_t abs_dqcoeff = |
63 | 0 | (tmp32 * dequant_ptr[rc != 0]) >> log_scale; |
64 | 0 | dqcoeff_ptr[rc] = (abs_dqcoeff ^ coeff_sign) - coeff_sign; |
65 | 0 | } |
66 | 0 | } |
67 | 0 | if (tmp32) eob = i + 1; |
68 | 0 | } |
69 | 0 | return eob; |
70 | 0 | } |
71 | | |
72 | | static void quantize_fp_helper_c( |
73 | | const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, |
74 | | const int16_t *round_ptr, const int16_t *quant_ptr, |
75 | | const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, |
76 | | tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, |
77 | | const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr, |
78 | 0 | const qm_val_t *iqm_ptr, int log_scale) { |
79 | 0 | int i, eob = -1; |
80 | 0 | const int rounding[2] = { ROUND_POWER_OF_TWO(round_ptr[0], log_scale), |
81 | 0 | ROUND_POWER_OF_TWO(round_ptr[1], log_scale) }; |
82 | | // TODO(jingning) Decide the need of these arguments after the |
83 | | // quantization process is completed. |
84 | 0 | (void)zbin_ptr; |
85 | 0 | (void)quant_shift_ptr; |
86 | 0 | (void)iscan; |
87 | |
|
88 | 0 | memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); |
89 | 0 | memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); |
90 | |
|
91 | 0 | if (qm_ptr == NULL && iqm_ptr == NULL) { |
92 | 0 | *eob_ptr = av1_quantize_fp_no_qmatrix(quant_ptr, dequant_ptr, round_ptr, |
93 | 0 | log_scale, scan, (int)n_coeffs, |
94 | 0 | coeff_ptr, qcoeff_ptr, dqcoeff_ptr); |
95 | 0 | } else { |
96 | | // Quantization pass: All coefficients with index >= zero_flag are |
97 | | // skippable. Note: zero_flag can be zero. |
98 | 0 | for (i = 0; i < n_coeffs; i++) { |
99 | 0 | const int rc = scan[i]; |
100 | 0 | const int coeff = coeff_ptr[rc]; |
101 | 0 | const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS); |
102 | 0 | const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS); |
103 | 0 | const int dequant = |
104 | 0 | (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> |
105 | 0 | AOM_QM_BITS; |
106 | 0 | const int coeff_sign = AOMSIGN(coeff); |
107 | 0 | int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign; |
108 | 0 | int tmp32 = 0; |
109 | 0 | if (abs_coeff * wt >= |
110 | 0 | (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) { |
111 | 0 | abs_coeff += rounding[rc != 0]; |
112 | 0 | abs_coeff = clamp64(abs_coeff, INT16_MIN, INT16_MAX); |
113 | 0 | tmp32 = (int)((abs_coeff * wt * quant_ptr[rc != 0]) >> |
114 | 0 | (16 - log_scale + AOM_QM_BITS)); |
115 | 0 | qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; |
116 | 0 | const tran_low_t abs_dqcoeff = (tmp32 * dequant) >> log_scale; |
117 | 0 | dqcoeff_ptr[rc] = (abs_dqcoeff ^ coeff_sign) - coeff_sign; |
118 | 0 | } |
119 | |
|
120 | 0 | if (tmp32) eob = i; |
121 | 0 | } |
122 | 0 | *eob_ptr = eob + 1; |
123 | 0 | } |
124 | 0 | } |
125 | | |
126 | | #if CONFIG_AV1_HIGHBITDEPTH |
127 | | static void highbd_quantize_fp_helper_c( |
128 | | const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr, |
129 | | const int16_t *round_ptr, const int16_t *quant_ptr, |
130 | | const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, |
131 | | tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, |
132 | | const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr, |
133 | 0 | const qm_val_t *iqm_ptr, int log_scale) { |
134 | 0 | int i; |
135 | 0 | int eob = -1; |
136 | 0 | const int shift = 16 - log_scale; |
137 | | // TODO(jingning) Decide the need of these arguments after the |
138 | | // quantization process is completed. |
139 | 0 | (void)zbin_ptr; |
140 | 0 | (void)quant_shift_ptr; |
141 | 0 | (void)iscan; |
142 | |
|
143 | 0 | if (qm_ptr || iqm_ptr) { |
144 | | // Quantization pass: All coefficients with index >= zero_flag are |
145 | | // skippable. Note: zero_flag can be zero. |
146 | 0 | for (i = 0; i < count; i++) { |
147 | 0 | const int rc = scan[i]; |
148 | 0 | const int coeff = coeff_ptr[rc]; |
149 | 0 | const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS); |
150 | 0 | const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS); |
151 | 0 | const int dequant = |
152 | 0 | (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> |
153 | 0 | AOM_QM_BITS; |
154 | 0 | const int coeff_sign = AOMSIGN(coeff); |
155 | 0 | const int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign; |
156 | 0 | int abs_qcoeff = 0; |
157 | 0 | if (abs_coeff * wt >= |
158 | 0 | (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) { |
159 | 0 | const int64_t tmp = |
160 | 0 | abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale); |
161 | 0 | abs_qcoeff = |
162 | 0 | (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS)); |
163 | 0 | qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); |
164 | 0 | const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale; |
165 | 0 | dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign); |
166 | 0 | if (abs_qcoeff) eob = i; |
167 | 0 | } else { |
168 | 0 | qcoeff_ptr[rc] = 0; |
169 | 0 | dqcoeff_ptr[rc] = 0; |
170 | 0 | } |
171 | 0 | } |
172 | 0 | } else { |
173 | 0 | const int log_scaled_round_arr[2] = { |
174 | 0 | ROUND_POWER_OF_TWO(round_ptr[0], log_scale), |
175 | 0 | ROUND_POWER_OF_TWO(round_ptr[1], log_scale), |
176 | 0 | }; |
177 | 0 | for (i = 0; i < count; i++) { |
178 | 0 | const int rc = scan[i]; |
179 | 0 | const int coeff = coeff_ptr[rc]; |
180 | 0 | const int rc01 = (rc != 0); |
181 | 0 | const int coeff_sign = AOMSIGN(coeff); |
182 | 0 | const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; |
183 | 0 | const int log_scaled_round = log_scaled_round_arr[rc01]; |
184 | 0 | if ((abs_coeff << (1 + log_scale)) >= dequant_ptr[rc01]) { |
185 | 0 | const int quant = quant_ptr[rc01]; |
186 | 0 | const int dequant = dequant_ptr[rc01]; |
187 | 0 | const int64_t tmp = (int64_t)abs_coeff + log_scaled_round; |
188 | 0 | const int abs_qcoeff = (int)((tmp * quant) >> shift); |
189 | 0 | qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); |
190 | 0 | const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale; |
191 | 0 | if (abs_qcoeff) eob = i; |
192 | 0 | dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign); |
193 | 0 | } else { |
194 | 0 | qcoeff_ptr[rc] = 0; |
195 | 0 | dqcoeff_ptr[rc] = 0; |
196 | 0 | } |
197 | 0 | } |
198 | 0 | } |
199 | 0 | *eob_ptr = eob + 1; |
200 | 0 | } |
201 | | #endif // CONFIG_AV1_HIGHBITDEPTH |
202 | | |
203 | | void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, |
204 | | const int16_t *zbin_ptr, const int16_t *round_ptr, |
205 | | const int16_t *quant_ptr, const int16_t *quant_shift_ptr, |
206 | | tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, |
207 | | const int16_t *dequant_ptr, uint16_t *eob_ptr, |
208 | 0 | const int16_t *scan, const int16_t *iscan) { |
209 | 0 | quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, |
210 | 0 | quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, |
211 | 0 | eob_ptr, scan, iscan, NULL, NULL, 0); |
212 | 0 | } |
213 | | |
// Quantizer operating on 16-bit coefficient buffers.
//
// Both output buffers are zeroed for n_coeffs entries, then every coefficient
// in scan order is rounded (offset clamped to the int16 range), multiplied by
// the quantizer and shifted right by 16. There is no threshold test: each
// visited position has its qcoeff / dqcoeff written. *eob_ptr receives the scan
// position one past the last nonzero quantized coefficient (0 if none).
// iscan is not used.
void av1_quantize_lp_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
                       const int16_t *round_ptr, const int16_t *quant_ptr,
                       int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
                       const int16_t *dequant_ptr, uint16_t *eob_ptr,
                       const int16_t *scan, const int16_t *iscan) {
  (void)iscan;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  int last_nonzero = -1;
  for (int pos = 0; pos < n_coeffs; ++pos) {
    const int rc = scan[pos];
    // Table index: 0 for raster position 0, 1 for everything else.
    const int is_ac = (rc != 0);
    const int coeff = coeff_ptr[rc];
    const int sign = AOMSIGN(coeff);
    const int magnitude = (coeff ^ sign) - sign;

    const int rounded =
        clamp(magnitude + round_ptr[is_ac], INT16_MIN, INT16_MAX);
    const int level = (rounded * quant_ptr[is_ac]) >> 16;

    qcoeff_ptr[rc] = (level ^ sign) - sign;
    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[is_ac];

    if (level) last_nonzero = pos;
  }
  *eob_ptr = last_nonzero + 1;
}
243 | | |
244 | | void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, |
245 | | const int16_t *zbin_ptr, const int16_t *round_ptr, |
246 | | const int16_t *quant_ptr, |
247 | | const int16_t *quant_shift_ptr, |
248 | | tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, |
249 | | const int16_t *dequant_ptr, uint16_t *eob_ptr, |
250 | 0 | const int16_t *scan, const int16_t *iscan) { |
251 | 0 | quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, |
252 | 0 | quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, |
253 | 0 | eob_ptr, scan, iscan, NULL, NULL, 1); |
254 | 0 | } |
255 | | |
256 | | void av1_quantize_fp_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, |
257 | | const int16_t *zbin_ptr, const int16_t *round_ptr, |
258 | | const int16_t *quant_ptr, |
259 | | const int16_t *quant_shift_ptr, |
260 | | tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, |
261 | | const int16_t *dequant_ptr, uint16_t *eob_ptr, |
262 | 0 | const int16_t *scan, const int16_t *iscan) { |
263 | 0 | quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, |
264 | 0 | quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, |
265 | 0 | eob_ptr, scan, iscan, NULL, NULL, 2); |
266 | 0 | } |
267 | | |
268 | | void av1_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, |
269 | | const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, |
270 | | tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, |
271 | 107M | const SCAN_ORDER *sc, const QUANT_PARAM *qparam) { |
272 | 107M | const qm_val_t *qm_ptr = qparam->qmatrix; |
273 | 107M | const qm_val_t *iqm_ptr = qparam->iqmatrix; |
274 | 107M | if (qm_ptr != NULL && iqm_ptr != NULL) { |
275 | 0 | quantize_fp_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, |
276 | 0 | p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr, |
277 | 0 | dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, |
278 | 0 | sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale); |
279 | 107M | } else { |
280 | 107M | switch (qparam->log_scale) { |
281 | 107M | case 0: |
282 | 107M | av1_quantize_fp(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, |
283 | 107M | p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr, |
284 | 107M | dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, |
285 | 107M | sc->iscan); |
286 | 107M | break; |
287 | 719k | case 1: |
288 | 719k | av1_quantize_fp_32x32(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, |
289 | 719k | p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr, |
290 | 719k | dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, |
291 | 719k | sc->iscan); |
292 | 719k | break; |
293 | 12.0k | case 2: |
294 | 12.0k | av1_quantize_fp_64x64(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, |
295 | 12.0k | p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr, |
296 | 12.0k | dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, |
297 | 12.0k | sc->iscan); |
298 | 12.0k | break; |
299 | 0 | default: assert(0); |
300 | 107M | } |
301 | 107M | } |
302 | 107M | } |
303 | | |
304 | | void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, |
305 | | const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, |
306 | | tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, |
307 | 84.8M | const SCAN_ORDER *sc, const QUANT_PARAM *qparam) { |
308 | 84.8M | const qm_val_t *qm_ptr = qparam->qmatrix; |
309 | 84.8M | const qm_val_t *iqm_ptr = qparam->iqmatrix; |
310 | 84.8M | #if !CONFIG_REALTIME_ONLY |
311 | 84.8M | if (qparam->use_quant_b_adapt) { |
312 | | // TODO(sarahparker) These quantize_b optimizations need SIMD |
313 | | // implementations |
314 | 0 | if (qm_ptr != NULL && iqm_ptr != NULL) { |
315 | 0 | aom_quantize_b_adaptive_helper_c( |
316 | 0 | coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX, |
317 | 0 | p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr, |
318 | 0 | sc->scan, sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale); |
319 | 0 | } else { |
320 | 0 | switch (qparam->log_scale) { |
321 | 0 | case 0: |
322 | 0 | aom_quantize_b_adaptive(coeff_ptr, n_coeffs, p->zbin_QTX, |
323 | 0 | p->round_QTX, p->quant_QTX, |
324 | 0 | p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, |
325 | 0 | p->dequant_QTX, eob_ptr, sc->scan, sc->iscan); |
326 | 0 | break; |
327 | 0 | case 1: |
328 | 0 | aom_quantize_b_32x32_adaptive( |
329 | 0 | coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX, |
330 | 0 | p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, |
331 | 0 | eob_ptr, sc->scan, sc->iscan); |
332 | 0 | break; |
333 | 0 | case 2: |
334 | 0 | aom_quantize_b_64x64_adaptive( |
335 | 0 | coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX, |
336 | 0 | p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, |
337 | 0 | eob_ptr, sc->scan, sc->iscan); |
338 | 0 | break; |
339 | 0 | default: assert(0); |
340 | 0 | } |
341 | 0 | } |
342 | 0 | return; |
343 | 0 | } |
344 | 84.8M | #endif // !CONFIG_REALTIME_ONLY |
345 | | |
346 | 84.8M | if (qm_ptr != NULL && iqm_ptr != NULL) { |
347 | 0 | aom_quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, |
348 | 0 | p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, |
349 | 0 | dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, |
350 | 0 | sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale); |
351 | 84.8M | } else { |
352 | 84.8M | switch (qparam->log_scale) { |
353 | 83.4M | case 0: |
354 | 83.4M | aom_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, |
355 | 83.4M | p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, |
356 | 83.4M | dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, |
357 | 83.4M | sc->iscan); |
358 | 83.4M | break; |
359 | 1.35M | case 1: |
360 | 1.35M | aom_quantize_b_32x32(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, |
361 | 1.35M | p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, |
362 | 1.35M | dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, |
363 | 1.35M | sc->iscan); |
364 | 1.35M | break; |
365 | 40.4k | case 2: |
366 | 40.4k | aom_quantize_b_64x64(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, |
367 | 40.4k | p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, |
368 | 40.4k | dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, |
369 | 40.4k | sc->iscan); |
370 | 40.4k | break; |
371 | 0 | default: assert(0); |
372 | 84.8M | } |
373 | 84.8M | } |
374 | 84.8M | } |
375 | | |
376 | | static void quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, |
377 | | int skip_block, const int16_t *round_ptr, |
378 | | const int16_t quant, tran_low_t *qcoeff_ptr, |
379 | | tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, |
380 | | uint16_t *eob_ptr, const qm_val_t *qm_ptr, |
381 | 0 | const qm_val_t *iqm_ptr, const int log_scale) { |
382 | 0 | const int rc = 0; |
383 | 0 | const int coeff = coeff_ptr[rc]; |
384 | 0 | const int coeff_sign = AOMSIGN(coeff); |
385 | 0 | const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; |
386 | 0 | int64_t tmp; |
387 | 0 | int eob = -1; |
388 | 0 | int32_t tmp32; |
389 | 0 | int dequant; |
390 | |
|
391 | 0 | memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); |
392 | 0 | memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); |
393 | |
|
394 | 0 | if (!skip_block) { |
395 | 0 | const int wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS); |
396 | 0 | const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS); |
397 | 0 | tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale), |
398 | 0 | INT16_MIN, INT16_MAX); |
399 | 0 | tmp32 = (int32_t)((tmp * wt * quant) >> (16 - log_scale + AOM_QM_BITS)); |
400 | 0 | qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; |
401 | 0 | dequant = (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; |
402 | 0 | const tran_low_t abs_dqcoeff = (tmp32 * dequant) >> log_scale; |
403 | 0 | dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign); |
404 | 0 | if (tmp32) eob = 0; |
405 | 0 | } |
406 | 0 | *eob_ptr = eob + 1; |
407 | 0 | } |
408 | | |
409 | | void av1_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, |
410 | | const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, |
411 | | tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, |
412 | 0 | const SCAN_ORDER *sc, const QUANT_PARAM *qparam) { |
413 | | // obsolete skip_block |
414 | 0 | const int skip_block = 0; |
415 | 0 | (void)sc; |
416 | 0 | assert(qparam->log_scale >= 0 && qparam->log_scale < (3)); |
417 | 0 | const qm_val_t *qm_ptr = qparam->qmatrix; |
418 | 0 | const qm_val_t *iqm_ptr = qparam->iqmatrix; |
419 | 0 | quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round_QTX, |
420 | 0 | p->quant_fp_QTX[0], qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX[0], |
421 | 0 | eob_ptr, qm_ptr, iqm_ptr, qparam->log_scale); |
422 | 0 | } |
423 | | |
424 | | #if CONFIG_AV1_HIGHBITDEPTH |
425 | | void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr, |
426 | | intptr_t n_coeffs, const MACROBLOCK_PLANE *p, |
427 | | tran_low_t *qcoeff_ptr, |
428 | | tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, |
429 | | const SCAN_ORDER *sc, |
430 | 23.3M | const QUANT_PARAM *qparam) { |
431 | 23.3M | const qm_val_t *qm_ptr = qparam->qmatrix; |
432 | 23.3M | const qm_val_t *iqm_ptr = qparam->iqmatrix; |
433 | 23.3M | if (qm_ptr != NULL && iqm_ptr != NULL) { |
434 | 0 | highbd_quantize_fp_helper_c( |
435 | 0 | coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, p->quant_fp_QTX, |
436 | 0 | p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr, |
437 | 0 | sc->scan, sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale); |
438 | 23.3M | } else { |
439 | 23.3M | av1_highbd_quantize_fp(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, |
440 | 23.3M | p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr, |
441 | 23.3M | dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, |
442 | 23.3M | sc->iscan, qparam->log_scale); |
443 | 23.3M | } |
444 | 23.3M | } |
445 | | |
446 | | void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr, |
447 | | intptr_t n_coeffs, const MACROBLOCK_PLANE *p, |
448 | | tran_low_t *qcoeff_ptr, |
449 | | tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, |
450 | | const SCAN_ORDER *sc, |
451 | 15.5M | const QUANT_PARAM *qparam) { |
452 | 15.5M | const qm_val_t *qm_ptr = qparam->qmatrix; |
453 | 15.5M | const qm_val_t *iqm_ptr = qparam->iqmatrix; |
454 | 15.5M | #if !CONFIG_REALTIME_ONLY |
455 | 15.5M | if (qparam->use_quant_b_adapt) { |
456 | 0 | if (qm_ptr != NULL && iqm_ptr != NULL) { |
457 | 0 | aom_highbd_quantize_b_adaptive_helper_c( |
458 | 0 | coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX, |
459 | 0 | p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr, |
460 | 0 | sc->scan, sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale); |
461 | 0 | } else { |
462 | 0 | switch (qparam->log_scale) { |
463 | 0 | case 0: |
464 | 0 | aom_highbd_quantize_b_adaptive( |
465 | 0 | coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX, |
466 | 0 | p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, |
467 | 0 | eob_ptr, sc->scan, sc->iscan); |
468 | 0 | break; |
469 | 0 | case 1: |
470 | 0 | aom_highbd_quantize_b_32x32_adaptive( |
471 | 0 | coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX, |
472 | 0 | p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, |
473 | 0 | eob_ptr, sc->scan, sc->iscan); |
474 | 0 | break; |
475 | 0 | case 2: |
476 | 0 | aom_highbd_quantize_b_64x64_adaptive( |
477 | 0 | coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX, |
478 | 0 | p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, |
479 | 0 | eob_ptr, sc->scan, sc->iscan); |
480 | 0 | break; |
481 | 0 | default: assert(0); |
482 | 0 | } |
483 | 0 | } |
484 | 0 | return; |
485 | 0 | } |
486 | 15.5M | #endif // !CONFIG_REALTIME_ONLY |
487 | | |
488 | 15.5M | if (qm_ptr != NULL && iqm_ptr != NULL) { |
489 | 0 | aom_highbd_quantize_b_helper_c( |
490 | 0 | coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX, |
491 | 0 | p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr, |
492 | 0 | sc->scan, sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale); |
493 | 15.5M | } else { |
494 | 15.5M | switch (qparam->log_scale) { |
495 | 15.1M | case 0: |
496 | 15.1M | aom_highbd_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, |
497 | 15.1M | p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, |
498 | 15.1M | dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, |
499 | 15.1M | sc->iscan); |
500 | 15.1M | break; |
501 | 359k | case 1: |
502 | 359k | aom_highbd_quantize_b_32x32( |
503 | 359k | coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX, |
504 | 359k | p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, |
505 | 359k | eob_ptr, sc->scan, sc->iscan); |
506 | 359k | break; |
507 | 10.9k | case 2: |
508 | 10.9k | aom_highbd_quantize_b_64x64( |
509 | 10.9k | coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX, |
510 | 10.9k | p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, |
511 | 10.9k | eob_ptr, sc->scan, sc->iscan); |
512 | 10.9k | break; |
513 | 0 | default: assert(0); |
514 | 15.5M | } |
515 | 15.5M | } |
516 | 15.5M | } |
517 | | |
518 | | static inline void highbd_quantize_dc( |
519 | | const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, |
520 | | const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, |
521 | | tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr, |
522 | 0 | const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, const int log_scale) { |
523 | 0 | int eob = -1; |
524 | |
|
525 | 0 | memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); |
526 | 0 | memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); |
527 | |
|
528 | 0 | if (!skip_block) { |
529 | 0 | const qm_val_t wt = qm_ptr != NULL ? qm_ptr[0] : (1 << AOM_QM_BITS); |
530 | 0 | const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[0] : (1 << AOM_QM_BITS); |
531 | 0 | const int coeff = coeff_ptr[0]; |
532 | 0 | const int coeff_sign = AOMSIGN(coeff); |
533 | 0 | const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; |
534 | 0 | const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], log_scale); |
535 | 0 | const int64_t tmpw = tmp * wt; |
536 | 0 | const int abs_qcoeff = |
537 | 0 | (int)((tmpw * quant) >> (16 - log_scale + AOM_QM_BITS)); |
538 | 0 | qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); |
539 | 0 | const int dequant = |
540 | 0 | (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; |
541 | |
|
542 | 0 | const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale; |
543 | 0 | dqcoeff_ptr[0] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign); |
544 | 0 | if (abs_qcoeff) eob = 0; |
545 | 0 | } |
546 | 0 | *eob_ptr = eob + 1; |
547 | 0 | } |
548 | | |
549 | | void av1_highbd_quantize_dc_facade(const tran_low_t *coeff_ptr, |
550 | | intptr_t n_coeffs, const MACROBLOCK_PLANE *p, |
551 | | tran_low_t *qcoeff_ptr, |
552 | | tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, |
553 | | const SCAN_ORDER *sc, |
554 | 0 | const QUANT_PARAM *qparam) { |
555 | | // obsolete skip_block |
556 | 0 | const int skip_block = 0; |
557 | 0 | const qm_val_t *qm_ptr = qparam->qmatrix; |
558 | 0 | const qm_val_t *iqm_ptr = qparam->iqmatrix; |
559 | 0 | (void)sc; |
560 | |
|
561 | 0 | highbd_quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round_QTX, |
562 | 0 | p->quant_fp_QTX[0], qcoeff_ptr, dqcoeff_ptr, |
563 | 0 | p->dequant_QTX[0], eob_ptr, qm_ptr, iqm_ptr, |
564 | 0 | qparam->log_scale); |
565 | 0 | } |
566 | | |
567 | | void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count, |
568 | | const int16_t *zbin_ptr, const int16_t *round_ptr, |
569 | | const int16_t *quant_ptr, |
570 | | const int16_t *quant_shift_ptr, |
571 | | tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, |
572 | | const int16_t *dequant_ptr, uint16_t *eob_ptr, |
573 | | const int16_t *scan, const int16_t *iscan, |
574 | 0 | int log_scale) { |
575 | 0 | highbd_quantize_fp_helper_c(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr, |
576 | 0 | quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, |
577 | 0 | dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, |
578 | 0 | log_scale); |
579 | 0 | } |
580 | | #endif // CONFIG_AV1_HIGHBITDEPTH |
581 | | |
// Derives a fixed-point multiplier / shift pair from the quantizer step d.
//
// With l = get_msb(d) (presumably the index of the highest set bit of d --
// confirm against get_msb's definition):
//   *quant = 1 + (1 << (16 + l)) / d - (1 << 16), stored as int16_t
//   *shift = 1 << (16 - l)
// d is used as a divisor, so it must be nonzero.
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
  const uint32_t step = d;
  const int msb = get_msb(step);
  const int multiplier = 1 + (1 << (16 + msb)) / d;

  *quant = (int16_t)(multiplier - (1 << 16));
  *shift = 1 << (16 - msb);
}
591 | | |
592 | 18.7M | static int get_qzbin_factor(int q, aom_bit_depth_t bit_depth) { |
593 | 18.7M | const int quant = av1_dc_quant_QTX(q, 0, bit_depth); |
594 | 18.7M | switch (bit_depth) { |
595 | 13.4M | case AOM_BITS_8: return q == 0 ? 64 : (quant < 148 ? 84 : 80); |
596 | 2.39M | case AOM_BITS_10: return q == 0 ? 64 : (quant < 592 ? 84 : 80); |
597 | 2.95M | case AOM_BITS_12: return q == 0 ? 64 : (quant < 2368 ? 84 : 80); |
598 | 0 | default: |
599 | 0 | assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12"); |
600 | 0 | return -1; |
601 | 18.7M | } |
602 | 18.7M | } |
603 | | |
604 | | void av1_build_quantizer(aom_bit_depth_t bit_depth, int y_dc_delta_q, |
605 | | int u_dc_delta_q, int u_ac_delta_q, int v_dc_delta_q, |
606 | | int v_ac_delta_q, QUANTS *const quants, |
607 | 73.2k | Dequants *const deq, int sharpness) { |
608 | 73.2k | int i, q, quant_QTX; |
609 | 73.2k | const int sharpness_adjustment = 16 * (7 - sharpness) / 7; |
610 | | |
611 | 18.8M | for (q = 0; q < QINDEX_RANGE; q++) { |
612 | 18.7M | const int qzbin_factor = get_qzbin_factor(q, bit_depth); |
613 | 18.7M | int qrounding_factor = q == 0 ? 64 : 48; |
614 | | |
615 | 56.2M | for (i = 0; i < 2; ++i) { |
616 | 37.5M | int qrounding_factor_fp = 64; |
617 | | |
618 | 37.5M | if (sharpness != 0 && q != 0) { |
619 | 0 | qrounding_factor = 64 - sharpness_adjustment; |
620 | 0 | qrounding_factor_fp = 64 - sharpness_adjustment; |
621 | 0 | } |
622 | | |
623 | | // y quantizer with TX scale |
624 | 37.5M | quant_QTX = i == 0 ? av1_dc_quant_QTX(q, y_dc_delta_q, bit_depth) |
625 | 37.5M | : av1_ac_quant_QTX(q, 0, bit_depth); |
626 | 37.5M | invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i], |
627 | 37.5M | quant_QTX); |
628 | 37.5M | quants->y_quant_fp[q][i] = (1 << 16) / quant_QTX; |
629 | 37.5M | quants->y_round_fp[q][i] = (qrounding_factor_fp * quant_QTX) >> 7; |
630 | 37.5M | quants->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant_QTX, 7); |
631 | 37.5M | quants->y_round[q][i] = (qrounding_factor * quant_QTX) >> 7; |
632 | 37.5M | deq->y_dequant_QTX[q][i] = quant_QTX; |
633 | | |
634 | | // u quantizer with TX scale |
635 | 37.5M | quant_QTX = i == 0 ? av1_dc_quant_QTX(q, u_dc_delta_q, bit_depth) |
636 | 37.5M | : av1_ac_quant_QTX(q, u_ac_delta_q, bit_depth); |
637 | 37.5M | invert_quant(&quants->u_quant[q][i], &quants->u_quant_shift[q][i], |
638 | 37.5M | quant_QTX); |
639 | 37.5M | quants->u_quant_fp[q][i] = (1 << 16) / quant_QTX; |
640 | 37.5M | quants->u_round_fp[q][i] = (qrounding_factor_fp * quant_QTX) >> 7; |
641 | 37.5M | quants->u_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant_QTX, 7); |
642 | 37.5M | quants->u_round[q][i] = (qrounding_factor * quant_QTX) >> 7; |
643 | 37.5M | deq->u_dequant_QTX[q][i] = quant_QTX; |
644 | | |
645 | | // v quantizer with TX scale |
646 | 37.5M | quant_QTX = i == 0 ? av1_dc_quant_QTX(q, v_dc_delta_q, bit_depth) |
647 | 37.5M | : av1_ac_quant_QTX(q, v_ac_delta_q, bit_depth); |
648 | 37.5M | invert_quant(&quants->v_quant[q][i], &quants->v_quant_shift[q][i], |
649 | 37.5M | quant_QTX); |
650 | 37.5M | quants->v_quant_fp[q][i] = (1 << 16) / quant_QTX; |
651 | 37.5M | quants->v_round_fp[q][i] = (qrounding_factor_fp * quant_QTX) >> 7; |
652 | 37.5M | quants->v_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant_QTX, 7); |
653 | 37.5M | quants->v_round[q][i] = (qrounding_factor * quant_QTX) >> 7; |
654 | 37.5M | deq->v_dequant_QTX[q][i] = quant_QTX; |
655 | 37.5M | } |
656 | | |
657 | 131M | for (i = 2; i < 8; i++) { // 8: SIMD width |
658 | 112M | quants->y_quant[q][i] = quants->y_quant[q][1]; |
659 | 112M | quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1]; |
660 | 112M | quants->y_round_fp[q][i] = quants->y_round_fp[q][1]; |
661 | 112M | quants->y_quant_shift[q][i] = quants->y_quant_shift[q][1]; |
662 | 112M | quants->y_zbin[q][i] = quants->y_zbin[q][1]; |
663 | 112M | quants->y_round[q][i] = quants->y_round[q][1]; |
664 | 112M | deq->y_dequant_QTX[q][i] = deq->y_dequant_QTX[q][1]; |
665 | | |
666 | 112M | quants->u_quant[q][i] = quants->u_quant[q][1]; |
667 | 112M | quants->u_quant_fp[q][i] = quants->u_quant_fp[q][1]; |
668 | 112M | quants->u_round_fp[q][i] = quants->u_round_fp[q][1]; |
669 | 112M | quants->u_quant_shift[q][i] = quants->u_quant_shift[q][1]; |
670 | 112M | quants->u_zbin[q][i] = quants->u_zbin[q][1]; |
671 | 112M | quants->u_round[q][i] = quants->u_round[q][1]; |
672 | 112M | deq->u_dequant_QTX[q][i] = deq->u_dequant_QTX[q][1]; |
673 | | |
674 | 112M | quants->v_quant[q][i] = quants->v_quant[q][1]; |
675 | 112M | quants->v_quant_fp[q][i] = quants->v_quant_fp[q][1]; |
676 | 112M | quants->v_round_fp[q][i] = quants->v_round_fp[q][1]; |
677 | 112M | quants->v_quant_shift[q][i] = quants->v_quant_shift[q][1]; |
678 | 112M | quants->v_zbin[q][i] = quants->v_zbin[q][1]; |
679 | 112M | quants->v_round[q][i] = quants->v_round[q][1]; |
680 | 112M | deq->v_dequant_QTX[q][i] = deq->v_dequant_QTX[q][1]; |
681 | 112M | } |
682 | 18.7M | } |
683 | 73.2k | } |
684 | | |
685 | | static inline bool deltaq_params_have_changed( |
686 | | const DeltaQuantParams *prev_deltaq_params, |
687 | 181k | const CommonQuantParams *quant_params) { |
688 | 181k | return (prev_deltaq_params->y_dc_delta_q != quant_params->y_dc_delta_q || |
689 | 108k | prev_deltaq_params->u_dc_delta_q != quant_params->u_dc_delta_q || |
690 | 108k | prev_deltaq_params->v_dc_delta_q != quant_params->v_dc_delta_q || |
691 | 108k | prev_deltaq_params->u_ac_delta_q != quant_params->u_ac_delta_q || |
692 | 108k | prev_deltaq_params->v_ac_delta_q != quant_params->v_ac_delta_q || |
693 | 108k | prev_deltaq_params->sharpness != quant_params->sharpness); |
694 | 181k | } |
695 | | |
696 | | void av1_init_quantizer(EncQuantDequantParams *const enc_quant_dequant_params, |
697 | | CommonQuantParams *quant_params, |
698 | 181k | aom_bit_depth_t bit_depth, int sharpness) { |
699 | 181k | DeltaQuantParams *const prev_deltaq_params = |
700 | 181k | &enc_quant_dequant_params->prev_deltaq_params; |
701 | 181k | quant_params->sharpness = sharpness; |
702 | | |
703 | | // Re-initialize the quantizer only if any of the dc/ac deltaq parameters |
704 | | // change. |
705 | 181k | if (!deltaq_params_have_changed(prev_deltaq_params, quant_params)) return; |
706 | 73.2k | QUANTS *const quants = &enc_quant_dequant_params->quants; |
707 | 73.2k | Dequants *const dequants = &enc_quant_dequant_params->dequants; |
708 | 73.2k | av1_build_quantizer(bit_depth, quant_params->y_dc_delta_q, |
709 | 73.2k | quant_params->u_dc_delta_q, quant_params->u_ac_delta_q, |
710 | 73.2k | quant_params->v_dc_delta_q, quant_params->v_ac_delta_q, |
711 | 73.2k | quants, dequants, sharpness); |
712 | | |
713 | | // Record the state of deltaq parameters. |
714 | 73.2k | prev_deltaq_params->y_dc_delta_q = quant_params->y_dc_delta_q; |
715 | 73.2k | prev_deltaq_params->u_dc_delta_q = quant_params->u_dc_delta_q; |
716 | 73.2k | prev_deltaq_params->v_dc_delta_q = quant_params->v_dc_delta_q; |
717 | 73.2k | prev_deltaq_params->u_ac_delta_q = quant_params->u_ac_delta_q; |
718 | 73.2k | prev_deltaq_params->v_ac_delta_q = quant_params->v_ac_delta_q; |
719 | 73.2k | prev_deltaq_params->sharpness = sharpness; |
720 | 73.2k | } |
721 | | |
722 | | /*!\brief Update quantize parameters in MACROBLOCK |
723 | | * |
724 | | * \param[in] enc_quant_dequant_params This parameter cached the quantize and |
725 | | * dequantize parameters for all q |
726 | | * indices. |
727 | | * \param[in] qindex Quantize index used for the current |
728 | | * superblock. |
729 | | * \param[out] x A superblock data structure for |
730 | | * encoder. |
731 | | */ |
732 | | static void set_q_index(const EncQuantDequantParams *enc_quant_dequant_params, |
733 | 145k | int qindex, MACROBLOCK *x) { |
734 | 145k | const QUANTS *const quants = &enc_quant_dequant_params->quants; |
735 | 145k | const Dequants *const dequants = &enc_quant_dequant_params->dequants; |
736 | 145k | x->qindex = qindex; |
737 | 145k | x->seg_skip_block = |
738 | 145k | 0; // TODO(angiebird): Find a proper place to init this variable. |
739 | | |
740 | | // Y |
741 | 145k | x->plane[0].quant_QTX = quants->y_quant[qindex]; |
742 | 145k | x->plane[0].quant_fp_QTX = quants->y_quant_fp[qindex]; |
743 | 145k | x->plane[0].round_fp_QTX = quants->y_round_fp[qindex]; |
744 | 145k | x->plane[0].quant_shift_QTX = quants->y_quant_shift[qindex]; |
745 | 145k | x->plane[0].zbin_QTX = quants->y_zbin[qindex]; |
746 | 145k | x->plane[0].round_QTX = quants->y_round[qindex]; |
747 | 145k | x->plane[0].dequant_QTX = dequants->y_dequant_QTX[qindex]; |
748 | | |
749 | | // U |
750 | 145k | x->plane[1].quant_QTX = quants->u_quant[qindex]; |
751 | 145k | x->plane[1].quant_fp_QTX = quants->u_quant_fp[qindex]; |
752 | 145k | x->plane[1].round_fp_QTX = quants->u_round_fp[qindex]; |
753 | 145k | x->plane[1].quant_shift_QTX = quants->u_quant_shift[qindex]; |
754 | 145k | x->plane[1].zbin_QTX = quants->u_zbin[qindex]; |
755 | 145k | x->plane[1].round_QTX = quants->u_round[qindex]; |
756 | 145k | x->plane[1].dequant_QTX = dequants->u_dequant_QTX[qindex]; |
757 | | |
758 | | // V |
759 | 145k | x->plane[2].quant_QTX = quants->v_quant[qindex]; |
760 | 145k | x->plane[2].quant_fp_QTX = quants->v_quant_fp[qindex]; |
761 | 145k | x->plane[2].round_fp_QTX = quants->v_round_fp[qindex]; |
762 | 145k | x->plane[2].quant_shift_QTX = quants->v_quant_shift[qindex]; |
763 | 145k | x->plane[2].zbin_QTX = quants->v_zbin[qindex]; |
764 | 145k | x->plane[2].round_QTX = quants->v_round[qindex]; |
765 | 145k | x->plane[2].dequant_QTX = dequants->v_dequant_QTX[qindex]; |
766 | 145k | } |
767 | | |
768 | | /*!\brief Update quantize matrix in MACROBLOCKD based on segment id |
769 | | * |
770 | | * \param[in] quant_params Quantize parameters used by encoder and decoder |
771 | | * \param[in] segment_id Segment id. |
772 | | * \param[out] xd A superblock data structure used by encoder and |
773 | | * decoder. |
774 | | */ |
775 | | static void set_qmatrix(const CommonQuantParams *quant_params, int segment_id, |
776 | 145k | MACROBLOCKD *xd) { |
777 | 145k | const int use_qmatrix = av1_use_qmatrix(quant_params, xd, segment_id); |
778 | 145k | const int qmlevel_y = |
779 | 145k | use_qmatrix ? quant_params->qmatrix_level_y : NUM_QM_LEVELS - 1; |
780 | 145k | const int qmlevel_u = |
781 | 145k | use_qmatrix ? quant_params->qmatrix_level_u : NUM_QM_LEVELS - 1; |
782 | 145k | const int qmlevel_v = |
783 | 145k | use_qmatrix ? quant_params->qmatrix_level_v : NUM_QM_LEVELS - 1; |
784 | 145k | const int qmlevel_ls[MAX_MB_PLANE] = { qmlevel_y, qmlevel_u, qmlevel_v }; |
785 | 581k | for (int i = 0; i < MAX_MB_PLANE; ++i) { |
786 | 436k | const int qmlevel = qmlevel_ls[i]; |
787 | 436k | memcpy(&xd->plane[i].seg_qmatrix[segment_id], |
788 | 436k | quant_params->gqmatrix[qmlevel][i], |
789 | 436k | sizeof(quant_params->gqmatrix[qmlevel][i])); |
790 | 436k | memcpy(&xd->plane[i].seg_iqmatrix[segment_id], |
791 | 436k | quant_params->giqmatrix[qmlevel][i], |
792 | 436k | sizeof(quant_params->giqmatrix[qmlevel][i])); |
793 | 436k | } |
794 | 145k | } |
795 | | |
796 | | void av1_init_plane_quantizers(const AV1_COMP *cpi, MACROBLOCK *x, |
797 | 147k | int segment_id, const int do_update) { |
798 | 147k | const AV1_COMMON *const cm = &cpi->common; |
799 | 147k | const CommonQuantParams *const quant_params = &cm->quant_params; |
800 | 147k | const GF_GROUP *const gf_group = &cpi->ppi->gf_group; |
801 | 147k | const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100)); |
802 | 147k | const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6); |
803 | 147k | const FRAME_TYPE frame_type = cm->current_frame.frame_type; |
804 | 147k | int qindex_rd; |
805 | | |
806 | 147k | const int current_qindex = |
807 | 147k | clamp(cm->delta_q_info.delta_q_present_flag |
808 | 147k | ? quant_params->base_qindex + x->delta_qindex |
809 | 147k | : quant_params->base_qindex, |
810 | 147k | 0, QINDEX_RANGE - 1); |
811 | 147k | const int qindex = av1_get_qindex(&cm->seg, segment_id, current_qindex); |
812 | | |
813 | 147k | if (cpi->oxcf.sb_qp_sweep) { |
814 | 0 | const int current_rd_qindex = |
815 | 0 | clamp(cm->delta_q_info.delta_q_present_flag |
816 | 0 | ? quant_params->base_qindex + x->rdmult_delta_qindex |
817 | 0 | : quant_params->base_qindex, |
818 | 0 | 0, QINDEX_RANGE - 1); |
819 | 0 | qindex_rd = av1_get_qindex(&cm->seg, segment_id, current_rd_qindex); |
820 | 147k | } else { |
821 | 147k | qindex_rd = qindex; |
822 | 147k | } |
823 | | |
824 | 147k | const int qindex_rdmult = qindex_rd + quant_params->y_dc_delta_q; |
825 | 147k | const int rdmult = av1_compute_rd_mult( |
826 | 147k | qindex_rdmult, cm->seq_params->bit_depth, |
827 | 147k | cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth, |
828 | 147k | boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets, |
829 | 147k | is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning); |
830 | | |
831 | 147k | const int qindex_change = x->qindex != qindex; |
832 | 147k | if (qindex_change || do_update) { |
833 | 145k | set_q_index(&cpi->enc_quant_dequant_params, qindex, x); |
834 | 145k | } |
835 | | |
836 | 147k | MACROBLOCKD *const xd = &x->e_mbd; |
837 | 147k | if ((segment_id != x->prev_segment_id) || |
838 | 145k | av1_use_qmatrix(quant_params, xd, segment_id)) { |
839 | 145k | set_qmatrix(quant_params, segment_id, xd); |
840 | 145k | } |
841 | | |
842 | 147k | x->seg_skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP); |
843 | | |
844 | 147k | av1_set_error_per_bit(&x->errorperbit, rdmult); |
845 | 147k | av1_set_sad_per_bit(cpi, &x->sadperbit, qindex_rd); |
846 | | |
847 | 147k | x->prev_segment_id = segment_id; |
848 | 147k | } |
849 | | |
850 | 145k | void av1_frame_init_quantizer(AV1_COMP *cpi) { |
851 | 145k | MACROBLOCK *const x = &cpi->td.mb; |
852 | 145k | MACROBLOCKD *const xd = &x->e_mbd; |
853 | 145k | x->prev_segment_id = -1; |
854 | 145k | av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 1); |
855 | 145k | } |
856 | | |
857 | 0 | static int adjust_hdr_cb_deltaq(int base_qindex) { |
858 | 0 | double baseQp = base_qindex / QP_SCALE_FACTOR; |
859 | 0 | const double chromaQp = CHROMA_QP_SCALE * baseQp + CHROMA_QP_OFFSET; |
860 | 0 | const double dcbQP = CHROMA_CB_QP_SCALE * chromaQp * QP_SCALE_FACTOR; |
861 | 0 | int dqpCb = (int)(dcbQP + (dcbQP < 0 ? -0.5 : 0.5)); |
862 | 0 | dqpCb = AOMMIN(0, dqpCb); |
863 | 0 | dqpCb = (int)CLIP(dqpCb, -12 * QP_SCALE_FACTOR, 12 * QP_SCALE_FACTOR); |
864 | 0 | return dqpCb; |
865 | 0 | } |
866 | | |
867 | 0 | static int adjust_hdr_cr_deltaq(int base_qindex) { |
868 | 0 | double baseQp = base_qindex / QP_SCALE_FACTOR; |
869 | 0 | const double chromaQp = CHROMA_QP_SCALE * baseQp + CHROMA_QP_OFFSET; |
870 | 0 | const double dcrQP = CHROMA_CR_QP_SCALE * chromaQp * QP_SCALE_FACTOR; |
871 | 0 | int dqpCr = (int)(dcrQP + (dcrQP < 0 ? -0.5 : 0.5)); |
872 | 0 | dqpCr = AOMMIN(0, dqpCr); |
873 | 0 | dqpCr = (int)CLIP(dqpCr, -12 * QP_SCALE_FACTOR, 12 * QP_SCALE_FACTOR); |
874 | 0 | return dqpCr; |
875 | 0 | } |
876 | | |
877 | | void av1_set_quantizer(AV1_COMMON *const cm, int min_qmlevel, int max_qmlevel, |
878 | | int q, int enable_chroma_deltaq, int enable_hdr_deltaq, |
879 | 126k | bool is_allintra, aom_tune_metric tuning) { |
880 | | // quantizer has to be reinitialized with av1_init_quantizer() if any |
881 | | // delta_q changes. |
882 | 126k | CommonQuantParams *quant_params = &cm->quant_params; |
883 | 126k | quant_params->base_qindex = AOMMAX(cm->delta_q_info.delta_q_present_flag, q); |
884 | 126k | quant_params->y_dc_delta_q = 0; |
885 | | |
886 | | // Disable deltaq in lossless mode. |
887 | 126k | if (enable_chroma_deltaq && q) { |
888 | 0 | if (is_allintra && |
889 | 0 | (tuning == AOM_TUNE_IQ || tuning == AOM_TUNE_SSIMULACRA2)) { |
890 | 0 | int chroma_dc_delta_q = 0; |
891 | 0 | int chroma_ac_delta_q = 0; |
892 | |
|
893 | 0 | if (cm->seq_params->subsampling_x == 1 && |
894 | 0 | cm->seq_params->subsampling_y == 1) { |
895 | | // 4:2:0 subsampling: Constant chroma delta_q decrease (i.e. improved |
896 | | // chroma quality relative to luma) with gradual ramp-down for very low |
897 | | // qindexes. |
898 | | // Lowering chroma delta_q by 16 was found to improve SSIMULACRA 2 |
899 | | // BD-Rate by 1.5-2% on Daala's subset1, as well as reducing chroma |
900 | | // artifacts (smudging, discoloration) during subjective quality |
901 | | // evaluations. |
902 | | // The ramp-down of chroma increase was determined by generating the |
903 | | // convex hull of SSIMULACRA 2 scores (for all boosts from 0-16), and |
904 | | // finding a linear equation that fits the convex hull. |
905 | 0 | chroma_dc_delta_q = -clamp((quant_params->base_qindex / 2) - 14, 0, 16); |
906 | 0 | chroma_ac_delta_q = chroma_dc_delta_q; |
907 | 0 | } else if (cm->seq_params->subsampling_x == 1 && |
908 | 0 | cm->seq_params->subsampling_y == 0) { |
909 | | // 4:2:2 subsampling: Constant chroma AC delta_q increase (i.e. improved |
910 | | // luma quality relative to chroma) with gradual ramp-down for very low |
911 | | // qindexes. |
912 | | // SSIMULACRA 2 appears to have some issues correctly scoring 4:2:2 |
913 | | // material. Solely optimizing for maximum scores suggests a chroma AC |
914 | | // delta_q of 12 is the most efficient. However, visual inspection on |
915 | | // difficult-to-encode material resulted in chroma quality degrading too |
916 | | // much relative to luma, and chroma channels ending up being too small |
917 | | // compared to equivalent 4:4:4 or 4:2:0 encodes. |
918 | | // A chroma AC delta_q of 6 was selected because encoded chroma channels |
919 | | // have a much closer size to 4:4:4 and 4:2:0 encodes, and have more |
920 | | // favorable visual quality characteristics. |
921 | | // The ramp-down of chroma decrease was put into place to match 4:2:0 |
922 | | // and 4:4:4 behavior. There were no special considerations on |
923 | | // SSIMULACRA 2 scores. |
924 | 0 | chroma_dc_delta_q = 0; |
925 | 0 | chroma_ac_delta_q = clamp((quant_params->base_qindex / 2), 0, 6); |
926 | 0 | } else if (cm->seq_params->subsampling_x == 0 && |
927 | 0 | cm->seq_params->subsampling_y == 0) { |
928 | | // 4:4:4 subsampling: Constant chroma AC delta_q increase (i.e. improved |
929 | | // luma quality relative to chroma) with gradual ramp-down for very low |
930 | | // qindexes. |
931 | | // Raising chroma AC delta_q by 24 was found to improve SSIMULACRA 2 |
932 | | // BD-Rate by 2.5-3% on Daala's subset1, as well as providing a more |
933 | | // balanced bit allocation between the (relatively-starved) luma and |
934 | | // chroma channels. |
935 | | // Raising chroma DC delta_q appears to be harmful, both for SSIMULACRA |
936 | | // 2 scores and subjective quality (harshens blocking artifacts). |
937 | | // The ramp-down of chroma decrease was put into place so (lossy) QP 0 |
938 | | // encodes still score within 0.1 SSIMULACRA 2 points of the equivalent |
939 | | // with no chroma delta_q (with a small efficiency improvement), while |
940 | | // encodes in the SSIMULACRA 2 <=90 range yield full benefits from this |
941 | | // adjustment. |
942 | 0 | chroma_dc_delta_q = 0; |
943 | 0 | chroma_ac_delta_q = clamp((quant_params->base_qindex / 2), 0, 24); |
944 | 0 | } |
945 | | |
946 | | // TODO: bug https://crbug.com/aomedia/375221136 - find chroma_delta_q |
947 | | // values for 4:2:2 subsampling mode. |
948 | 0 | quant_params->u_dc_delta_q = chroma_dc_delta_q; |
949 | 0 | quant_params->u_ac_delta_q = chroma_ac_delta_q; |
950 | 0 | quant_params->v_dc_delta_q = chroma_dc_delta_q; |
951 | 0 | quant_params->v_ac_delta_q = chroma_ac_delta_q; |
952 | 0 | } else { |
953 | | // TODO(aomedia:2717): need to design better delta |
954 | 0 | quant_params->u_dc_delta_q = 2; |
955 | 0 | quant_params->u_ac_delta_q = 2; |
956 | 0 | quant_params->v_dc_delta_q = 2; |
957 | 0 | quant_params->v_ac_delta_q = 2; |
958 | 0 | } |
959 | 126k | } else { |
960 | 126k | quant_params->u_dc_delta_q = 0; |
961 | 126k | quant_params->u_ac_delta_q = 0; |
962 | 126k | quant_params->v_dc_delta_q = 0; |
963 | 126k | quant_params->v_ac_delta_q = 0; |
964 | 126k | } |
965 | | |
966 | | // following section 8.3.2 in T-REC-H.Sup15 document |
967 | | // to apply to AV1 qindex in the range of [0, 255] |
968 | 126k | if (enable_hdr_deltaq && q) { |
969 | 0 | int dqpCb = adjust_hdr_cb_deltaq(quant_params->base_qindex); |
970 | 0 | int dqpCr = adjust_hdr_cr_deltaq(quant_params->base_qindex); |
971 | 0 | quant_params->u_dc_delta_q = quant_params->u_ac_delta_q = dqpCb; |
972 | 0 | quant_params->v_dc_delta_q = quant_params->v_ac_delta_q = dqpCr; |
973 | 0 | if (dqpCb != dqpCr) { |
974 | 0 | cm->seq_params->separate_uv_delta_q = 1; |
975 | 0 | } |
976 | 0 | } |
977 | | |
978 | | // Select the best luma and chroma QM formulas based on encoding mode and |
979 | | // tuning |
980 | 126k | int (*get_luma_qmlevel)(int, int, int); |
981 | 126k | int (*get_chroma_qmlevel)(int, int, int); |
982 | | |
983 | 126k | if (is_allintra) { |
984 | 44.0k | if (tuning == AOM_TUNE_IQ || tuning == AOM_TUNE_SSIMULACRA2) { |
985 | 0 | if (tuning == AOM_TUNE_SSIMULACRA2) { |
986 | | // Use luma QM formula specifically tailored for tune SSIMULACRA2 |
987 | 0 | get_luma_qmlevel = aom_get_qmlevel_luma_ssimulacra2; |
988 | 0 | } else { |
989 | 0 | get_luma_qmlevel = aom_get_qmlevel_allintra; |
990 | 0 | } |
991 | |
|
992 | 0 | if (cm->seq_params->subsampling_x == 0 && |
993 | 0 | cm->seq_params->subsampling_y == 0) { |
994 | | // 4:4:4 subsampling mode has 4x the number of chroma coefficients |
995 | | // compared to 4:2:0 (2x on each dimension). This means the encoder |
996 | | // should use lower chroma QM levels that more closely match the scaling |
997 | | // of an equivalent 4:2:0 chroma QM. |
998 | 0 | get_chroma_qmlevel = aom_get_qmlevel_444_chroma; |
999 | 0 | } else { |
1000 | | // For all other chroma subsampling modes, use the all intra QM formula |
1001 | 0 | get_chroma_qmlevel = aom_get_qmlevel_allintra; |
1002 | 0 | } |
1003 | 44.0k | } else { |
1004 | 44.0k | get_luma_qmlevel = aom_get_qmlevel_allintra; |
1005 | 44.0k | get_chroma_qmlevel = aom_get_qmlevel_allintra; |
1006 | 44.0k | } |
1007 | 82.6k | } else { |
1008 | 82.6k | get_luma_qmlevel = aom_get_qmlevel; |
1009 | 82.6k | get_chroma_qmlevel = aom_get_qmlevel; |
1010 | 82.6k | } |
1011 | | |
1012 | 126k | quant_params->qmatrix_level_y = |
1013 | 126k | get_luma_qmlevel(quant_params->base_qindex, min_qmlevel, max_qmlevel); |
1014 | 126k | quant_params->qmatrix_level_u = |
1015 | 126k | get_chroma_qmlevel(quant_params->base_qindex + quant_params->u_ac_delta_q, |
1016 | 126k | min_qmlevel, max_qmlevel); |
1017 | | |
1018 | 126k | if (cm->seq_params->separate_uv_delta_q) { |
1019 | 0 | quant_params->qmatrix_level_v = get_chroma_qmlevel( |
1020 | 0 | quant_params->base_qindex + quant_params->v_ac_delta_q, min_qmlevel, |
1021 | 0 | max_qmlevel); |
1022 | 126k | } else { |
1023 | 126k | quant_params->qmatrix_level_v = quant_params->qmatrix_level_u; |
1024 | 126k | } |
1025 | 126k | } |
1026 | | |
// Table that converts 0-63 Q-range values passed in from outside to the
// qindex range used internally.
static const int quantizer_to_qindex[] = {
  0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
  52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
  104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
  156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
  208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
};

// Maps an external quantizer value to the internal qindex.
//
// `quantizer` is expected to be in [0, 63]. Values outside that range are
// saturated to the nearest end of the table instead of indexing out of
// bounds.
//
// Returns the qindex, in [0, 255].
int av1_quantizer_to_qindex(int quantizer) {
  const int last_quantizer =
      (int)(sizeof(quantizer_to_qindex) / sizeof(quantizer_to_qindex[0])) - 1;
  if (quantizer < 0) return quantizer_to_qindex[0];
  if (quantizer > last_quantizer) return quantizer_to_qindex[last_quantizer];
  return quantizer_to_qindex[quantizer];
}
1040 | | |
1041 | 0 | int av1_qindex_to_quantizer(int qindex) { |
1042 | 0 | int quantizer; |
1043 | |
|
1044 | 0 | for (quantizer = 0; quantizer < 64; ++quantizer) |
1045 | 0 | if (quantizer_to_qindex[quantizer] >= qindex) return quantizer; |
1046 | | |
1047 | 0 | return 63; |
1048 | 0 | } |