// rav1e-0.7.1/src/quantize/mod.rs
// Copyright (c) 2017-2022, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

#![allow(non_upper_case_globals)]

mod tables;

cfg_if::cfg_if! {
  if #[cfg(nasm_x86_64)] {
    pub use crate::asm::x86::quantize::*;
  } else {
    pub use self::rust::*;
  }
}

pub use tables::*;

use crate::scan_order::av1_scan_orders;
use crate::transform::{TxSize, TxType};
use crate::util::*;
use std::convert::Into;
use std::mem;
use std::num::{NonZeroU16, NonZeroU32, NonZeroU64};

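/// Returns the base-2 log of the scale applied to coefficients for this
/// transform size: 0 for areas of at most 256 pixels, 1 for areas up to
/// 1024, and 2 beyond that.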
pub fn get_log_tx_scale(tx_size: TxSize) -> usize {
  let num_pixels = tx_size.area();

  Into::<usize>::into(num_pixels > 256)
    + Into::<usize>::into(num_pixels > 1024)
}

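/// Looks up the DC quantizer for `qindex` with `delta_q` applied, clamped to
/// the valid index range. `(bit_depth ^ 8) >> 1` selects the table: it maps
/// a bit depth of 8 to 0, 10 to 1, and 12 to 2.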
pub fn dc_q(qindex: u8, delta_q: i8, bit_depth: usize) -> NonZeroU16 {
  let dc_q: [&[NonZeroU16; 256]; 3] =
    [&dc_qlookup_Q3, &dc_qlookup_10_Q3, &dc_qlookup_12_Q3];
  let bd = ((bit_depth ^ 8) >> 1).min(2);
  dc_q[bd][((qindex as isize + delta_q as isize).max(0) as usize).min(255)]
}

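/// Looks up the AC quantizer; the table indexing works as in [`dc_q`].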
pub fn ac_q(qindex: u8, delta_q: i8, bit_depth: usize) -> NonZeroU16 {
  let ac_q: [&[NonZeroU16; 256]; 3] =
    [&ac_qlookup_Q3, &ac_qlookup_10_Q3, &ac_qlookup_12_Q3];
  let bd = ((bit_depth ^ 8) >> 1).min(2);
  ac_q[bd][((qindex as isize + delta_q as isize).max(0) as usize).min(255)]
}

// TODO: Handle lossless properly.
fn select_qi(quantizer: i64, qlookup: &[NonZeroU16; QINDEX_RANGE]) -> u8 {
  if quantizer < qlookup[MINQ].get() as i64 {
    MINQ as u8
  } else if quantizer >= qlookup[MAXQ].get() as i64 {
    MAXQ as u8
  } else {
    match qlookup
      .binary_search(&NonZeroU16::new(quantizer as u16).expect("Not zero"))
    {
      Ok(qi) => qi as u8,
      Err(qi) => {
        debug_assert!(qi > MINQ);
        debug_assert!(qi <= MAXQ);
        // Pick the closest quantizer in the log domain.
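        // Comparing quantizer^2 against qlookup[qi - 1] * qlookup[qi] is
        // equivalent to comparing log(quantizer) against the midpoint of the
        // two entries' logs, so this rounds to the nearer table entry on a
        // log scale.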
        let qthresh =
          (qlookup[qi - 1].get() as i32) * (qlookup[qi].get() as i32);
        let q2_i32 = (quantizer as i32) * (quantizer as i32);
        if q2_i32 < qthresh {
          (qi - 1) as u8
        } else {
          qi as u8
        }
      }
    }
  }
}

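/// Returns the qindex whose DC quantizer is closest to `quantizer` for the
/// given bit depth.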
pub fn select_dc_qi(quantizer: i64, bit_depth: usize) -> u8 {
  let qlookup = match bit_depth {
    8 => &dc_qlookup_Q3,
    10 => &dc_qlookup_10_Q3,
    12 => &dc_qlookup_12_Q3,
    _ => unimplemented!(),
  };
  select_qi(quantizer, qlookup)
}

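/// Returns the qindex whose AC quantizer is closest to `quantizer` for the
/// given bit depth.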
pub fn select_ac_qi(quantizer: i64, bit_depth: usize) -> u8 {
  let qlookup = match bit_depth {
    8 => &ac_qlookup_Q3,
    10 => &ac_qlookup_10_Q3,
    12 => &ac_qlookup_12_Q3,
    _ => unimplemented!(),
  };
  select_qi(quantizer, qlookup)
}

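/// Per-block quantization state: the DC/AC quantizers, their reciprocal
/// multipliers from `divu_gen`, and the rounding offsets computed in
/// `update`.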
#[derive(Debug, Clone, Copy)]
pub struct QuantizationContext {
  log_tx_scale: usize,
  dc_quant: NonZeroU16,
  dc_offset: u32,
  dc_mul_add: (u32, u32, u32),

  ac_quant: NonZeroU16,
  ac_offset_eob: u32,
  ac_offset0: u32,
  ac_offset1: u32,
  ac_mul_add: (u32, u32, u32),
}

impl Default for QuantizationContext {
  fn default() -> Self {
    QuantizationContext {
      dc_quant: NonZeroU16::new(1).expect("Not zero"),
      ac_quant: NonZeroU16::new(1).expect("Not zero"),
      log_tx_scale: Default::default(),
      dc_offset: Default::default(),
      dc_mul_add: Default::default(),
      ac_offset_eob: Default::default(),
      ac_offset0: Default::default(),
      ac_offset1: Default::default(),
      ac_mul_add: Default::default(),
    }
  }
}

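// Precomputes a (multiplier, addend, shift) triple so that `divu_pair` can
// divide by `d` with a widening multiply and two shifts instead of a hardware
// divide; this is the usual round-up/round-down reciprocal construction for
// unsigned division by a constant.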
fn divu_gen(d: NonZeroU32) -> (u32, u32, u32) {
  let nbits = (mem::size_of_val(&d) as u64) * 8;
  let m = nbits - d.leading_zeros() as u64 - 1;
  if d.is_power_of_two() {
    (0xFFFF_FFFF, 0xFFFF_FFFF, m as u32)
  } else {
    let d = NonZeroU64::from(d);
    let t = (1u64 << (m + nbits)) / d;

    let d = d.get();
    let r = (t * d + d) & ((1 << nbits) - 1);
    if r <= 1u64 << m {
      (t as u32 + 1, 0u32, m as u32)
    } else {
      (t as u32, t as u32, m as u32)
    }
  }
}

#[inline]
const fn divu_pair(x: u32, d: (u32, u32, u32)) -> u32 {
  let x = x as u64;
  let (a, b, shift) = d;
  let shift = shift as u64;
  let a = a as u64;
  let b = b as u64;

  (((a * x + b) >> 32) >> shift) as u32
}

#[inline]
const fn copysign(value: u32, signed: i32) -> i32 {
  if signed < 0 {
    -(value as i32)
  } else {
    value as i32
  }
}

#[cfg(test)]
mod test {
  use super::*;
  use crate::transform::TxSize::*;

  #[test]
  fn test_divu_pair() {
    for d in 1..1024 {
      for x in 0..1000 {
        let ab = divu_gen(NonZeroU32::new(d).unwrap());
        assert_eq!(x / d, divu_pair(x, ab));
      }
    }
  }

  #[test]
  fn gen_divu_table() {
    let b: Vec<(u32, u32, u32)> =
      dc_qlookup_Q3.iter().map(|&v| divu_gen(v.into())).collect();

    println!("{:?}", b);
  }

  #[test]
  fn test_tx_log_scale() {
    let tx_sizes = [
      (TX_4X4, 0),
      (TX_8X8, 0),
      (TX_16X16, 0),
      (TX_32X32, 1),
      (TX_64X64, 2),
      (TX_4X8, 0),
      (TX_8X4, 0),
      (TX_8X16, 0),
      (TX_16X8, 0),
      (TX_16X32, 1),
      (TX_32X16, 1),
      (TX_32X64, 2),
      (TX_64X32, 2),
      (TX_4X16, 0),
      (TX_16X4, 0),
      (TX_8X32, 0),
      (TX_32X8, 0),
      (TX_16X64, 1),
      (TX_64X16, 1),
    ];
    for &tx_size in tx_sizes.iter() {
      assert_eq!(tx_size.1, get_log_tx_scale(tx_size.0));
    }
  }
}

impl QuantizationContext {
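  /// Derives the quantizers, reciprocal multipliers and rounding offsets
  /// used to quantize one block at the given qindex, transform size and
  /// prediction mode.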
  pub fn update(
    &mut self, qindex: u8, tx_size: TxSize, is_intra: bool, bit_depth: usize,
    dc_delta_q: i8, ac_delta_q: i8,
  ) {
    self.log_tx_scale = get_log_tx_scale(tx_size);

    self.dc_quant = dc_q(qindex, dc_delta_q, bit_depth);
    self.dc_mul_add = divu_gen(self.dc_quant.into());

    self.ac_quant = ac_q(qindex, ac_delta_q, bit_depth);
    self.ac_mul_add = divu_gen(self.ac_quant.into());

    // All of these biases were derived by measuring the cost of coding
    // a zero vs coding a one on any given coefficient position, or, in
    // the case of the EOB bias, the cost of coding the block with
    // the chosen EOB (rounding to one) vs rounding to zero and continuing
    // to choose a new EOB. This was done over several clips, with the
    // average of the bit costs taken over all blocks in the set, and a new
    // bias derived via the method outlined in Jean-Marc Valin's
    // Journal of Dubious Theoretical Results[1], aka:
    //
    // lambda = ln(2) / 6.0
    // threshold = 0.5 + (lambda * avg_rate_diff) / 2.0
    // bias = 1 - threshold
    //
    // lambda is a constant since our offsets are already adjusted for the
    // quantizer.
    //
    // Biases were then updated, and cost collection was re-run, until
    // the calculated biases started to converge after 2-4 iterations.
    //
    // In theory, the rounding biases for inter should be somewhat smaller
    // than the biases for intra, but this turns out to only be the case
    // for EOB optimization, or at least, is covered by EOB optimization.
    // The RD-optimal rounding biases for the actual coefficients seem
    // to be quite close (+/- 1/256), for both inter and intra,
    // post-deadzoning.
    //
    // [1] https://jmvalin.ca/notes/theoretical_results.pdf
    self.dc_offset =
      self.dc_quant.get() as u32 * (if is_intra { 109 } else { 108 }) / 256;
    self.ac_offset0 =
      self.ac_quant.get() as u32 * (if is_intra { 98 } else { 97 }) / 256;
    self.ac_offset1 =
      self.ac_quant.get() as u32 * (if is_intra { 109 } else { 108 }) / 256;
    self.ac_offset_eob =
      self.ac_quant.get() as u32 * (if is_intra { 88 } else { 44 }) / 256;
  }

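  /// Quantizes `coeffs` into `qcoeffs` and returns the end of block: one
  /// past the position of the last nonzero coefficient in scan order, or 0
  /// if the block quantizes to all zeros.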
  #[inline]
  pub fn quantize<T: Coefficient>(
    &self, coeffs: &[T], qcoeffs: &mut [T], tx_size: TxSize, tx_type: TxType,
  ) -> u16 {
    let scan = av1_scan_orders[tx_size as usize][tx_type as usize].scan;
    let iscan = av1_scan_orders[tx_size as usize][tx_type as usize].iscan;

    qcoeffs[0] = {
      let coeff: i32 = i32::cast_from(coeffs[0]) << self.log_tx_scale;
      let abs_coeff = coeff.unsigned_abs();
      T::cast_from(copysign(
        divu_pair(abs_coeff + self.dc_offset, self.dc_mul_add),
        coeff,
      ))
    };

    // Find the last non-zero coefficient using our smaller biases and
    // zero everything else.
    // This threshold is such that `abs(coeff) < deadzone` implies:
    // (abs(coeff << log_tx_scale) + ac_offset_eob) / ac_quant == 0
    let deadzone = T::cast_from(
      (self.ac_quant.get() as usize - self.ac_offset_eob as usize)
        .align_power_of_two_and_shift(self.log_tx_scale),
    );
    let eob = {
      let eob_minus_one = iscan
        .iter()
        .zip(coeffs)
        .map(|(&i, &c)| if c.abs() >= deadzone { i } else { 0 })
        .max()
        .unwrap_or(0);
      // We skip the DC coefficient since it has its own quantizer index.
      if eob_minus_one > 0 {
        eob_minus_one + 1
      } else {
        u16::from(qcoeffs[0] != T::cast_from(0))
      }
    };

    // Here we use different rounding biases depending on whether we've
    // had recent coefficients that are larger than one, or less than
    // one. The reason for this is that a block usually has a chunk of
    // large coefficients and a tail of zeroes and ones, and the tradeoffs
    // for coding these two are different. In the tail of zeroes and ones,
    // you'll likely end up spending most bits just saying where that
    // coefficient is in the block, whereas in the chunk of larger
    // coefficients, most bits will be spent on coding its magnitude.
    // To that end, we want to bias more toward rounding to zero for
    // that tail of zeroes and ones than we do for the larger coefficients.
    let mut level_mode = 1;
    let ac_quant = self.ac_quant.get() as u32;
    for &pos in scan.iter().take(usize::from(eob)).skip(1) {
      let coeff = i32::cast_from(coeffs[pos as usize]) << self.log_tx_scale;
      let abs_coeff = coeff.unsigned_abs();

      let level0 = divu_pair(abs_coeff, self.ac_mul_add);
      let offset = if level0 > 1 - level_mode {
        self.ac_offset1
      } else {
        self.ac_offset0
      };

      let abs_qcoeff: u32 =
        level0 + (abs_coeff + offset >= (level0 + 1) * ac_quant) as u32;
      if level_mode != 0 && abs_qcoeff == 0 {
        level_mode = 0;
      } else if abs_qcoeff > 1 {
        level_mode = 1;
      }

      qcoeffs[pos as usize] = T::cast_from(copysign(abs_qcoeff, coeff));
    }
341 | | |
342 | | // Rather than zeroing the tail in scan order, assume that qcoeffs is |
343 | | // pre-filled with zeros. |
344 | | |
345 | | // Check the eob is correct |
346 | 0 | debug_assert_eq!( |
347 | 0 | usize::from(eob), |
348 | 0 | scan |
349 | 0 | .iter() |
350 | 0 | .rposition(|&i| qcoeffs[i as usize] != T::cast_from(0)) |
351 | 0 | .map(|n| n + 1) |
352 | 0 | .unwrap_or(0) |
353 | | ); |
354 | | |
355 | 0 | eob |
356 | 0 | } Unexecuted instantiation: <rav1e::quantize::QuantizationContext>::quantize::<i32> Unexecuted instantiation: <rav1e::quantize::QuantizationContext>::quantize::<i16> |
357 | | } |

pub mod rust {
  use super::*;
  use crate::cpu_features::CpuFeatureLevel;
  use std::mem::MaybeUninit;

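  /// Scales quantized coefficients back up by the per-position quantizer
  /// (DC for index 0, AC elsewhere), shifting out `log_tx_scale` with
  /// rounding toward zero.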
  pub fn dequantize<T: Coefficient>(
    qindex: u8, coeffs: &[T], _eob: u16, rcoeffs: &mut [MaybeUninit<T>],
    tx_size: TxSize, bit_depth: usize, dc_delta_q: i8, ac_delta_q: i8,
    _cpu: CpuFeatureLevel,
  ) {
    let log_tx_scale = get_log_tx_scale(tx_size) as i32;
    let offset = (1 << log_tx_scale) - 1;

    let dc_quant = dc_q(qindex, dc_delta_q, bit_depth).get() as i32;
    let ac_quant = ac_q(qindex, ac_delta_q, bit_depth).get() as i32;

    for (i, (r, c)) in rcoeffs
      .iter_mut()
      .zip(coeffs.iter().map(|&c| i32::cast_from(c)))
      .enumerate()
    {
      let quant = if i == 0 { dc_quant } else { ac_quant };
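      // `c >> 31` is all ones exactly when `c` is negative, so `offset`
      // (2^log_tx_scale - 1) is added only to negative values, making the
      // arithmetic shift below round toward zero instead of toward -inf.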
      r.write(T::cast_from(
        (c * quant + ((c >> 31) & offset)) >> log_tx_scale,
      ));
    }
  }
}