// rav1e-0.7.1/src/quantize/mod.rs
// Copyright (c) 2017-2022, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

#![allow(non_upper_case_globals)]

mod tables;

cfg_if::cfg_if! {
  if #[cfg(nasm_x86_64)] {
    pub use crate::asm::x86::quantize::*;
  } else {
    pub use self::rust::*;
  }
}

pub use tables::*;

use crate::scan_order::av1_scan_orders;
use crate::transform::{TxSize, TxType};
use crate::util::*;
use std::convert::Into;
use std::mem;
use std::num::{NonZeroU16, NonZeroU32, NonZeroU64};

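/// Returns the base-2 log of the scale applied to coefficients for this
/// transform size: 0 for areas of at most 256 pixels, 1 for areas up to
/// 1024, and 2 beyond that.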
pub fn get_log_tx_scale(tx_size: TxSize) -> usize {
  let num_pixels = tx_size.area();

  Into::<usize>::into(num_pixels > 256)
    + Into::<usize>::into(num_pixels > 1024)
}

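/// Looks up the DC quantizer for `qindex` with `delta_q` applied, clamped to
/// the valid index range. `(bit_depth ^ 8) >> 1` selects the table: it maps
/// a bit depth of 8 to 0, 10 to 1, and 12 to 2.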
pub fn dc_q(qindex: u8, delta_q: i8, bit_depth: usize) -> NonZeroU16 {
  let dc_q: [&[NonZeroU16; 256]; 3] =
    [&dc_qlookup_Q3, &dc_qlookup_10_Q3, &dc_qlookup_12_Q3];
  let bd = ((bit_depth ^ 8) >> 1).min(2);
  dc_q[bd][((qindex as isize + delta_q as isize).max(0) as usize).min(255)]
}

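/// Looks up the AC quantizer; the table indexing works as in [`dc_q`].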
pub fn ac_q(qindex: u8, delta_q: i8, bit_depth: usize) -> NonZeroU16 {
  let ac_q: [&[NonZeroU16; 256]; 3] =
    [&ac_qlookup_Q3, &ac_qlookup_10_Q3, &ac_qlookup_12_Q3];
  let bd = ((bit_depth ^ 8) >> 1).min(2);
  ac_q[bd][((qindex as isize + delta_q as isize).max(0) as usize).min(255)]
}

// TODO: Handle lossless properly.
fn select_qi(quantizer: i64, qlookup: &[NonZeroU16; QINDEX_RANGE]) -> u8 {
  if quantizer < qlookup[MINQ].get() as i64 {
    MINQ as u8
  } else if quantizer >= qlookup[MAXQ].get() as i64 {
    MAXQ as u8
  } else {
    match qlookup
      .binary_search(&NonZeroU16::new(quantizer as u16).expect("Not zero"))
    {
      Ok(qi) => qi as u8,
      Err(qi) => {
        debug_assert!(qi > MINQ);
        debug_assert!(qi <= MAXQ);
        // Pick the closest quantizer in the log domain.
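        // Comparing quantizer^2 against qlookup[qi - 1] * qlookup[qi] is
        // equivalent to comparing log(quantizer) against the midpoint of the
        // two entries' logs, so this rounds to the nearer table entry on a
        // log scale.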
        let qthresh =
          (qlookup[qi - 1].get() as i32) * (qlookup[qi].get() as i32);
        let q2_i32 = (quantizer as i32) * (quantizer as i32);
        if q2_i32 < qthresh {
          (qi - 1) as u8
        } else {
          qi as u8
        }
      }
    }
  }
}

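/// Returns the qindex whose DC quantizer is closest to `quantizer` for the
/// given bit depth.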
pub fn select_dc_qi(quantizer: i64, bit_depth: usize) -> u8 {
  let qlookup = match bit_depth {
    8 => &dc_qlookup_Q3,
    10 => &dc_qlookup_10_Q3,
    12 => &dc_qlookup_12_Q3,
    _ => unimplemented!(),
  };
  select_qi(quantizer, qlookup)
}

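/// Returns the qindex whose AC quantizer is closest to `quantizer` for the
/// given bit depth.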
pub fn select_ac_qi(quantizer: i64, bit_depth: usize) -> u8 {
  let qlookup = match bit_depth {
    8 => &ac_qlookup_Q3,
    10 => &ac_qlookup_10_Q3,
    12 => &ac_qlookup_12_Q3,
    _ => unimplemented!(),
  };
  select_qi(quantizer, qlookup)
}

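/// Per-block quantization state: the DC/AC quantizers, their reciprocal
/// multipliers from `divu_gen`, and the rounding offsets computed in
/// `update`.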
#[derive(Debug, Clone, Copy)]
pub struct QuantizationContext {
  log_tx_scale: usize,
  dc_quant: NonZeroU16,
  dc_offset: u32,
  dc_mul_add: (u32, u32, u32),

  ac_quant: NonZeroU16,
  ac_offset_eob: u32,
  ac_offset0: u32,
  ac_offset1: u32,
  ac_mul_add: (u32, u32, u32),
}

impl Default for QuantizationContext {
  fn default() -> Self {
    QuantizationContext {
      dc_quant: NonZeroU16::new(1).expect("Not zero"),
      ac_quant: NonZeroU16::new(1).expect("Not zero"),
      log_tx_scale: Default::default(),
      dc_offset: Default::default(),
      dc_mul_add: Default::default(),
      ac_offset_eob: Default::default(),
      ac_offset0: Default::default(),
      ac_offset1: Default::default(),
      ac_mul_add: Default::default(),
    }
  }
}

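// Precomputes a (multiplier, addend, shift) triple so that `divu_pair` can
// divide by `d` with a widening multiply and two shifts instead of a hardware
// divide; this is the usual round-up/round-down reciprocal construction for
// unsigned division by a constant.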
fn divu_gen(d: NonZeroU32) -> (u32, u32, u32) {
  let nbits = (mem::size_of_val(&d) as u64) * 8;
  let m = nbits - d.leading_zeros() as u64 - 1;
  if d.is_power_of_two() {
    (0xFFFF_FFFF, 0xFFFF_FFFF, m as u32)
  } else {
    let d = NonZeroU64::from(d);
    let t = (1u64 << (m + nbits)) / d;

    let d = d.get();
    let r = (t * d + d) & ((1 << nbits) - 1);
    if r <= 1u64 << m {
      (t as u32 + 1, 0u32, m as u32)
    } else {
      (t as u32, t as u32, m as u32)
    }
  }
}

#[inline]
const fn divu_pair(x: u32, d: (u32, u32, u32)) -> u32 {
  let x = x as u64;
  let (a, b, shift) = d;
  let shift = shift as u64;
  let a = a as u64;
  let b = b as u64;

  (((a * x + b) >> 32) >> shift) as u32
}

#[inline]
const fn copysign(value: u32, signed: i32) -> i32 {
  if signed < 0 {
    -(value as i32)
  } else {
    value as i32
  }
}

#[cfg(test)]
mod test {
  use super::*;
  use crate::transform::TxSize::*;

  #[test]
  fn test_divu_pair() {
    for d in 1..1024 {
      for x in 0..1000 {
        let ab = divu_gen(NonZeroU32::new(d).unwrap());
        assert_eq!(x / d, divu_pair(x, ab));
      }
    }
  }

  #[test]
  fn gen_divu_table() {
    let b: Vec<(u32, u32, u32)> =
      dc_qlookup_Q3.iter().map(|&v| divu_gen(v.into())).collect();

    println!("{:?}", b);
  }

  #[test]
  fn test_tx_log_scale() {
    let tx_sizes = [
      (TX_4X4, 0),
      (TX_8X8, 0),
      (TX_16X16, 0),
      (TX_32X32, 1),
      (TX_64X64, 2),
      (TX_4X8, 0),
      (TX_8X4, 0),
      (TX_8X16, 0),
      (TX_16X8, 0),
      (TX_16X32, 1),
      (TX_32X16, 1),
      (TX_32X64, 2),
      (TX_64X32, 2),
      (TX_4X16, 0),
      (TX_16X4, 0),
      (TX_8X32, 0),
      (TX_32X8, 0),
      (TX_16X64, 1),
      (TX_64X16, 1),
    ];
    for &tx_size in tx_sizes.iter() {
      assert_eq!(tx_size.1, get_log_tx_scale(tx_size.0));
    }
  }
}

impl QuantizationContext {
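  /// Derives the quantizers, reciprocal multipliers and rounding offsets
  /// used to quantize one block at the given qindex, transform size and
  /// prediction mode.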
  pub fn update(
    &mut self, qindex: u8, tx_size: TxSize, is_intra: bool, bit_depth: usize,
    dc_delta_q: i8, ac_delta_q: i8,
  ) {
    self.log_tx_scale = get_log_tx_scale(tx_size);

    self.dc_quant = dc_q(qindex, dc_delta_q, bit_depth);
    self.dc_mul_add = divu_gen(self.dc_quant.into());

    self.ac_quant = ac_q(qindex, ac_delta_q, bit_depth);
    self.ac_mul_add = divu_gen(self.ac_quant.into());

    // All of these biases were derived by measuring the cost of coding
    // a zero vs coding a one on any given coefficient position, or, in
    // the case of the EOB bias, the cost of coding the block with
    // the chosen EOB (rounding to one) vs rounding to zero and continuing
    // to choose a new EOB. This was done over several clips, with the
    // average of the bit costs taken over all blocks in the set, and a new
    // bias derived via the method outlined in Jean-Marc Valin's
    // Journal of Dubious Theoretical Results[1], aka:
    //
    // lambda = ln(2) / 6.0
    // threshold = 0.5 + (lambda * avg_rate_diff) / 2.0
    // bias = 1 - threshold
    //
    // lambda is a constant since our offsets are already adjusted for the
    // quantizer.
    //
    // Biases were then updated, and cost collection was re-run, until
    // the calculated biases started to converge after 2-4 iterations.
    //
    // In theory, the rounding biases for inter should be somewhat smaller
    // than the biases for intra, but this turns out to only be the case
    // for EOB optimization, or at least, is covered by EOB optimization.
    // The RD-optimal rounding biases for the actual coefficients seem
    // to be quite close (+/- 1/256), for both inter and intra,
    // post-deadzoning.
    //
    // [1] https://jmvalin.ca/notes/theoretical_results.pdf
    self.dc_offset =
      self.dc_quant.get() as u32 * (if is_intra { 109 } else { 108 }) / 256;
    self.ac_offset0 =
      self.ac_quant.get() as u32 * (if is_intra { 98 } else { 97 }) / 256;
    self.ac_offset1 =
      self.ac_quant.get() as u32 * (if is_intra { 109 } else { 108 }) / 256;
    self.ac_offset_eob =
      self.ac_quant.get() as u32 * (if is_intra { 88 } else { 44 }) / 256;
  }

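  /// Quantizes `coeffs` into `qcoeffs` and returns the end of block: one
  /// past the position of the last nonzero coefficient in scan order, or 0
  /// if the block quantizes to all zeros.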
  #[inline]
  pub fn quantize<T: Coefficient>(
    &self, coeffs: &[T], qcoeffs: &mut [T], tx_size: TxSize, tx_type: TxType,
  ) -> u16 {
    let scan = av1_scan_orders[tx_size as usize][tx_type as usize].scan;
    let iscan = av1_scan_orders[tx_size as usize][tx_type as usize].iscan;

    qcoeffs[0] = {
      let coeff: i32 = i32::cast_from(coeffs[0]) << self.log_tx_scale;
      let abs_coeff = coeff.unsigned_abs();
      T::cast_from(copysign(
        divu_pair(abs_coeff + self.dc_offset, self.dc_mul_add),
        coeff,
      ))
    };

    // Find the last non-zero coefficient using our smaller biases and
    // zero everything else.
    // This threshold is such that `abs(coeff) < deadzone` implies:
    // (abs(coeff << log_tx_scale) + ac_offset_eob) / ac_quant == 0
    let deadzone = T::cast_from(
      (self.ac_quant.get() as usize - self.ac_offset_eob as usize)
        .align_power_of_two_and_shift(self.log_tx_scale),
    );
    let eob = {
      let eob_minus_one = iscan
        .iter()
        .zip(coeffs)
        .map(|(&i, &c)| if c.abs() >= deadzone { i } else { 0 })
        .max()
        .unwrap_or(0);
      // We skip the DC coefficient since it has its own quantizer index.
      if eob_minus_one > 0 {
        eob_minus_one + 1
      } else {
        u16::from(qcoeffs[0] != T::cast_from(0))
      }
    };

    // Here we use different rounding biases depending on whether we've
    // had recent coefficients that are larger than one, or less than
    // one. The reason for this is that a block usually has a chunk of
    // large coefficients and a tail of zeroes and ones, and the tradeoffs
    // for coding these two are different. In the tail of zeroes and ones,
    // you'll likely end up spending most bits just saying where that
    // coefficient is in the block, whereas in the chunk of larger
    // coefficients, most bits will be spent on coding its magnitude.
    // To that end, we want to bias more toward rounding to zero for
    // that tail of zeroes and ones than we do for the larger coefficients.
    let mut level_mode = 1;
    let ac_quant = self.ac_quant.get() as u32;
    for &pos in scan.iter().take(usize::from(eob)).skip(1) {
      let coeff = i32::cast_from(coeffs[pos as usize]) << self.log_tx_scale;
      let abs_coeff = coeff.unsigned_abs();

      let level0 = divu_pair(abs_coeff, self.ac_mul_add);
      let offset = if level0 > 1 - level_mode {
        self.ac_offset1
      } else {
        self.ac_offset0
      };

      let abs_qcoeff: u32 =
        level0 + (abs_coeff + offset >= (level0 + 1) * ac_quant) as u32;
      if level_mode != 0 && abs_qcoeff == 0 {
        level_mode = 0;
      } else if abs_qcoeff > 1 {
        level_mode = 1;
      }

      qcoeffs[pos as usize] = T::cast_from(copysign(abs_qcoeff, coeff));
    }
341 | | |
342 | | // Rather than zeroing the tail in scan order, assume that qcoeffs is |
343 | | // pre-filled with zeros. |
344 | | |
345 | | // Check the eob is correct |
346 | 0 | debug_assert_eq!( |
347 | 0 | usize::from(eob), |
348 | 0 | scan |
349 | 0 | .iter() |
350 | 0 | .rposition(|&i| qcoeffs[i as usize] != T::cast_from(0)) |
351 | 0 | .map(|n| n + 1) |
352 | 0 | .unwrap_or(0) |
353 | | ); |
354 | | |
355 | 0 | eob |
356 | 0 | } Unexecuted instantiation: <rav1e::quantize::QuantizationContext>::quantize::<i32> Unexecuted instantiation: <rav1e::quantize::QuantizationContext>::quantize::<i16> |
357 | | } |

pub mod rust {
  use super::*;
  use crate::cpu_features::CpuFeatureLevel;
  use std::mem::MaybeUninit;

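  /// Scales quantized coefficients back up by the per-position quantizer
  /// (DC for index 0, AC elsewhere), shifting out `log_tx_scale` with
  /// rounding toward zero.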
  pub fn dequantize<T: Coefficient>(
    qindex: u8, coeffs: &[T], _eob: u16, rcoeffs: &mut [MaybeUninit<T>],
    tx_size: TxSize, bit_depth: usize, dc_delta_q: i8, ac_delta_q: i8,
    _cpu: CpuFeatureLevel,
  ) {
    let log_tx_scale = get_log_tx_scale(tx_size) as i32;
    let offset = (1 << log_tx_scale) - 1;

    let dc_quant = dc_q(qindex, dc_delta_q, bit_depth).get() as i32;
    let ac_quant = ac_q(qindex, ac_delta_q, bit_depth).get() as i32;

    for (i, (r, c)) in rcoeffs
      .iter_mut()
      .zip(coeffs.iter().map(|&c| i32::cast_from(c)))
      .enumerate()
    {
      let quant = if i == 0 { dc_quant } else { ac_quant };
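      // `c >> 31` is all ones exactly when `c` is negative, so `offset`
      // (2^log_tx_scale - 1) is added only to negative values, making the
      // arithmetic shift below round toward zero instead of toward -inf.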
      r.write(T::cast_from(
        (c * quant + ((c >> 31) & offset)) >> log_tx_scale,
      ));
    }
  }
}