Coverage Report

Created: 2025-07-11 07:25

/rust/registry/src/index.crates.io-6f17d22bba15001f/rav1e-0.7.1/src/quantize/mod.rs
Source (line-number and per-line execution-count columns omitted below: every count recorded for this file is 0, i.e. the file is entirely uncovered)
// Copyright (c) 2017-2022, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

#![allow(non_upper_case_globals)]

mod tables;

cfg_if::cfg_if! {
  if #[cfg(nasm_x86_64)] {
    pub use crate::asm::x86::quantize::*;
  } else {
    pub use self::rust::*;
  }
}

pub use tables::*;

use crate::scan_order::av1_scan_orders;
use crate::transform::{TxSize, TxType};
use crate::util::*;
use std::convert::Into;
use std::mem;
use std::num::{NonZeroU16, NonZeroU32, NonZeroU64};

pub fn get_log_tx_scale(tx_size: TxSize) -> usize {
  let num_pixels = tx_size.area();

  Into::<usize>::into(num_pixels > 256)
    + Into::<usize>::into(num_pixels > 1024)
}

pub fn dc_q(qindex: u8, delta_q: i8, bit_depth: usize) -> NonZeroU16 {
  let dc_q: [&[NonZeroU16; 256]; 3] =
    [&dc_qlookup_Q3, &dc_qlookup_10_Q3, &dc_qlookup_12_Q3];
  let bd = ((bit_depth ^ 8) >> 1).min(2);
  dc_q[bd][((qindex as isize + delta_q as isize).max(0) as usize).min(255)]
}

pub fn ac_q(qindex: u8, delta_q: i8, bit_depth: usize) -> NonZeroU16 {
  let ac_q: [&[NonZeroU16; 256]; 3] =
    [&ac_qlookup_Q3, &ac_qlookup_10_Q3, &ac_qlookup_12_Q3];
  let bd = ((bit_depth ^ 8) >> 1).min(2);
  ac_q[bd][((qindex as isize + delta_q as isize).max(0) as usize).min(255)]
}
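
// Illustrative sketch, not part of the original file: the bit-depth indexing
// shared by dc_q and ac_q maps 8 -> 0, 10 -> 1 and 12 -> 2, selecting the
// 8-, 10- and 12-bit lookup tables respectively.
#[cfg(test)]
#[test]
fn bit_depth_index_example() {
  let bd_index = |bit_depth: usize| ((bit_depth ^ 8) >> 1).min(2);
  assert_eq!(bd_index(8), 0);
  assert_eq!(bd_index(10), 1);
  assert_eq!(bd_index(12), 2);
}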

// TODO: Handle lossless properly.
fn select_qi(quantizer: i64, qlookup: &[NonZeroU16; QINDEX_RANGE]) -> u8 {
  if quantizer < qlookup[MINQ].get() as i64 {
    MINQ as u8
  } else if quantizer >= qlookup[MAXQ].get() as i64 {
    MAXQ as u8
  } else {
    match qlookup
      .binary_search(&NonZeroU16::new(quantizer as u16).expect("Not zero"))
    {
      Ok(qi) => qi as u8,
      Err(qi) => {
        debug_assert!(qi > MINQ);
        debug_assert!(qi <= MAXQ);
        // Pick the closest quantizer in the log domain.
        let qthresh =
          (qlookup[qi - 1].get() as i32) * (qlookup[qi].get() as i32);
        let q2_i32 = (quantizer as i32) * (quantizer as i32);
        if q2_i32 < qthresh {
          (qi - 1) as u8
        } else {
          qi as u8
        }
      }
    }
  }
}

pub fn select_dc_qi(quantizer: i64, bit_depth: usize) -> u8 {
  let qlookup = match bit_depth {
    8 => &dc_qlookup_Q3,
    10 => &dc_qlookup_10_Q3,
    12 => &dc_qlookup_12_Q3,
    _ => unimplemented!(),
  };
  select_qi(quantizer, qlookup)
}

pub fn select_ac_qi(quantizer: i64, bit_depth: usize) -> u8 {
  let qlookup = match bit_depth {
    8 => &ac_qlookup_Q3,
    10 => &ac_qlookup_10_Q3,
    12 => &ac_qlookup_12_Q3,
    _ => unimplemented!(),
  };
  select_qi(quantizer, qlookup)
}
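
// Illustrative sketch, not part of the original file: select_qi picks the
// closest table entry in the log domain by comparing quantizer^2 against the
// product of the two neighbouring entries, i.e. the square of their geometric
// mean. With made-up neighbours 40 and 90 the geometric mean is 60, so 59
// rounds down and 61 rounds up.
#[cfg(test)]
#[test]
fn log_domain_rounding_example() {
  let closest = |q: i32, lo: i32, hi: i32| if q * q < lo * hi { lo } else { hi };
  assert_eq!(closest(59, 40, 90), 40);
  assert_eq!(closest(61, 40, 90), 90);
}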

#[derive(Debug, Clone, Copy)]
pub struct QuantizationContext {
  log_tx_scale: usize,
  dc_quant: NonZeroU16,
  dc_offset: u32,
  dc_mul_add: (u32, u32, u32),

  ac_quant: NonZeroU16,
  ac_offset_eob: u32,
  ac_offset0: u32,
  ac_offset1: u32,
  ac_mul_add: (u32, u32, u32),
}

impl Default for QuantizationContext {
  fn default() -> Self {
    QuantizationContext {
      dc_quant: NonZeroU16::new(1).expect("Not zero"),
      ac_quant: NonZeroU16::new(1).expect("Not zero"),
      log_tx_scale: Default::default(),
      dc_offset: Default::default(),
      dc_mul_add: Default::default(),
      ac_offset_eob: Default::default(),
      ac_offset0: Default::default(),
      ac_offset1: Default::default(),
      ac_mul_add: Default::default(),
    }
  }
}

fn divu_gen(d: NonZeroU32) -> (u32, u32, u32) {
  let nbits = (mem::size_of_val(&d) as u64) * 8;
  let m = nbits - d.leading_zeros() as u64 - 1;
  if d.is_power_of_two() {
    (0xFFFF_FFFF, 0xFFFF_FFFF, m as u32)
  } else {
    let d = NonZeroU64::from(d);
    let t = (1u64 << (m + nbits)) / d;

    let d = d.get();
    let r = (t * d + d) & ((1 << nbits) - 1);
    if r <= 1u64 << m {
      (t as u32 + 1, 0u32, m as u32)
    } else {
      (t as u32, t as u32, m as u32)
    }
  }
}

#[inline]
const fn divu_pair(x: u32, d: (u32, u32, u32)) -> u32 {
  let x = x as u64;
  let (a, b, shift) = d;
  let shift = shift as u64;
  let a = a as u64;
  let b = b as u64;

  (((a * x + b) >> 32) >> shift) as u32
}
Unexecuted instantiation: rav1e::quantize::divu_pair

#[inline]
const fn copysign(value: u32, signed: i32) -> i32 {
  if signed < 0 {
    -(value as i32)
  } else {
    value as i32
  }
}
Unexecuted instantiation: rav1e::quantize::copysign
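
// Illustrative sketch, not part of the original file: divu_gen(d) builds a
// multiply/shift pair so that divu_pair(x, divu_gen(d)) == x / d without a
// hardware divide. For d = 3 the pair is (0xAAAA_AAAB, 0, 1), where
// 0xAAAA_AAAB is ceil(2^33 / 3).
#[cfg(test)]
#[test]
fn divu_pair_by_three_example() {
  let magic = divu_gen(NonZeroU32::new(3).unwrap());
  assert_eq!(magic, (0xAAAA_AAAB, 0, 1));
  assert_eq!(divu_pair(100, magic), 33);
}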

#[cfg(test)]
mod test {
  use super::*;
  use crate::transform::TxSize::*;

  #[test]
  fn test_divu_pair() {
    for d in 1..1024 {
      for x in 0..1000 {
        let ab = divu_gen(NonZeroU32::new(d).unwrap());
        assert_eq!(x / d, divu_pair(x, ab));
      }
    }
  }
  #[test]
  fn gen_divu_table() {
    let b: Vec<(u32, u32, u32)> =
      dc_qlookup_Q3.iter().map(|&v| divu_gen(v.into())).collect();

    println!("{:?}", b);
  }
  #[test]
  fn test_tx_log_scale() {
    let tx_sizes = [
      (TX_4X4, 0),
      (TX_8X8, 0),
      (TX_16X16, 0),
      (TX_32X32, 1),
      (TX_64X64, 2),
      (TX_4X8, 0),
      (TX_8X4, 0),
      (TX_8X16, 0),
      (TX_16X8, 0),
      (TX_16X32, 1),
      (TX_32X16, 1),
      (TX_32X64, 2),
      (TX_64X32, 2),
      (TX_4X16, 0),
      (TX_16X4, 0),
      (TX_8X32, 0),
      (TX_32X8, 0),
      (TX_16X64, 1),
      (TX_64X16, 1),
    ];
    for &tx_size in tx_sizes.iter() {
      assert!(tx_size.1 == get_log_tx_scale(tx_size.0));
    }
  }
}

impl QuantizationContext {
  pub fn update(
    &mut self, qindex: u8, tx_size: TxSize, is_intra: bool, bit_depth: usize,
    dc_delta_q: i8, ac_delta_q: i8,
  ) {
    self.log_tx_scale = get_log_tx_scale(tx_size);

    self.dc_quant = dc_q(qindex, dc_delta_q, bit_depth);
    self.dc_mul_add = divu_gen(self.dc_quant.into());

    self.ac_quant = ac_q(qindex, ac_delta_q, bit_depth);
    self.ac_mul_add = divu_gen(self.ac_quant.into());

    // All of these biases were derived by measuring the cost of coding
    // a zero vs coding a one on any given coefficient position, or, in
    // the case of the EOB bias, the cost of coding the block with
    // the chosen EOB (rounding to one) vs rounding to zero and continuing
    // to choose a new EOB. This was done over several clips, with the
    // average of the bit costs taken over all blocks in the set, and a new
    // bias derived via the method outlined in Jean-Marc Valin's
    // Journal of Dubious Theoretical Results[1], aka:
    //
    // lambda = ln(2) / 6.0
    // threshold = 0.5 + (lambda * avg_rate_diff) / 2.0
    // bias = 1 - threshold
    //
    // lambda is a constant since our offsets are already adjusted for the
    // quantizer.
    //
    // Biases were then updated, and cost collection was re-run, until
    // the calculated biases started to converge after 2-4 iterations.
    //
    // In theory, the rounding biases for inter should be somewhat smaller
    // than the biases for intra, but this turns out to only be the case
    // for EOB optimization, or at least, is covered by EOB optimization.
    // The RD-optimal rounding biases for the actual coefficients seem
    // to be quite close (+/- 1/256), for both inter and intra,
    // post-deadzoning.
    //
    // [1] https://jmvalin.ca/notes/theoretical_results.pdf
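    //
    // As a purely illustrative instance of the formulas above (1.3 is a
    // made-up value, not a measured rate difference): avg_rate_diff = 1.3
    // gives threshold = 0.5 + (0.1155 * 1.3) / 2.0 ~= 0.575 and therefore
    // bias ~= 0.425, i.e. roughly 109/256 -- the scale of the offsets below.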
    self.dc_offset =
      self.dc_quant.get() as u32 * (if is_intra { 109 } else { 108 }) / 256;
    self.ac_offset0 =
      self.ac_quant.get() as u32 * (if is_intra { 98 } else { 97 }) / 256;
    self.ac_offset1 =
      self.ac_quant.get() as u32 * (if is_intra { 109 } else { 108 }) / 256;
    self.ac_offset_eob =
      self.ac_quant.get() as u32 * (if is_intra { 88 } else { 44 }) / 256;
  }

  #[inline]
  pub fn quantize<T: Coefficient>(
    &self, coeffs: &[T], qcoeffs: &mut [T], tx_size: TxSize, tx_type: TxType,
  ) -> u16 {
    let scan = av1_scan_orders[tx_size as usize][tx_type as usize].scan;
    let iscan = av1_scan_orders[tx_size as usize][tx_type as usize].iscan;

    qcoeffs[0] = {
      let coeff: i32 = i32::cast_from(coeffs[0]) << self.log_tx_scale;
      let abs_coeff = coeff.unsigned_abs();
      T::cast_from(copysign(
        divu_pair(abs_coeff + self.dc_offset, self.dc_mul_add),
        coeff,
      ))
    };

    // Find the last non-zero coefficient using our smaller biases and
    // zero everything else.
    // This threshold is such that `abs(coeff) < deadzone` implies:
    // (abs(coeff << log_tx_scale) + ac_offset_eob) / ac_quant == 0
    let deadzone = T::cast_from(
      (self.ac_quant.get() as usize - self.ac_offset_eob as usize)
        .align_power_of_two_and_shift(self.log_tx_scale),
    );
    let eob = {
      let eob_minus_one = iscan
        .iter()
        .zip(coeffs)
        .map(|(&i, &c)| if c.abs() >= deadzone { i } else { 0 })
Unexecuted instantiation: <rav1e::quantize::QuantizationContext>::quantize::<i32>::{closure#0}
Unexecuted instantiation: <rav1e::quantize::QuantizationContext>::quantize::<i16>::{closure#0}
        .max()
        .unwrap_or(0);
      // We skip the DC coefficient since it has its own quantizer index.
      if eob_minus_one > 0 {
        eob_minus_one + 1
      } else {
        u16::from(qcoeffs[0] != T::cast_from(0))
      }
    };

    // Here we use different rounding biases depending on whether we've
    // had recent coefficients that are larger than one, or less than
    // one. The reason for this is that a block usually has a chunk of
    // large coefficients and a tail of zeroes and ones, and the tradeoffs
    // for coding these two are different. In the tail of zeroes and ones,
    // you'll likely end up spending most bits just saying where that
    // coefficient is in the block, whereas in the chunk of larger
    // coefficients, most bits will be spent on coding its magnitude.
    // To that end, we want to bias more toward rounding to zero for
    // that tail of zeroes and ones than we do for the larger coefficients.
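    // Illustrative, made-up numbers: with is_intra and ac_quant = 20,
    // ac_offset1 is 20 * 109 / 256 = 8; a scaled |coeff| of 33 then becomes
    // 33 / 20 + 1 = 2 (since 33 + 8 >= 2 * 20), while 31 stays at 1
    // (31 + 8 < 40), even though plain round-to-nearest would give 2 for both.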
    let mut level_mode = 1;
    let ac_quant = self.ac_quant.get() as u32;
    for &pos in scan.iter().take(usize::from(eob)).skip(1) {
      let coeff = i32::cast_from(coeffs[pos as usize]) << self.log_tx_scale;
      let abs_coeff = coeff.unsigned_abs();

      let level0 = divu_pair(abs_coeff, self.ac_mul_add);
      let offset = if level0 > 1 - level_mode {
        self.ac_offset1
      } else {
        self.ac_offset0
      };

      let abs_qcoeff: u32 =
        level0 + (abs_coeff + offset >= (level0 + 1) * ac_quant) as u32;
      if level_mode != 0 && abs_qcoeff == 0 {
        level_mode = 0;
      } else if abs_qcoeff > 1 {
        level_mode = 1;
      }

      qcoeffs[pos as usize] = T::cast_from(copysign(abs_qcoeff, coeff));
    }

    // Rather than zeroing the tail in scan order, assume that qcoeffs is
    // pre-filled with zeros.

    // Check the eob is correct
    debug_assert_eq!(
      usize::from(eob),
      scan
        .iter()
        .rposition(|&i| qcoeffs[i as usize] != T::cast_from(0))
        .map(|n| n + 1)
        .unwrap_or(0)
    );

    eob
  }
Unexecuted instantiation: <rav1e::quantize::QuantizationContext>::quantize::<i32>
Unexecuted instantiation: <rav1e::quantize::QuantizationContext>::quantize::<i16>
}

pub mod rust {
  use super::*;
  use crate::cpu_features::CpuFeatureLevel;
  use std::mem::MaybeUninit;

  pub fn dequantize<T: Coefficient>(
    qindex: u8, coeffs: &[T], _eob: u16, rcoeffs: &mut [MaybeUninit<T>],
    tx_size: TxSize, bit_depth: usize, dc_delta_q: i8, ac_delta_q: i8,
    _cpu: CpuFeatureLevel,
  ) {
    let log_tx_scale = get_log_tx_scale(tx_size) as i32;
    let offset = (1 << log_tx_scale) - 1;

    let dc_quant = dc_q(qindex, dc_delta_q, bit_depth).get() as i32;
    let ac_quant = ac_q(qindex, ac_delta_q, bit_depth).get() as i32;

    for (i, (r, c)) in rcoeffs
      .iter_mut()
      .zip(coeffs.iter().map(|&c| i32::cast_from(c)))
Unexecuted instantiation: rav1e::quantize::rust::dequantize::<i32>::{closure#0}
Unexecuted instantiation: rav1e::quantize::rust::dequantize::<i16>::{closure#0}
      .enumerate()
    {
      let quant = if i == 0 { dc_quant } else { ac_quant };
      r.write(T::cast_from(
        (c * quant + ((c >> 31) & offset)) >> log_tx_scale,
      ));
    }
  }
Unexecuted instantiation: rav1e::quantize::rust::dequantize::<i32>
Unexecuted instantiation: rav1e::quantize::rust::dequantize::<i16>
}
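
For reference, a standalone sketch (not part of the file above, with a hypothetical helper named descale mirroring the expression in dequantize): adding (c >> 31) & offset before the arithmetic right shift turns the shift's floor behaviour into rounding toward zero for negative coefficients, while positive coefficients truncate as before.

fn descale(c: i32, quant: i32, log_tx_scale: i32) -> i32 {
  let offset = (1 << log_tx_scale) - 1;
  (c * quant + ((c >> 31) & offset)) >> log_tx_scale
}

fn main() {
  // -5 * 5 = -25: a plain shift would floor -25 / 2 to -13, while the offset
  // rounds it toward zero, to -12; positive values truncate either way.
  assert_eq!(descale(-5, 5, 1), -12);
  assert_eq!(descale(5, 5, 1), 12);
}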