/rust/registry/src/index.crates.io-6f17d22bba15001f/rav1e-0.7.1/src/activity.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2017-2022, The rav1e contributors. All rights reserved |
2 | | // |
3 | | // This source code is subject to the terms of the BSD 2 Clause License and |
4 | | // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
5 | | // was not distributed with this source code in the LICENSE file, you can |
6 | | // obtain it at www.aomedia.org/license/software. If the Alliance for Open |
7 | | // Media Patent License 1.0 was not distributed with this source code in the |
8 | | // PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
9 | | |
10 | | use crate::frame::*; |
11 | | use crate::rdo::DistortionScale; |
12 | | use crate::tiling::*; |
13 | | use crate::util::*; |
14 | | use itertools::izip; |
15 | | |
16 | | #[derive(Debug, Default, Clone)] |
17 | | pub struct ActivityMask { |
18 | | variances: Box<[u32]>, |
19 | | } |
20 | | |
21 | | impl ActivityMask { |
22 | 0 | #[profiling::function] Unexecuted instantiation: <rav1e::activity::ActivityMask>::from_plane::<u16> Unexecuted instantiation: <rav1e::activity::ActivityMask>::from_plane::<u8> |
23 | | pub fn from_plane<T: Pixel>(luma_plane: &Plane<T>) -> ActivityMask { |
24 | | let PlaneConfig { width, height, .. } = luma_plane.cfg; |
25 | | |
26 | | // Width and height are padded to 8×8 block size. |
27 | | let w_in_imp_b = width.align_power_of_two_and_shift(3); |
28 | | let h_in_imp_b = height.align_power_of_two_and_shift(3); |
29 | | |
30 | | let aligned_luma = Rect { |
31 | | x: 0_isize, |
32 | | y: 0_isize, |
33 | | width: w_in_imp_b << 3, |
34 | | height: h_in_imp_b << 3, |
35 | | }; |
36 | | let luma = PlaneRegion::new(luma_plane, aligned_luma); |
37 | | |
38 | | let mut variances = Vec::with_capacity(w_in_imp_b * h_in_imp_b); |
39 | | |
40 | | for y in 0..h_in_imp_b { |
41 | | for x in 0..w_in_imp_b { |
42 | | let block_rect = Area::Rect { |
43 | | x: (x << 3) as isize, |
44 | | y: (y << 3) as isize, |
45 | | width: 8, |
46 | | height: 8, |
47 | | }; |
48 | | |
49 | | let block = luma.subregion(block_rect); |
50 | | let variance = variance_8x8(&block); |
51 | | variances.push(variance); |
52 | | } |
53 | | } |
54 | | ActivityMask { variances: variances.into_boxed_slice() } |
55 | | } |
56 | | |
57 | 0 | #[profiling::function] |
58 | | pub fn fill_scales( |
59 | | &self, bit_depth: usize, activity_scales: &mut Box<[DistortionScale]>, |
60 | | ) { |
61 | | for (dst, &src) in activity_scales.iter_mut().zip(self.variances.iter()) { |
62 | | *dst = ssim_boost(src, src, bit_depth); |
63 | | } |
64 | | } |
65 | | } |
66 | | |
67 | | // Adapted from the source variance calculation in `cdef_dist_wxh_8x8`. |
68 | | #[inline(never)] |
69 | 0 | fn variance_8x8<T: Pixel>(src: &PlaneRegion<'_, T>) -> u32 { |
70 | 0 | debug_assert!(src.plane_cfg.xdec == 0); |
71 | 0 | debug_assert!(src.plane_cfg.ydec == 0); |
72 | | |
73 | | // Sum into columns to improve auto-vectorization |
74 | 0 | let mut sum_s_cols: [u16; 8] = [0; 8]; |
75 | 0 | let mut sum_s2_cols: [u32; 8] = [0; 8]; |
76 | 0 |
|
77 | 0 | // Check upfront that 8 rows are available. |
78 | 0 | let _row = &src[7]; |
79 | | |
80 | 0 | for j in 0..8 { |
81 | 0 | let row = &src[j][0..8]; |
82 | 0 | for (sum_s, sum_s2, s) in izip!(&mut sum_s_cols, &mut sum_s2_cols, row) { |
83 | 0 | // Don't convert directly to u32 to allow better vectorization |
84 | 0 | let s: u16 = u16::cast_from(*s); |
85 | 0 | *sum_s += s; |
86 | 0 |
|
87 | 0 | // Convert to u32 to avoid overflows when multiplying |
88 | 0 | let s: u32 = s as u32; |
89 | 0 | *sum_s2 += s * s; |
90 | 0 | } |
91 | | } |
92 | | |
93 | | // Sum together the sum of columns |
94 | 0 | let sum_s = sum_s_cols.iter().copied().map(u64::from).sum::<u64>(); |
95 | 0 | let sum_s2 = sum_s2_cols.iter().copied().map(u64::from).sum::<u64>(); |
96 | 0 |
|
97 | 0 | // Use sums to calculate variance |
98 | 0 | u32::try_from(sum_s2 - ((sum_s * sum_s + 32) >> 6)).unwrap_or(u32::MAX) |
99 | 0 | } Unexecuted instantiation: rav1e::activity::variance_8x8::<u16> Unexecuted instantiation: rav1e::activity::variance_8x8::<u8> |
100 | | |
101 | | /// `rsqrt` result stored in fixed point w/ scaling such that: |
102 | | /// `rsqrt = output.rsqrt_norm / (1 << output.shift)` |
103 | | struct RsqrtOutput { |
104 | | norm: u16, |
105 | | shift: u8, |
106 | | } |
107 | | |
108 | | /// Fixed point `rsqrt` for `ssim_boost` |
109 | 0 | fn ssim_boost_rsqrt(x: u64) -> RsqrtOutput { |
110 | | const INSHIFT: u8 = 16; |
111 | | const OUTSHIFT: u8 = 14; |
112 | | |
113 | 0 | let k = ((ILog::ilog(x) - 1) >> 1) as i16; |
114 | 0 | /*t is x in the range [0.25, 1) in QINSHIFT, or x*2^(-s). |
115 | 0 | Shift by log2(x) - log2(0.25*(1 << INSHIFT)) to ensure 0.25 lower bound.*/ |
116 | 0 | let s: i16 = 2 * k - (INSHIFT as i16 - 2); |
117 | 0 | let t: u16 = if s > 0 { x >> s } else { x << -s } as u16; |
118 | | |
119 | | /*We want to express od_rsqrt() in terms of od_rsqrt_norm(), which is |
120 | | defined as (2^OUTSHIFT)/sqrt(t*(2^-INSHIFT)) with t=x*(2^-s). |
121 | | This simplifies to 2^(OUTSHIFT+(INSHIFT/2)+(s/2))/sqrt(x), so the caller |
122 | | needs to shift right by OUTSHIFT + INSHIFT/2 + s/2.*/ |
123 | 0 | let rsqrt_shift: u8 = (OUTSHIFT as i16 + ((s + INSHIFT as i16) >> 1)) as u8; |
124 | | |
125 | | #[inline(always)] |
126 | 0 | const fn mult16_16_q15(a: i32, b: i32) -> i32 { |
127 | 0 | (a * b) >> 15 |
128 | 0 | } |
129 | | |
130 | | /* Reciprocal sqrt approximation where the input is in the range [0.25,1) in |
131 | | Q16 and the output is in the range (1.0, 2.0] in Q14). */ |
132 | | |
133 | | /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */ |
134 | 0 | let n: i32 = t as i32 - 32768; |
135 | 0 | debug_assert!(n >= -16384); |
136 | | |
137 | | /* Get a rough guess for the root. |
138 | | The optimal minimax quadratic approximation (using relative error) is |
139 | | r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485). |
140 | | Coefficients here, and the final result r, are Q14. */ |
141 | 0 | let rsqrt: i32 = 23557 + mult16_16_q15(n, -13490 + mult16_16_q15(n, 6711)); |
142 | 0 |
|
143 | 0 | debug_assert!((16384..32768).contains(&rsqrt)); |
144 | 0 | RsqrtOutput { norm: rsqrt as u16, shift: rsqrt_shift } |
145 | 0 | } |
146 | | |
147 | | #[inline(always)] |
148 | 0 | pub fn ssim_boost(svar: u32, dvar: u32, bit_depth: usize) -> DistortionScale { |
149 | 0 | DistortionScale(apply_ssim_boost( |
150 | 0 | DistortionScale::default().0, |
151 | 0 | svar, |
152 | 0 | dvar, |
153 | 0 | bit_depth, |
154 | 0 | )) |
155 | 0 | } |
156 | | |
157 | | /// Apply ssim boost to a given input |
158 | | #[inline(always)] |
159 | 0 | pub fn apply_ssim_boost( |
160 | 0 | input: u32, svar: u32, dvar: u32, bit_depth: usize, |
161 | 0 | ) -> u32 { |
162 | 0 | let coeff_shift = bit_depth - 8; |
163 | 0 |
|
164 | 0 | // Scale dvar and svar to lbd range to prevent overflows. |
165 | 0 | let svar = (svar >> (2 * coeff_shift)) as u64; |
166 | 0 | let dvar = (dvar >> (2 * coeff_shift)) as u64; |
167 | | |
168 | | // The constants are such that when source and destination variance are equal, |
169 | | // ssim_boost ~= (x/2)^(-1/3) where x = variance / scale and the scale is |
170 | | // (maximum variance / sample range) << (bit depth - 8). |
171 | | // C2 is the variance floor, equivalent to a flat block of mean valued samples |
172 | | // with a single maximum value sample. |
173 | | const C1: u64 = 3355; |
174 | | const C2: u64 = 16128; |
175 | | const C3: u64 = 12338; |
176 | | const RATIO_SHIFT: u8 = 14; |
177 | | const RATIO: u64 = (((C1 << (RATIO_SHIFT + 1)) / C3) + 1) >> 1; |
178 | | |
179 | | // C1 (svar + dvar + C2) |
180 | | // input * ---- * -------------------------- |
181 | | // C3 sqrt(C1^2 + svar * dvar) |
182 | 0 | let rsqrt = ssim_boost_rsqrt((C1 * C1) + svar * dvar); |
183 | 0 | ((input as u64 |
184 | 0 | * (((RATIO * (svar + dvar + C2)) * rsqrt.norm as u64) >> RATIO_SHIFT)) |
185 | 0 | >> rsqrt.shift) as u32 |
186 | 0 | } |
187 | | |
188 | | #[cfg(test)] |
189 | | mod ssim_boost_tests { |
190 | | use super::*; |
191 | | use interpolate_name::interpolate_test; |
192 | | use rand::Rng; |
193 | | |
194 | | /// Test to make sure extreme values of `ssim_boost` don't overflow. |
195 | | #[test] |
196 | | fn overflow_test() { |
197 | | // Test variance for 8x8 region with a bit depth of 12 |
198 | | let max_pix_diff = (1 << 12) - 1; |
199 | | let max_pix_sse = max_pix_diff * max_pix_diff; |
200 | | let max_variance = max_pix_diff * 8 * 8 / 4; |
201 | | apply_ssim_boost(max_pix_sse * 8 * 8, max_variance, max_variance, 12); |
202 | | } |
203 | | |
204 | | /// Floating point reference version of `ssim_boost` |
205 | | fn reference_ssim_boost(svar: u32, dvar: u32, bit_depth: usize) -> f64 { |
206 | | let coeff_shift = bit_depth - 8; |
207 | | let var_scale = 1f64 / (1 << (2 * coeff_shift)) as f64; |
208 | | let svar = svar as f64 * var_scale; |
209 | | let dvar = dvar as f64 * var_scale; |
210 | | // These constants are from ssim boost and need to be updated if the |
211 | | // constants in ssim boost change. |
212 | | const C1: f64 = 3355f64; |
213 | | const C2: f64 = 16128f64; |
214 | | const C3: f64 = 12338f64; |
215 | | const RATIO: f64 = C1 / C3; |
216 | | |
217 | | RATIO * (svar + dvar + C2) / f64::sqrt(C1.mul_add(C1, svar * dvar)) |
218 | | } |
219 | | |
220 | | /// Test that `ssim_boost` has sufficient accuracy. |
221 | | #[test] |
222 | | fn accuracy_test() { |
223 | | let mut rng = rand::thread_rng(); |
224 | | |
225 | | let mut max_relative_error = 0f64; |
226 | | let bd = 12; |
227 | | |
228 | | // Test different log scale ranges for the variance. |
229 | | // Each scale is tested multiple times with randomized variances. |
230 | | for scale in 0..(bd + 3 * 2 - 2) { |
231 | | for _ in 0..40 { |
232 | | let svar = rng.gen_range(0..(1 << scale)); |
233 | | let dvar = rng.gen_range(0..(1 << scale)); |
234 | | |
235 | | let float = reference_ssim_boost(svar, dvar, 12); |
236 | | let fixed = |
237 | | apply_ssim_boost(1 << 23, svar, dvar, 12) as f64 / (1 << 23) as f64; |
238 | | |
239 | | // Compare the two versions |
240 | | max_relative_error = |
241 | | max_relative_error.max(f64::abs(1f64 - fixed / float)); |
242 | | } |
243 | | } |
244 | | |
245 | | assert!( |
246 | | max_relative_error < 0.05, |
247 | | "SSIM boost error too high. Measured max relative error: {}.", |
248 | | max_relative_error |
249 | | ); |
250 | | } |
251 | | |
252 | | #[interpolate_test(8, 8)] |
253 | | #[interpolate_test(10, 10)] |
254 | | #[interpolate_test(12, 12)] |
255 | | fn reciprocal_cube_root_test(bd: usize) { |
256 | | let mut max_relative_error = 0f64; |
257 | | |
258 | | let scale = ((1 << bd) - 1) << (6 - 2 + bd - 8); |
259 | | for svar in scale..(scale << 2) { |
260 | | let float = ((scale << 1) as f64 / svar as f64).cbrt(); |
261 | | let fixed = |
262 | | apply_ssim_boost(1 << 23, svar, svar, bd) as f64 / (1 << 23) as f64; |
263 | | |
264 | | // Compare the two versions |
265 | | max_relative_error = |
266 | | max_relative_error.max(f64::abs(1f64 - fixed / float)); |
267 | | } |
268 | | |
269 | | assert!( |
270 | | max_relative_error < 0.0273, |
271 | | "SSIM boost error too high. Measured max relative error: {}.", |
272 | | max_relative_error |
273 | | ); |
274 | | } |
275 | | } |