/rust/registry/src/index.crates.io-1949cf8c6b5b557f/rav1e-0.7.1/src/dist.rs
Line | Count | Source |
1 | | // Copyright (c) 2019-2022, The rav1e contributors. All rights reserved |
2 | | // |
3 | | // This source code is subject to the terms of the BSD 2 Clause License and |
4 | | // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
5 | | // was not distributed with this source code in the LICENSE file, you can |
6 | | // obtain it at www.aomedia.org/license/software. If the Alliance for Open |
7 | | // Media Patent License 1.0 was not distributed with this source code in the |
8 | | // PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
9 | | |
10 | | cfg_if::cfg_if! { |
11 | | if #[cfg(nasm_x86_64)] { |
12 | | pub use crate::asm::x86::dist::*; |
13 | | } else if #[cfg(asm_neon)] { |
14 | | pub use crate::asm::aarch64::dist::*; |
15 | | } else { |
16 | | pub use self::rust::*; |
17 | | } |
18 | | } |
19 | | |
20 | | pub(crate) mod rust { |
21 | | use crate::activity::apply_ssim_boost; |
22 | | use crate::cpu_features::CpuFeatureLevel; |
23 | | use crate::tiling::*; |
24 | | use crate::util::*; |
25 | | |
26 | | use crate::encoder::IMPORTANCE_BLOCK_SIZE; |
27 | | use crate::rdo::DistortionScale; |
28 | | |
29 | | /// Compute the sum of absolute differences over a block. |
30 | | /// w and h can be at most 128, the size of the largest block. |
31 | 0 | pub fn get_sad<T: Pixel>( |
32 | 0 | plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>, w: usize, |
33 | 0 | h: usize, _bit_depth: usize, _cpu: CpuFeatureLevel, |
34 | 0 | ) -> u32 { |
35 | 0 | debug_assert!(w <= 128 && h <= 128); |
36 | 0 | let plane_org = |
37 | 0 | plane_org.subregion(Area::Rect { x: 0, y: 0, width: w, height: h }); |
38 | 0 | let plane_ref = |
39 | 0 | plane_ref.subregion(Area::Rect { x: 0, y: 0, width: w, height: h }); |
40 | | |
41 | 0 | plane_org |
42 | 0 | .rows_iter() |
43 | 0 | .zip(plane_ref.rows_iter()) |
44 | 0 | .map(|(src, dst)| { |
45 | 0 | src |
46 | 0 | .iter() |
47 | 0 | .zip(dst) |
48 | 0 | .map(|(&p1, &p2)| i32::cast_from(p1).abs_diff(i32::cast_from(p2))) Unexecuted instantiation: rav1e::dist::rust::get_sad::<u16>::{closure#0}::{closure#0}Unexecuted instantiation: rav1e::dist::rust::get_sad::<u8>::{closure#0}::{closure#0} |
49 | 0 | .sum::<u32>() |
50 | 0 | }) Unexecuted instantiation: rav1e::dist::rust::get_sad::<u16>::{closure#0}Unexecuted instantiation: rav1e::dist::rust::get_sad::<u8>::{closure#0} |
51 | 0 | .sum() |
52 | 0 | } Unexecuted instantiation: rav1e::dist::rust::get_sad::<u16> Unexecuted instantiation: rav1e::dist::rust::get_sad::<u8> |
53 | | |
54 | | #[inline(always)] |
55 | 0 | const fn butterfly(a: i32, b: i32) -> (i32, i32) { |
56 | 0 | ((a + b), (a - b)) |
57 | 0 | } |
58 | | |
59 | | #[inline(always)] |
60 | | #[allow(clippy::identity_op, clippy::erasing_op)] |
61 | 0 | fn hadamard4_1d< |
62 | 0 | const LEN: usize, |
63 | 0 | const N: usize, |
64 | 0 | const STRIDE0: usize, |
65 | 0 | const STRIDE1: usize, |
66 | 0 | >( |
67 | 0 | data: &mut [i32; LEN], |
68 | 0 | ) { |
69 | 0 | for i in 0..N { |
70 | 0 | let sub: &mut [i32] = &mut data[i * STRIDE0..]; |
71 | 0 | let (a0, a1) = butterfly(sub[0 * STRIDE1], sub[1 * STRIDE1]); |
72 | 0 | let (a2, a3) = butterfly(sub[2 * STRIDE1], sub[3 * STRIDE1]); |
73 | 0 | let (b0, b2) = butterfly(a0, a2); |
74 | 0 | let (b1, b3) = butterfly(a1, a3); |
75 | 0 | sub[0 * STRIDE1] = b0; |
76 | 0 | sub[1 * STRIDE1] = b1; |
77 | 0 | sub[2 * STRIDE1] = b2; |
78 | 0 | sub[3 * STRIDE1] = b3; |
79 | 0 | } |
80 | 0 | } Unexecuted instantiation: rav1e::dist::rust::hadamard4_1d::<16, 4, 4, 1> Unexecuted instantiation: rav1e::dist::rust::hadamard4_1d::<16, 4, 1, 4> Unexecuted instantiation: rav1e::dist::rust::hadamard4_1d::<64, 8, 8, 1> Unexecuted instantiation: rav1e::dist::rust::hadamard4_1d::<64, 8, 1, 8> |
81 | | |
82 | | #[inline(always)] |
83 | | #[allow(clippy::identity_op, clippy::erasing_op)] |
84 | 0 | fn hadamard8_1d< |
85 | 0 | const LEN: usize, |
86 | 0 | const N: usize, |
87 | 0 | const STRIDE0: usize, |
88 | 0 | const STRIDE1: usize, |
89 | 0 | >( |
90 | 0 | data: &mut [i32; LEN], |
91 | 0 | ) { |
92 | 0 | for i in 0..N { |
93 | 0 | let sub: &mut [i32] = &mut data[i * STRIDE0..]; |
94 | 0 |
|
95 | 0 | let (a0, a1) = butterfly(sub[0 * STRIDE1], sub[1 * STRIDE1]); |
96 | 0 | let (a2, a3) = butterfly(sub[2 * STRIDE1], sub[3 * STRIDE1]); |
97 | 0 | let (a4, a5) = butterfly(sub[4 * STRIDE1], sub[5 * STRIDE1]); |
98 | 0 | let (a6, a7) = butterfly(sub[6 * STRIDE1], sub[7 * STRIDE1]); |
99 | 0 |
|
100 | 0 | let (b0, b2) = butterfly(a0, a2); |
101 | 0 | let (b1, b3) = butterfly(a1, a3); |
102 | 0 | let (b4, b6) = butterfly(a4, a6); |
103 | 0 | let (b5, b7) = butterfly(a5, a7); |
104 | 0 |
|
105 | 0 | let (c0, c4) = butterfly(b0, b4); |
106 | 0 | let (c1, c5) = butterfly(b1, b5); |
107 | 0 | let (c2, c6) = butterfly(b2, b6); |
108 | 0 | let (c3, c7) = butterfly(b3, b7); |
109 | 0 |
|
110 | 0 | sub[0 * STRIDE1] = c0; |
111 | 0 | sub[1 * STRIDE1] = c1; |
112 | 0 | sub[2 * STRIDE1] = c2; |
113 | 0 | sub[3 * STRIDE1] = c3; |
114 | 0 | sub[4 * STRIDE1] = c4; |
115 | 0 | sub[5 * STRIDE1] = c5; |
116 | 0 | sub[6 * STRIDE1] = c6; |
117 | 0 | sub[7 * STRIDE1] = c7; |
118 | 0 | } |
119 | 0 | } Unexecuted instantiation: rav1e::dist::rust::hadamard8_1d::<16, 4, 4, 1> Unexecuted instantiation: rav1e::dist::rust::hadamard8_1d::<16, 4, 1, 4> Unexecuted instantiation: rav1e::dist::rust::hadamard8_1d::<64, 8, 8, 1> Unexecuted instantiation: rav1e::dist::rust::hadamard8_1d::<64, 8, 1, 8> |
120 | | |
121 | | #[inline(always)] |
122 | 0 | fn hadamard2d<const LEN: usize, const W: usize, const H: usize>( |
123 | 0 | data: &mut [i32; LEN], |
124 | 0 | ) { |
125 | | /*Vertical transform.*/ |
126 | 0 | let vert_func = if H == 4 { |
127 | 0 | hadamard4_1d::<LEN, W, 1, H> |
128 | 0 | } else { |
129 | 0 | hadamard8_1d::<LEN, W, 1, H> |
130 | 0 | }; |
131 | 0 | vert_func(data); |
132 | | /*Horizontal transform.*/ |
133 | 0 | let horz_func = if W == 4 { |
134 | 0 | hadamard4_1d::<LEN, H, W, 1> |
135 | 0 | } else { |
136 | 0 | hadamard8_1d::<LEN, H, W, 1> |
137 | 0 | }; |
138 | 0 | horz_func(data); |
139 | 0 | } Unexecuted instantiation: rav1e::dist::rust::hadamard2d::<16, 4, 4> Unexecuted instantiation: rav1e::dist::rust::hadamard2d::<64, 8, 8> |
140 | | |
141 | | // SAFETY: The length of data must be 16. |
142 | 0 | unsafe fn hadamard4x4(data: &mut [i32]) { |
143 | 0 | hadamard2d::<{ 4 * 4 }, 4, 4>(&mut *(data.as_mut_ptr() as *mut [i32; 16])); |
144 | 0 | } |
145 | | |
146 | | // SAFETY: The length of data must be 64. |
147 | 0 | unsafe fn hadamard8x8(data: &mut [i32]) { |
148 | 0 | hadamard2d::<{ 8 * 8 }, 8, 8>(&mut *(data.as_mut_ptr() as *mut [i32; 64])); |
149 | 0 | } |
150 | | |
151 | | /// Sum of absolute transformed differences over a block. |
152 | | /// w and h can be at most 128, the size of the largest block. |
153 | | /// Use the sum of 4x4 and 8x8 hadamard transforms for the transform, but |
154 | | /// revert to sad on edges when these transforms do not fit into w and h. |
155 | | /// 4x4 transforms instead of 8x8 transforms when width or height < 8. |
156 | 0 | pub fn get_satd<T: Pixel>( |
157 | 0 | plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>, w: usize, |
158 | 0 | h: usize, _bit_depth: usize, _cpu: CpuFeatureLevel, |
159 | 0 | ) -> u32 { |
160 | 0 | assert!(w <= 128 && h <= 128); |
161 | 0 | assert!(plane_org.rect().width >= w && plane_org.rect().height >= h); |
162 | 0 | assert!(plane_ref.rect().width >= w && plane_ref.rect().height >= h); |
163 | | |
164 | | // Size of hadamard transform should be 4x4 or 8x8 |
165 | | // 4x* and *x4 use 4x4 and all other use 8x8 |
166 | 0 | let size: usize = w.min(h).min(8); |
167 | 0 | let tx2d = if size == 4 { hadamard4x4 } else { hadamard8x8 }; |
168 | | |
169 | 0 | let mut sum: u64 = 0; |
170 | | |
171 | | // Loop over chunks the size of the chosen transform |
172 | 0 | for chunk_y in (0..h).step_by(size) { |
173 | 0 | let chunk_h = (h - chunk_y).min(size); |
174 | 0 | for chunk_x in (0..w).step_by(size) { |
175 | 0 | let chunk_w = (w - chunk_x).min(size); |
176 | 0 | let chunk_area: Area = Area::Rect { |
177 | 0 | x: chunk_x as isize, |
178 | 0 | y: chunk_y as isize, |
179 | 0 | width: chunk_w, |
180 | 0 | height: chunk_h, |
181 | 0 | }; |
182 | 0 | let chunk_org = plane_org.subregion(chunk_area); |
183 | 0 | let chunk_ref = plane_ref.subregion(chunk_area); |
184 | | |
185 | | // Revert to sad on edge blocks (frame edges) |
186 | 0 | if chunk_w != size || chunk_h != size { |
187 | 0 | sum += get_sad( |
188 | 0 | &chunk_org, &chunk_ref, chunk_w, chunk_h, _bit_depth, _cpu, |
189 | 0 | ) as u64; |
190 | 0 | continue; |
191 | 0 | } |
192 | | |
193 | 0 | let buf: &mut [i32] = &mut [0; 8 * 8][..size * size]; |
194 | | |
195 | | // Move the difference of the transforms to a buffer |
196 | 0 | for (row_diff, (row_org, row_ref)) in buf |
197 | 0 | .chunks_mut(size) |
198 | 0 | .zip(chunk_org.rows_iter().zip(chunk_ref.rows_iter())) |
199 | | { |
200 | 0 | for (diff, (a, b)) in |
201 | 0 | row_diff.iter_mut().zip(row_org.iter().zip(row_ref.iter())) |
202 | 0 | { |
203 | 0 | *diff = i32::cast_from(*a) - i32::cast_from(*b); |
204 | 0 | } |
205 | | } |
206 | | |
207 | | // Perform the hadamard transform on the differences |
208 | | // SAFETY: A sufficient number elements exist for the size of the transform. |
209 | 0 | unsafe { |
210 | 0 | tx2d(buf); |
211 | 0 | } |
212 | | |
213 | | // Sum the absolute values of the transformed differences |
214 | 0 | sum += buf.iter().map(|a| a.unsigned_abs() as u64).sum::<u64>(); Unexecuted instantiation: rav1e::dist::rust::get_satd::<u16>::{closure#0}Unexecuted instantiation: rav1e::dist::rust::get_satd::<u8>::{closure#0} |
215 | | } |
216 | | } |
217 | | |
218 | | // Normalize the results |
219 | 0 | let ln = msb(size as i32) as u64; |
220 | 0 | ((sum + (1 << ln >> 1)) >> ln) as u32 |
221 | 0 | } Unexecuted instantiation: rav1e::dist::rust::get_satd::<u16> Unexecuted instantiation: rav1e::dist::rust::get_satd::<u8> |
222 | | |
223 | | /// Number of bits rounded off before summing in `get_weighted_sse` |
224 | | pub const GET_WEIGHTED_SSE_SHIFT: u8 = 8; |
225 | | |
226 | | /// Computes weighted sum of squared error. |
227 | | /// |
228 | | /// Each scale is applied to a 4x4 region in the provided inputs. Each scale |
229 | | /// value is a fixed point number, currently [`DistortionScale`]. |
230 | | /// |
231 | | /// Implementations can require alignment (`bw` (block width) for [`src1`] and |
232 | | /// [`src2`] and `bw/4` for `scale`). |
233 | | #[inline(never)] |
234 | 0 | pub fn get_weighted_sse<T: Pixel>( |
235 | 0 | src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, scale: &[u32], |
236 | 0 | scale_stride: usize, w: usize, h: usize, _bit_depth: usize, |
237 | 0 | _cpu: CpuFeatureLevel, |
238 | 0 | ) -> u64 { |
239 | 0 | let src1 = src1.subregion(Area::Rect { x: 0, y: 0, width: w, height: h }); |
240 | | // Always chunk and apply scaling on the sse of squares the size of |
241 | | // decimated/sub-sampled importance block sizes. |
242 | | // Warning: Changing this will require changing/disabling assembly. |
243 | 0 | let chunk_size: usize = IMPORTANCE_BLOCK_SIZE >> 1; |
244 | | |
245 | | // Iterator of a row of scales, stretched out to be per row |
246 | 0 | let scales = scale.chunks_exact(scale_stride); |
247 | | |
248 | 0 | let sse = src1 |
249 | 0 | .vert_windows(chunk_size) |
250 | 0 | .step_by(chunk_size) |
251 | 0 | .zip(src2.vert_windows(chunk_size).step_by(chunk_size)) |
252 | 0 | .zip(scales) |
253 | 0 | .map(|((row1, row2), scales)| { |
254 | 0 | row1 |
255 | 0 | .horz_windows(chunk_size) |
256 | 0 | .step_by(chunk_size) |
257 | 0 | .zip(row2.horz_windows(chunk_size).step_by(chunk_size)) |
258 | 0 | .zip(scales) |
259 | 0 | .map(|((chunk1, chunk2), &scale)| { |
260 | 0 | let sum = chunk1 |
261 | 0 | .rows_iter() |
262 | 0 | .zip(chunk2.rows_iter()) |
263 | 0 | .map(|(chunk_row1, chunk_row2)| { |
264 | 0 | chunk_row1 |
265 | 0 | .iter() |
266 | 0 | .zip(chunk_row2) |
267 | 0 | .map(|(&a, &b)| { |
268 | 0 | let c = i32::cast_from(a) - i32::cast_from(b); |
269 | 0 | (c * c) as u32 |
270 | 0 | }) Unexecuted instantiation: rav1e::dist::rust::get_weighted_sse::<u16>::{closure#0}::{closure#0}::{closure#0}::{closure#0}Unexecuted instantiation: rav1e::dist::rust::get_weighted_sse::<u8>::{closure#0}::{closure#0}::{closure#0}::{closure#0} |
271 | 0 | .sum::<u32>() |
272 | 0 | }) Unexecuted instantiation: rav1e::dist::rust::get_weighted_sse::<u16>::{closure#0}::{closure#0}::{closure#0}Unexecuted instantiation: rav1e::dist::rust::get_weighted_sse::<u8>::{closure#0}::{closure#0}::{closure#0} |
273 | 0 | .sum::<u32>(); |
274 | 0 | (sum as u64 * scale as u64 + (1 << GET_WEIGHTED_SSE_SHIFT >> 1)) |
275 | 0 | >> GET_WEIGHTED_SSE_SHIFT |
276 | 0 | }) Unexecuted instantiation: rav1e::dist::rust::get_weighted_sse::<u16>::{closure#0}::{closure#0}Unexecuted instantiation: rav1e::dist::rust::get_weighted_sse::<u8>::{closure#0}::{closure#0} |
277 | 0 | .sum::<u64>() |
278 | 0 | }) Unexecuted instantiation: rav1e::dist::rust::get_weighted_sse::<u16>::{closure#0}Unexecuted instantiation: rav1e::dist::rust::get_weighted_sse::<u8>::{closure#0} |
279 | 0 | .sum::<u64>(); |
280 | | |
281 | 0 | let den = DistortionScale::new(1, 1 << GET_WEIGHTED_SSE_SHIFT).0 as u64; |
282 | 0 | (sse + (den >> 1)) / den |
283 | 0 | } Unexecuted instantiation: rav1e::dist::rust::get_weighted_sse::<u16> Unexecuted instantiation: rav1e::dist::rust::get_weighted_sse::<u8> |
284 | | |
285 | | /// Number of bits of precision used in `AREA_DIVISORS` |
286 | | const AREA_DIVISOR_BITS: u8 = 14; |
287 | | |
288 | | /// Lookup table for 2^`AREA_DIVISOR_BITS` / (1 + x) |
289 | | #[rustfmt::skip] |
290 | | const AREA_DIVISORS: [u16; 64] = [ |
291 | | 16384, 8192, 5461, 4096, 3277, 2731, 2341, 2048, 1820, 1638, 1489, 1365, |
292 | | 1260, 1170, 1092, 1024, 964, 910, 862, 819, 780, 745, 712, 683, |
293 | | 655, 630, 607, 585, 565, 546, 529, 512, 496, 482, 468, 455, |
294 | | 443, 431, 420, 410, 400, 390, 381, 372, 364, 356, 349, 341, |
295 | | 334, 328, 321, 315, 309, 303, 298, 293, 287, 282, 278, 273, |
296 | | 269, 264, 260, 256, |
297 | | ]; |
298 | | |
299 | | /// Computes a distortion metric of the sum of squares weighted by activity. |
300 | | /// w and h should be <= 8. |
301 | | #[inline(never)] |
302 | 0 | pub fn cdef_dist_kernel<T: Pixel>( |
303 | 0 | src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, w: usize, h: usize, |
304 | 0 | bit_depth: usize, _cpu: CpuFeatureLevel, |
305 | 0 | ) -> u32 { |
306 | | // TODO: Investigate using different constants in ssim boost for block sizes |
307 | | // smaller than 8x8. |
308 | | |
309 | 0 | debug_assert!(src.plane_cfg.xdec == 0); |
310 | 0 | debug_assert!(src.plane_cfg.ydec == 0); |
311 | 0 | debug_assert!(dst.plane_cfg.xdec == 0); |
312 | 0 | debug_assert!(dst.plane_cfg.ydec == 0); |
313 | | |
314 | | // Limit kernel to 8x8 |
315 | 0 | debug_assert!(w <= 8); |
316 | 0 | debug_assert!(h <= 8); |
317 | | |
318 | | // Compute the following summations. |
319 | 0 | let mut sum_s: u32 = 0; // sum(src_{i,j}) |
320 | 0 | let mut sum_d: u32 = 0; // sum(dst_{i,j}) |
321 | 0 | let mut sum_s2: u32 = 0; // sum(src_{i,j}^2) |
322 | 0 | let mut sum_d2: u32 = 0; // sum(dst_{i,j}^2) |
323 | 0 | let mut sum_sd: u32 = 0; // sum(src_{i,j} * dst_{i,j}) |
324 | 0 | for (row1, row2) in src.rows_iter().take(h).zip(dst.rows_iter()) { |
325 | 0 | for (s, d) in row1[..w].iter().zip(row2) { |
326 | 0 | let s: u32 = u32::cast_from(*s); |
327 | 0 | let d: u32 = u32::cast_from(*d); |
328 | 0 | sum_s += s; |
329 | 0 | sum_d += d; |
330 | 0 |
|
331 | 0 | sum_s2 += s * s; |
332 | 0 | sum_d2 += d * d; |
333 | 0 | sum_sd += s * d; |
334 | 0 | } |
335 | | } |
336 | | |
337 | | // To get the distortion, compute sum of squared error and apply a weight |
338 | | // based on the variance of the two planes. |
339 | 0 | let sse = sum_d2 + sum_s2 - 2 * sum_sd; |
340 | | |
341 | | // Convert to 64-bits to avoid overflow when squaring |
342 | 0 | let sum_s = sum_s as u64; |
343 | 0 | let sum_d = sum_d as u64; |
344 | | |
345 | | // Calculate the variance (more accurately variance*area) of each plane. |
346 | | // var[iance] = avg(X^2) - avg(X)^2 = sum(X^2) / n - sum(X)^2 / n^2 |
347 | | // (n = # samples i.e. area) |
348 | | // var * n = sum(X^2) - sum(X)^2 / n |
349 | | // When w and h are powers of two, this can be done via shifting. |
350 | 0 | let div = AREA_DIVISORS[w * h - 1] as u64; |
351 | 0 | let div_shift = AREA_DIVISOR_BITS; |
352 | | // Due to rounding, negative values can occur when w or h aren't powers of |
353 | | // two. Saturate to avoid underflow. |
354 | 0 | let mut svar = sum_s2.saturating_sub( |
355 | 0 | ((sum_s * sum_s * div + (1 << div_shift >> 1)) >> div_shift) as u32, |
356 | | ); |
357 | 0 | let mut dvar = sum_d2.saturating_sub( |
358 | 0 | ((sum_d * sum_d * div + (1 << div_shift >> 1)) >> div_shift) as u32, |
359 | | ); |
360 | | |
361 | | // Scale variances up to 8x8 size. |
362 | | // scaled variance = var * (8x8) / wxh |
363 | | // For 8x8, this is a nop. For powers of 2, this is doable with shifting. |
364 | | // TODO: It should be possible and faster to do this adjustment in ssim boost |
365 | 0 | let scale_shift = AREA_DIVISOR_BITS - 6; |
366 | 0 | svar = |
367 | 0 | ((svar as u64 * div + (1 << scale_shift >> 1)) >> scale_shift) as u32; |
368 | 0 | dvar = |
369 | 0 | ((dvar as u64 * div + (1 << scale_shift >> 1)) >> scale_shift) as u32; |
370 | | |
371 | 0 | apply_ssim_boost(sse, svar, dvar, bit_depth) |
372 | 0 | } Unexecuted instantiation: rav1e::dist::rust::cdef_dist_kernel::<u16> Unexecuted instantiation: rav1e::dist::rust::cdef_dist_kernel::<u8> |
373 | | } |
374 | | |
375 | | #[cfg(test)] |
376 | | pub mod test { |
377 | | use super::*; |
378 | | use crate::cpu_features::CpuFeatureLevel; |
379 | | use crate::frame::*; |
380 | | use crate::tiling::Area; |
381 | | use crate::util::Pixel; |
382 | | |
383 | | // Generate plane data for get_sad_same() |
384 | | fn setup_planes<T: Pixel>() -> (Plane<T>, Plane<T>) { |
385 | | // Two planes with different strides |
386 | | let mut input_plane = Plane::new(640, 480, 0, 0, 128 + 8, 128 + 8); |
387 | | let mut rec_plane = Plane::new(640, 480, 0, 0, 2 * 128 + 8, 2 * 128 + 8); |
388 | | |
389 | | // Make the test pattern robust to data alignment |
390 | | let xpad_off = |
391 | | (input_plane.cfg.xorigin - input_plane.cfg.xpad) as i32 - 8i32; |
392 | | |
393 | | for (i, row) in |
394 | | input_plane.data.chunks_mut(input_plane.cfg.stride).enumerate() |
395 | | { |
396 | | for (j, pixel) in row.iter_mut().enumerate() { |
397 | | let val = ((j + i) as i32 - xpad_off) & 255i32; |
398 | | assert!(val >= u8::MIN.into() && val <= u8::MAX.into()); |
399 | | *pixel = T::cast_from(val); |
400 | | } |
401 | | } |
402 | | |
403 | | for (i, row) in rec_plane.data.chunks_mut(rec_plane.cfg.stride).enumerate() |
404 | | { |
405 | | for (j, pixel) in row.iter_mut().enumerate() { |
406 | | let val = (j as i32 - i as i32 - xpad_off) & 255i32; |
407 | | assert!(val >= u8::MIN.into() && val <= u8::MAX.into()); |
408 | | *pixel = T::cast_from(val); |
409 | | } |
410 | | } |
411 | | |
412 | | (input_plane, rec_plane) |
413 | | } |
414 | | |
415 | | // Regression and validation test for SAD computation |
416 | | fn get_sad_same_inner<T: Pixel>() { |
417 | | // dynamic allocation: test |
418 | | let blocks: Vec<(usize, usize, u32)> = vec![ |
419 | | (4, 4, 1912), |
420 | | (4, 8, 4296), |
421 | | (8, 4, 3496), |
422 | | (8, 8, 7824), |
423 | | (8, 16, 16592), |
424 | | (16, 8, 14416), |
425 | | (16, 16, 31136), |
426 | | (16, 32, 60064), |
427 | | (32, 16, 59552), |
428 | | (32, 32, 120128), |
429 | | (32, 64, 186688), |
430 | | (64, 32, 250176), |
431 | | (64, 64, 438912), |
432 | | (64, 128, 654272), |
433 | | (128, 64, 1016768), |
434 | | (128, 128, 1689792), |
435 | | (4, 16, 8680), |
436 | | (16, 4, 6664), |
437 | | (8, 32, 31056), |
438 | | (32, 8, 27600), |
439 | | (16, 64, 93344), |
440 | | (64, 16, 116384), |
441 | | ]; |
442 | | |
443 | | let bit_depth: usize = 8; |
444 | | let (input_plane, rec_plane) = setup_planes::<T>(); |
445 | | |
446 | | for (w, h, distortion) in blocks { |
447 | | let area = Area::StartingAt { x: 32, y: 40 }; |
448 | | |
449 | | let input_region = input_plane.region(area); |
450 | | let rec_region = rec_plane.region(area); |
451 | | |
452 | | assert_eq!( |
453 | | distortion, |
454 | | get_sad( |
455 | | &input_region, |
456 | | &rec_region, |
457 | | w, |
458 | | h, |
459 | | bit_depth, |
460 | | CpuFeatureLevel::default() |
461 | | ) |
462 | | ); |
463 | | } |
464 | | } |
465 | | |
466 | | #[test] |
467 | | fn get_sad_same_u8() { |
468 | | get_sad_same_inner::<u8>(); |
469 | | } |
470 | | |
471 | | #[test] |
472 | | fn get_sad_same_u16() { |
473 | | get_sad_same_inner::<u16>(); |
474 | | } |
475 | | |
476 | | fn get_satd_same_inner<T: Pixel>() { |
477 | | let blocks: Vec<(usize, usize, u32)> = vec![ |
478 | | (4, 4, 1408), |
479 | | (4, 8, 2016), |
480 | | (8, 4, 1816), |
481 | | (8, 8, 3984), |
482 | | (8, 16, 5136), |
483 | | (16, 8, 4864), |
484 | | (16, 16, 9984), |
485 | | (16, 32, 13824), |
486 | | (32, 16, 13760), |
487 | | (32, 32, 27952), |
488 | | (32, 64, 37168), |
489 | | (64, 32, 45104), |
490 | | (64, 64, 84176), |
491 | | (64, 128, 127920), |
492 | | (128, 64, 173680), |
493 | | (128, 128, 321456), |
494 | | (4, 16, 3136), |
495 | | (16, 4, 2632), |
496 | | (8, 32, 7056), |
497 | | (32, 8, 6624), |
498 | | (16, 64, 18432), |
499 | | (64, 16, 21312), |
500 | | ]; |
501 | | |
502 | | let bit_depth: usize = 8; |
503 | | let (input_plane, rec_plane) = setup_planes::<T>(); |
504 | | |
505 | | for (w, h, distortion) in blocks { |
506 | | let area = Area::StartingAt { x: 32, y: 40 }; |
507 | | |
508 | | let input_region = input_plane.region(area); |
509 | | let rec_region = rec_plane.region(area); |
510 | | |
511 | | assert_eq!( |
512 | | distortion, |
513 | | get_satd( |
514 | | &input_region, |
515 | | &rec_region, |
516 | | w, |
517 | | h, |
518 | | bit_depth, |
519 | | CpuFeatureLevel::default() |
520 | | ) |
521 | | ); |
522 | | } |
523 | | } |
524 | | |
525 | | #[test] |
526 | | fn get_satd_same_u8() { |
527 | | get_satd_same_inner::<u8>(); |
528 | | } |
529 | | |
530 | | #[test] |
531 | | fn get_satd_same_u16() { |
532 | | get_satd_same_inner::<u16>(); |
533 | | } |
534 | | } |