/rust/registry/src/index.crates.io-1949cf8c6b5b557f/rav1e-0.8.1/src/deblock.rs
Line | Count | Source |
1 | | // Copyright (c) 2018-2022, The rav1e contributors. All rights reserved |
2 | | // |
3 | | // This source code is subject to the terms of the BSD 2 Clause License and |
4 | | // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
5 | | // was not distributed with this source code in the LICENSE file, you can |
6 | | // obtain it at www.aomedia.org/license/software. If the Alliance for Open |
7 | | // Media Patent License 1.0 was not distributed with this source code in the |
8 | | // PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
9 | | |
10 | | use crate::api::FrameType; |
11 | | use crate::color::ChromaSampling::Cs400; |
12 | | use crate::context::*; |
13 | | use crate::encoder::FrameInvariants; |
14 | | use crate::partition::RefType::*; |
15 | | use crate::predict::PredictionMode::*; |
16 | | use crate::quantize::*; |
17 | | use crate::tiling::*; |
18 | | use crate::util::{clamp, ILog, Pixel}; |
19 | | use crate::DeblockState; |
20 | | use rayon::iter::*; |
21 | | use std::cmp; |
22 | | |
23 | 0 | fn deblock_adjusted_level( |
24 | 0 | deblock: &DeblockState, block: &Block, pli: usize, vertical: bool, |
25 | 0 | ) -> usize { |
26 | 0 | let idx = if pli == 0 { usize::from(!vertical) } else { pli + 1 }; |
27 | | |
28 | 0 | let level = if deblock.block_deltas_enabled { |
29 | | // By-block filter strength delta, if the feature is active. |
30 | 0 | let block_delta = if deblock.block_delta_multi { |
31 | 0 | block.deblock_deltas[idx] << deblock.block_delta_shift |
32 | | } else { |
33 | 0 | block.deblock_deltas[0] << deblock.block_delta_shift |
34 | | }; |
35 | | |
36 | | // Add to frame-specified filter strength (Y-vertical, Y-horizontal, U, V) |
37 | 0 | clamp(block_delta + deblock.levels[idx] as i8, 0, MAX_LOOP_FILTER as i8) |
38 | 0 | as u8 |
39 | | } else { |
40 | 0 | deblock.levels[idx] |
41 | | }; |
42 | | |
43 | | // if fi.seg_feaure_active { |
44 | | // rav1e does not yet support segments or segment features |
45 | | // } |
46 | | |
47 | | // Are delta modifiers for specific references and modes active? If so, add them too. |
48 | 0 | if deblock.deltas_enabled { |
49 | 0 | let mode = block.mode; |
50 | 0 | let reference = block.ref_frames[0]; |
51 | 0 | let mode_type = usize::from( |
52 | 0 | mode >= NEARESTMV && mode != GLOBALMV && mode != GLOBAL_GLOBALMV, |
53 | | ); |
54 | 0 | let l5 = level >> 5; |
55 | 0 | clamp( |
56 | 0 | level as i32 |
57 | 0 | + ((deblock.ref_deltas[reference.to_index()] as i32) << l5) |
58 | 0 | + if reference == INTRA_FRAME { |
59 | 0 | 0 |
60 | | } else { |
61 | 0 | (deblock.mode_deltas[mode_type] as i32) << l5 |
62 | | }, |
63 | | 0, |
64 | 0 | MAX_LOOP_FILTER as i32, |
65 | | ) as usize |
66 | | } else { |
67 | 0 | level as usize |
68 | | } |
69 | 0 | } |
70 | | |
71 | | #[inline] |
72 | 0 | fn deblock_left<'a, T: Pixel>( |
73 | 0 | blocks: &'a TileBlocks, in_bo: TileBlockOffset, p: &PlaneRegion<T>, |
74 | 0 | ) -> &'a Block { |
75 | 0 | let xdec = p.plane_cfg.xdec; |
76 | 0 | let ydec = p.plane_cfg.ydec; |
77 | | |
78 | | // subsampled chroma uses odd mi row/col |
79 | | // We already know we're not at the upper/left corner, so prev_block is in frame |
80 | 0 | &blocks[in_bo.0.y | ydec][(in_bo.0.x | xdec) - (1 << xdec)] |
81 | 0 | } Unexecuted instantiation: rav1e::deblock::deblock_left::<u16> Unexecuted instantiation: rav1e::deblock::deblock_left::<u8> |
82 | | |
83 | | #[inline] |
84 | 0 | fn deblock_up<'a, T: Pixel>( |
85 | 0 | blocks: &'a TileBlocks, in_bo: TileBlockOffset, p: &PlaneRegion<T>, |
86 | 0 | ) -> &'a Block { |
87 | 0 | let xdec = p.plane_cfg.xdec; |
88 | 0 | let ydec = p.plane_cfg.ydec; |
89 | | |
90 | | // subsampled chroma uses odd mi row/col |
91 | 0 | &blocks[(in_bo.0.y | ydec) - (1 << ydec)][in_bo.0.x | xdec] |
92 | 0 | } Unexecuted instantiation: rav1e::deblock::deblock_up::<u16> Unexecuted instantiation: rav1e::deblock::deblock_up::<u8> |
93 | | |
94 | | // Must be called on a tx edge, and not on a frame edge. This is enforced above the call. |
95 | 0 | fn deblock_size<T: Pixel>( |
96 | 0 | block: &Block, prev_block: &Block, p: &PlaneRegion<T>, pli: usize, |
97 | 0 | vertical: bool, block_edge: bool, |
98 | 0 | ) -> usize { |
99 | 0 | let xdec = p.plane_cfg.xdec; |
100 | 0 | let ydec = p.plane_cfg.ydec; |
101 | | |
102 | | // filter application is conditional on skip and block edge |
103 | 0 | if !(block_edge |
104 | 0 | || !block.skip |
105 | 0 | || !prev_block.skip |
106 | 0 | || block.ref_frames[0] == INTRA_FRAME |
107 | 0 | || prev_block.ref_frames[0] == INTRA_FRAME) |
108 | | { |
109 | 0 | 0 |
110 | | } else { |
111 | 0 | let (txsize, prev_txsize) = if pli == 0 { |
112 | 0 | (block.txsize, prev_block.txsize) |
113 | | } else { |
114 | 0 | ( |
115 | 0 | block.bsize.largest_chroma_tx_size(xdec, ydec), |
116 | 0 | prev_block.bsize.largest_chroma_tx_size(xdec, ydec), |
117 | 0 | ) |
118 | | }; |
119 | 0 | let (tx_n, prev_tx_n) = if vertical { |
120 | 0 | (cmp::max(txsize.width_mi(), 1), cmp::max(prev_txsize.width_mi(), 1)) |
121 | | } else { |
122 | 0 | (cmp::max(txsize.height_mi(), 1), cmp::max(prev_txsize.height_mi(), 1)) |
123 | | }; |
124 | 0 | cmp::min( |
125 | 0 | if pli == 0 { 14 } else { 6 }, |
126 | 0 | cmp::min(tx_n, prev_tx_n) << MI_SIZE_LOG2, |
127 | | ) |
128 | | } |
129 | 0 | } Unexecuted instantiation: rav1e::deblock::deblock_size::<u16> Unexecuted instantiation: rav1e::deblock::deblock_size::<u8> |
130 | | |
131 | | // Must be called on a tx edge |
132 | | #[inline] |
133 | 0 | fn deblock_level( |
134 | 0 | deblock: &DeblockState, block: &Block, prev_block: &Block, pli: usize, |
135 | 0 | vertical: bool, |
136 | 0 | ) -> usize { |
137 | 0 | let level = deblock_adjusted_level(deblock, block, pli, vertical); |
138 | 0 | if level == 0 { |
139 | 0 | deblock_adjusted_level(deblock, prev_block, pli, vertical) |
140 | | } else { |
141 | 0 | level |
142 | | } |
143 | 0 | } Unexecuted instantiation: rav1e::deblock::deblock_level Unexecuted instantiation: rav1e::deblock::deblock_level |
144 | | |
145 | | // four taps, 4 outputs (two are trivial) |
146 | | #[inline] |
147 | 0 | fn filter_narrow2_4( |
148 | 0 | p1: i32, p0: i32, q0: i32, q1: i32, shift: usize, |
149 | 0 | ) -> [i32; 4] { |
150 | 0 | let filter0 = clamp(p1 - q1, -128 << shift, (128 << shift) - 1); |
151 | 0 | let filter1 = |
152 | 0 | clamp(filter0 + 3 * (q0 - p0) + 4, -128 << shift, (128 << shift) - 1) >> 3; |
153 | | // be certain our optimization removing a clamp is sound |
154 | 0 | debug_assert!({ |
155 | 0 | let base = |
156 | 0 | clamp(filter0 + 3 * (q0 - p0), -128 << shift, (128 << shift) - 1); |
157 | 0 | let test = clamp(base + 4, -128 << shift, (128 << shift) - 1) >> 3; |
158 | 0 | filter1 == test |
159 | 0 | }); |
160 | 0 | let filter2 = |
161 | 0 | clamp(filter0 + 3 * (q0 - p0) + 3, -128 << shift, (128 << shift) - 1) >> 3; |
162 | | // be certain our optimization removing a clamp is sound |
163 | 0 | debug_assert!({ |
164 | 0 | let base = |
165 | 0 | clamp(filter0 + 3 * (q0 - p0), -128 << shift, (128 << shift) - 1); |
166 | 0 | let test = clamp(base + 3, -128 << shift, (128 << shift) - 1) >> 3; |
167 | 0 | filter2 == test |
168 | 0 | }); |
169 | 0 | [ |
170 | 0 | p1, |
171 | 0 | clamp(p0 + filter2, 0, (256 << shift) - 1), |
172 | 0 | clamp(q0 - filter1, 0, (256 << shift) - 1), |
173 | 0 | q1, |
174 | 0 | ] |
175 | 0 | } Unexecuted instantiation: rav1e::deblock::filter_narrow2_4 Unexecuted instantiation: rav1e::deblock::filter_narrow2_4 |
176 | | |
177 | | // six taps, 6 outputs (four are trivial) |
178 | | #[inline] |
179 | 0 | fn filter_narrow2_6( |
180 | 0 | p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, shift: usize, |
181 | 0 | ) -> [i32; 6] { |
182 | 0 | let x = filter_narrow2_4(p1, p0, q0, q1, shift); |
183 | 0 | [p2, x[0], x[1], x[2], x[3], q2] |
184 | 0 | } Unexecuted instantiation: rav1e::deblock::filter_narrow2_6 Unexecuted instantiation: rav1e::deblock::filter_narrow2_6 |
185 | | |
186 | | // 12 taps, 12 outputs (ten are trivial) |
187 | | #[inline] |
188 | 0 | fn filter_narrow2_12( |
189 | 0 | p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, |
190 | 0 | q2: i32, q3: i32, q4: i32, q5: i32, shift: usize, |
191 | 0 | ) -> [i32; 12] { |
192 | 0 | let x = filter_narrow2_4(p1, p0, q0, q1, shift); |
193 | 0 | [p5, p4, p3, p2, x[0], x[1], x[2], x[3], q2, q3, q4, q5] |
194 | 0 | } Unexecuted instantiation: rav1e::deblock::filter_narrow2_12 Unexecuted instantiation: rav1e::deblock::filter_narrow2_12 |
195 | | |
196 | | // four taps, 4 outputs |
197 | | #[inline] |
198 | 0 | fn filter_narrow4_4( |
199 | 0 | p1: i32, p0: i32, q0: i32, q1: i32, shift: usize, |
200 | 0 | ) -> [i32; 4] { |
201 | 0 | let filter1 = |
202 | 0 | clamp(3 * (q0 - p0) + 4, -128 << shift, (128 << shift) - 1) >> 3; |
203 | | // be certain our optimization removing a clamp is sound |
204 | 0 | debug_assert!({ |
205 | 0 | let base = clamp(3 * (q0 - p0), -128 << shift, (128 << shift) - 1); |
206 | 0 | let test = clamp(base + 4, -128 << shift, (128 << shift) - 1) >> 3; |
207 | 0 | filter1 == test |
208 | 0 | }); |
209 | 0 | let filter2 = |
210 | 0 | clamp(3 * (q0 - p0) + 3, -128 << shift, (128 << shift) - 1) >> 3; |
211 | | // be certain our optimization removing a clamp is sound |
212 | 0 | debug_assert!({ |
213 | 0 | let base = clamp(3 * (q0 - p0), -128 << shift, (128 << shift) - 1); |
214 | 0 | let test = clamp(base + 3, -128 << shift, (128 << shift) - 1) >> 3; |
215 | 0 | filter2 == test |
216 | 0 | }); |
217 | 0 | let filter3 = (filter1 + 1) >> 1; |
218 | 0 | [ |
219 | 0 | clamp(p1 + filter3, 0, (256 << shift) - 1), |
220 | 0 | clamp(p0 + filter2, 0, (256 << shift) - 1), |
221 | 0 | clamp(q0 - filter1, 0, (256 << shift) - 1), |
222 | 0 | clamp(q1 - filter3, 0, (256 << shift) - 1), |
223 | 0 | ] |
224 | 0 | } Unexecuted instantiation: rav1e::deblock::filter_narrow4_4 Unexecuted instantiation: rav1e::deblock::filter_narrow4_4 |
225 | | |
226 | | // six taps, 6 outputs (two are trivial) |
227 | | #[inline] |
228 | 0 | fn filter_narrow4_6( |
229 | 0 | p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, shift: usize, |
230 | 0 | ) -> [i32; 6] { |
231 | 0 | let x = filter_narrow4_4(p1, p0, q0, q1, shift); |
232 | 0 | [p2, x[0], x[1], x[2], x[3], q2] |
233 | 0 | } Unexecuted instantiation: rav1e::deblock::filter_narrow4_6 Unexecuted instantiation: rav1e::deblock::filter_narrow4_6 |
234 | | |
235 | | // 12 taps, 12 outputs (eight are trivial) |
236 | | #[inline] |
237 | 0 | fn filter_narrow4_12( |
238 | 0 | p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, |
239 | 0 | q2: i32, q3: i32, q4: i32, q5: i32, shift: usize, |
240 | 0 | ) -> [i32; 12] { |
241 | 0 | let x = filter_narrow4_4(p1, p0, q0, q1, shift); |
242 | 0 | [p5, p4, p3, p2, x[0], x[1], x[2], x[3], q2, q3, q4, q5] |
243 | 0 | } Unexecuted instantiation: rav1e::deblock::filter_narrow4_12 Unexecuted instantiation: rav1e::deblock::filter_narrow4_12 |
244 | | |
// six taps, 4 outputs
//
// Each output is a weighted sum of the taps (weights total 8), rounded to
// nearest by adding half the divisor before the shift.
#[rustfmt::skip]
#[inline]
const fn filter_wide6_4(
  p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32
) -> [i32; 4] {
  const ROUND: i32 = 1 << 2;
  let new_p1 = (p2*3 + p1*2 + p0*2 + q0          + ROUND) >> 3;
  let new_p0 = (p2   + p1*2 + p0*2 + q0*2 + q1   + ROUND) >> 3;
  let new_q0 = (p1   + p0*2 + q0*2 + q1*2 + q2   + ROUND) >> 3;
  let new_q1 = (p0   + q0*2 + q1*2 + q2*3        + ROUND) >> 3;
  [new_p1, new_p0, new_q0, new_q1]
}
258 | | |
// eight taps, 6 outputs
//
// Each output is a weighted sum of the taps (weights total 8), rounded to
// nearest by adding half the divisor before the shift.
#[rustfmt::skip]
#[inline]
const fn filter_wide8_6(
  p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, q3: i32
) -> [i32; 6] {
  const ROUND: i32 = 1 << 2;
  let new_p2 = (p3*3 + p2*2 + p1   + p0   + q0               + ROUND) >> 3;
  let new_p1 = (p3*2 + p2   + p1*2 + p0   + q0   + q1        + ROUND) >> 3;
  let new_p0 = (p3   + p2   + p1   + p0*2 + q0   + q1 + q2   + ROUND) >> 3;
  let new_q0 = (p2   + p1   + p0   + q0*2 + q1   + q2 + q3   + ROUND) >> 3;
  let new_q1 = (p1   + p0   + q0   + q1*2 + q2   + q3*2      + ROUND) >> 3;
  let new_q2 = (p0   + q0   + q1   + q2*2 + q3*3             + ROUND) >> 3;
  [new_p2, new_p1, new_p0, new_q0, new_q1, new_q2]
}
274 | | |
275 | | // 12 taps, 12 outputs (six are trivial) |
276 | | #[inline] |
277 | 0 | const fn filter_wide8_12( |
278 | 0 | p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, |
279 | 0 | q2: i32, q3: i32, q4: i32, q5: i32, |
280 | 0 | ) -> [i32; 12] { |
281 | 0 | let x = filter_wide8_6(p3, p2, p1, p0, q0, q1, q2, q3); |
282 | 0 | [p5, p4, p3, x[0], x[1], x[2], x[3], x[4], x[5], q3, q4, q5] |
283 | 0 | } Unexecuted instantiation: rav1e::deblock::filter_wide8_12 Unexecuted instantiation: rav1e::deblock::filter_wide8_12 |
284 | | |
// fourteen taps, 12 outputs
//
// Each output is a weighted sum of the taps (weights total 16), rounded to
// nearest by adding half the divisor before the shift.
#[rustfmt::skip]
#[inline]
const fn filter_wide14_12(
  p6: i32, p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32,
  q1: i32, q2: i32, q3: i32, q4: i32, q5: i32, q6: i32
) -> [i32; 12] {
  const ROUND: i32 = 1 << 3;
  let new_p5 = (p6*7 + p5*2 + p4*2 + p3 + p2 + p1 + p0 + q0 + ROUND) >> 4;
  let new_p4 = (p6*5 + p5*2 + p4*2 + p3*2 + p2 + p1 + p0 + q0 + q1 + ROUND) >> 4;
  let new_p3 = (p6*4 + p5 + p4*2 + p3*2 + p2*2 + p1 + p0 + q0 + q1 + q2 + ROUND) >> 4;
  let new_p2 = (p6*3 + p5 + p4 + p3*2 + p2*2 + p1*2 + p0 + q0 + q1 + q2 + q3 + ROUND) >> 4;
  let new_p1 = (p6*2 + p5 + p4 + p3 + p2*2 + p1*2 + p0*2 + q0 + q1 + q2 + q3 + q4 + ROUND) >> 4;
  let new_p0 = (p6 + p5 + p4 + p3 + p2 + p1*2 + p0*2 + q0*2 + q1 + q2 + q3 + q4 + q5 + ROUND) >> 4;
  let new_q0 = (p5 + p4 + p3 + p2 + p1 + p0*2 + q0*2 + q1*2 + q2 + q3 + q4 + q5 + q6 + ROUND) >> 4;
  let new_q1 = (p4 + p3 + p2 + p1 + p0 + q0*2 + q1*2 + q2*2 + q3 + q4 + q5 + q6*2 + ROUND) >> 4;
  let new_q2 = (p3 + p2 + p1 + p0 + q0 + q1*2 + q2*2 + q3*2 + q4 + q5 + q6*3 + ROUND) >> 4;
  let new_q3 = (p2 + p1 + p0 + q0 + q1 + q2*2 + q3*2 + q4*2 + q5 + q6*4 + ROUND) >> 4;
  let new_q4 = (p1 + p0 + q0 + q1 + q2 + q3*2 + q4*2 + q5*2 + q6*5 + ROUND) >> 4;
  let new_q5 = (p0 + q0 + q1 + q2 + q3 + q4*2 + q5*2 + q6*7 + ROUND) >> 4;
  [
    new_p5, new_p4, new_p3, new_p2, new_p1, new_p0,
    new_q0, new_q1, new_q2, new_q3, new_q4, new_q5,
  ]
}
307 | | |
308 | | #[inline] |
309 | 0 | fn copy_horizontal<T: Pixel>( |
310 | 0 | dst: &mut PlaneRegionMut<'_, T>, x: usize, y: usize, src: &[i32], |
311 | 0 | ) { |
312 | 0 | let row = &mut dst[y][x..]; |
313 | 0 | for (dst, src) in row.iter_mut().take(src.len()).zip(src) { |
314 | 0 | *dst = T::cast_from(*src); |
315 | 0 | } |
316 | 0 | } Unexecuted instantiation: rav1e::deblock::copy_horizontal::<u16> Unexecuted instantiation: rav1e::deblock::copy_horizontal::<u8> |
317 | | |
318 | | #[inline] |
319 | 0 | fn copy_vertical<T: Pixel>( |
320 | 0 | dst: &mut PlaneRegionMut<'_, T>, x: usize, y: usize, src: &[i32], |
321 | 0 | ) { |
322 | 0 | for (i, v) in src.iter().enumerate() { |
323 | 0 | let p = &mut dst[y + i][x]; |
324 | 0 | *p = T::cast_from(*v); |
325 | 0 | } |
326 | 0 | } Unexecuted instantiation: rav1e::deblock::copy_vertical::<u16> Unexecuted instantiation: rav1e::deblock::copy_vertical::<u8> |
327 | | |
// Returns the sum of squared differences between `a` and `b`.
//
// Differences are widened to `i64` before squaring and summing. A single
// full-range 16-bit difference squared (65535^2) already exceeds `i32::MAX`,
// so accumulating in `i32` and widening afterwards could overflow.
#[inline]
fn stride_sse<const LEN: usize>(a: &[i32; LEN], b: &[i32; LEN]) -> i64 {
  a.iter()
    .zip(b)
    .map(|(&x, &y)| {
      let diff = i64::from(x) - i64::from(y);
      diff * diff
    })
    .sum()
}
332 | | |
// Maps a filter level to the limit it implies: the level scaled up by
// `shift` bits.
#[inline]
const fn _level_to_limit(level: i32, shift: usize) -> i32 {
  let limit = level << shift;
  limit
}
337 | | |
// Maps a limit to a level by scaling it down by `shift` bits, rounding up.
#[inline]
const fn limit_to_level(limit: i32, shift: usize) -> i32 {
  let unit: i32 = 1 << shift;
  (limit + unit - 1) >> shift
}
342 | | |
// Maps a filter level to the blimit it implies: `3 * level + 4`, scaled up
// by `shift` bits.
#[inline]
const fn _level_to_blimit(level: i32, shift: usize) -> i32 {
  let unscaled = 3 * level + 4;
  unscaled << shift
}
347 | | |
// Maps a blimit to a level: scale down by `shift` bits rounding up, then
// subtract 2 and divide by 3 (truncating toward zero).
#[inline]
const fn blimit_to_level(blimit: i32, shift: usize) -> i32 {
  let unit: i32 = 1 << shift;
  let scaled = (blimit + unit - 1) >> shift;
  (scaled - 2) / 3
}
352 | | |
// Maps a filter level to the threshold it implies: the level divided by 16
// (discarding the low four bits), scaled up by `shift` bits.
#[inline]
const fn _level_to_thresh(level: i32, shift: usize) -> i32 {
  let coarse = level >> 4;
  coarse << shift
}
357 | | |
// Maps a threshold to a level: scale down by `shift` bits rounding up, then
// multiply by 16.
#[inline]
const fn thresh_to_level(thresh: i32, shift: usize) -> i32 {
  let unit: i32 = 1 << shift;
  let scaled = (thresh + unit - 1) >> shift;
  scaled << 4
}
362 | | |
363 | | #[inline] |
364 | 0 | fn nhev4(p1: i32, p0: i32, q0: i32, q1: i32, shift: usize) -> usize { |
365 | 0 | thresh_to_level(cmp::max((p1 - p0).abs(), (q1 - q0).abs()), shift) as usize |
366 | 0 | } Unexecuted instantiation: rav1e::deblock::nhev4 Unexecuted instantiation: rav1e::deblock::nhev4 |
367 | | |
368 | | #[inline] |
369 | 0 | fn mask4(p1: i32, p0: i32, q0: i32, q1: i32, shift: usize) -> usize { |
370 | 0 | cmp::max( |
371 | 0 | limit_to_level(cmp::max((p1 - p0).abs(), (q1 - q0).abs()), shift), |
372 | 0 | blimit_to_level((p0 - q0).abs() * 2 + (p1 - q1).abs() / 2, shift), |
373 | 0 | ) as usize |
374 | 0 | } Unexecuted instantiation: rav1e::deblock::mask4 Unexecuted instantiation: rav1e::deblock::mask4 |
375 | | |
376 | | #[inline] |
377 | 0 | fn deblock_size4_inner( |
378 | 0 | [p1, p0, q0, q1]: [i32; 4], level: usize, bd: usize, |
379 | 0 | ) -> Option<[i32; 4]> { |
380 | 0 | if mask4(p1, p0, q0, q1, bd - 8) <= level { |
381 | 0 | let x = if nhev4(p1, p0, q0, q1, bd - 8) <= level { |
382 | 0 | filter_narrow4_4(p1, p0, q0, q1, bd - 8) |
383 | | } else { |
384 | 0 | filter_narrow2_4(p1, p0, q0, q1, bd - 8) |
385 | | }; |
386 | 0 | Some(x) |
387 | | } else { |
388 | 0 | None |
389 | | } |
390 | 0 | } Unexecuted instantiation: rav1e::deblock::deblock_size4_inner Unexecuted instantiation: rav1e::deblock::deblock_size4_inner |
391 | | |
392 | | // Assumes rec[0] is set 2 taps back from the edge |
393 | 0 | fn deblock_v_size4<T: Pixel>( |
394 | 0 | rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize, |
395 | 0 | ) { |
396 | 0 | for y in 0..4 { |
397 | 0 | let p = &rec[y]; |
398 | 0 | let vals = [p[0].as_(), p[1].as_(), p[2].as_(), p[3].as_()]; |
399 | 0 | if let Some(data) = deblock_size4_inner(vals, level, bd) { |
400 | 0 | copy_horizontal(rec, 0, y, &data); |
401 | 0 | } |
402 | | } |
403 | 0 | } Unexecuted instantiation: rav1e::deblock::deblock_v_size4::<u16> Unexecuted instantiation: rav1e::deblock::deblock_v_size4::<u8> |
404 | | |
405 | | // Assumes rec[0] is set 2 taps back from the edge |
406 | 0 | fn deblock_h_size4<T: Pixel>( |
407 | 0 | rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize, |
408 | 0 | ) { |
409 | 0 | for x in 0..4 { |
410 | 0 | let vals = |
411 | 0 | [rec[0][x].as_(), rec[1][x].as_(), rec[2][x].as_(), rec[3][x].as_()]; |
412 | 0 | if let Some(data) = deblock_size4_inner(vals, level, bd) { |
413 | 0 | copy_vertical(rec, x, 0, &data); |
414 | 0 | } |
415 | | } |
416 | 0 | } Unexecuted instantiation: rav1e::deblock::deblock_h_size4::<u16> Unexecuted instantiation: rav1e::deblock::deblock_h_size4::<u8> |
417 | | |
418 | | // Assumes rec[0] and src[0] are set 2 taps back from the edge. |
419 | | // Accesses four taps, accumulates four pixels into the tally |
420 | 0 | fn sse_size4<T: Pixel>( |
421 | 0 | rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>, |
422 | 0 | tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize, |
423 | 0 | ) { |
424 | 0 | for i in 0..4 { |
425 | 0 | let (p1, p0, q0, q1, a) = if horizontal_p { |
426 | 0 | ( |
427 | 0 | rec[0][i].as_(), |
428 | 0 | rec[1][i].as_(), |
429 | 0 | rec[2][i].as_(), |
430 | 0 | rec[3][i].as_(), |
431 | 0 | [src[0][i].as_(), src[1][i].as_(), src[2][i].as_(), src[3][i].as_()], |
432 | 0 | ) |
433 | | } else { |
434 | 0 | ( |
435 | 0 | rec[i][0].as_(), |
436 | 0 | rec[i][1].as_(), |
437 | 0 | rec[i][2].as_(), |
438 | 0 | rec[i][3].as_(), |
439 | 0 | [src[i][0].as_(), src[i][1].as_(), src[i][2].as_(), src[i][3].as_()], |
440 | 0 | ) |
441 | | }; |
442 | | |
443 | | // three possibilities: no filter, narrow2 and narrow4 |
444 | | // All possibilities produce four outputs |
445 | 0 | let none: [_; 4] = [p1, p0, q0, q1]; |
446 | 0 | let narrow2 = filter_narrow2_4(p1, p0, q0, q1, bd - 8); |
447 | 0 | let narrow4 = filter_narrow4_4(p1, p0, q0, q1, bd - 8); |
448 | | |
449 | | // mask4 sets the dividing line for filter vs no filter |
450 | | // nhev4 sets the dividing line between narrow2 and narrow4 |
451 | 0 | let mask = clamp(mask4(p1, p0, q0, q1, bd - 8), 1, MAX_LOOP_FILTER + 1); |
452 | 0 | let nhev = clamp(nhev4(p1, p0, q0, q1, bd - 8), mask, MAX_LOOP_FILTER + 1); |
453 | | |
454 | | // sse for each; short-circuit the 'special' no-op cases. |
455 | 0 | let sse_none = stride_sse(&a, &none); |
456 | 0 | let sse_narrow2 = |
457 | 0 | if nhev != mask { stride_sse(&a, &narrow2) } else { sse_none }; |
458 | 0 | let sse_narrow4 = if nhev <= MAX_LOOP_FILTER { |
459 | 0 | stride_sse(&a, &narrow4) |
460 | | } else { |
461 | 0 | sse_none |
462 | | }; |
463 | | |
464 | | // accumulate possible filter values into the tally |
465 | | // level 0 is a special case |
466 | 0 | tally[0] += sse_none; |
467 | 0 | tally[mask] -= sse_none; |
468 | 0 | tally[mask] += sse_narrow2; |
469 | 0 | tally[nhev] -= sse_narrow2; |
470 | 0 | tally[nhev] += sse_narrow4; |
471 | | } |
472 | 0 | } Unexecuted instantiation: rav1e::deblock::sse_size4::<u16> Unexecuted instantiation: rav1e::deblock::sse_size4::<u8> |
473 | | |
474 | | #[inline] |
475 | 0 | fn mask6( |
476 | 0 | p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, shift: usize, |
477 | 0 | ) -> usize { |
478 | 0 | cmp::max( |
479 | 0 | limit_to_level( |
480 | 0 | cmp::max( |
481 | 0 | (p2 - p1).abs(), |
482 | 0 | cmp::max((p1 - p0).abs(), cmp::max((q2 - q1).abs(), (q1 - q0).abs())), |
483 | 0 | ), |
484 | 0 | shift, |
485 | 0 | ), |
486 | 0 | blimit_to_level((p0 - q0).abs() * 2 + (p1 - q1).abs() / 2, shift), |
487 | 0 | ) as usize |
488 | 0 | } Unexecuted instantiation: rav1e::deblock::mask6 Unexecuted instantiation: rav1e::deblock::mask6 |
489 | | |
// Returns the largest absolute deviation of p1/p2 from p0 and of q1/q2 from
// q0. Callers compare it against a flatness threshold to pick the wide
// filter.
#[inline]
fn flat6(p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32) -> usize {
  let deviation = (p1 - p0)
    .abs()
    .max((q1 - q0).abs())
    .max((p2 - p0).abs())
    .max((q2 - q0).abs());
  deviation as usize
}
497 | | |
498 | | #[inline] |
499 | 0 | fn deblock_size6_inner( |
500 | 0 | [p2, p1, p0, q0, q1, q2]: [i32; 6], level: usize, bd: usize, |
501 | 0 | ) -> Option<[i32; 4]> { |
502 | 0 | if mask6(p2, p1, p0, q0, q1, q2, bd - 8) <= level { |
503 | 0 | let flat = 1 << (bd - 8); |
504 | 0 | let x = if flat6(p2, p1, p0, q0, q1, q2) <= flat { |
505 | 0 | filter_wide6_4(p2, p1, p0, q0, q1, q2) |
506 | 0 | } else if nhev4(p1, p0, q0, q1, bd - 8) <= level { |
507 | 0 | filter_narrow4_4(p1, p0, q0, q1, bd - 8) |
508 | | } else { |
509 | 0 | filter_narrow2_4(p1, p0, q0, q1, bd - 8) |
510 | | }; |
511 | 0 | Some(x) |
512 | | } else { |
513 | 0 | None |
514 | | } |
515 | 0 | } Unexecuted instantiation: rav1e::deblock::deblock_size6_inner Unexecuted instantiation: rav1e::deblock::deblock_size6_inner |
516 | | |
517 | | // Assumes slice[0] is set 3 taps back from the edge |
518 | 0 | fn deblock_v_size6<T: Pixel>( |
519 | 0 | rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize, |
520 | 0 | ) { |
521 | 0 | for y in 0..4 { |
522 | 0 | let p = &rec[y]; |
523 | 0 | let vals = |
524 | 0 | [p[0].as_(), p[1].as_(), p[2].as_(), p[3].as_(), p[4].as_(), p[5].as_()]; |
525 | 0 | if let Some(data) = deblock_size6_inner(vals, level, bd) { |
526 | 0 | copy_horizontal(rec, 1, y, &data); |
527 | 0 | } |
528 | | } |
529 | 0 | } Unexecuted instantiation: rav1e::deblock::deblock_v_size6::<u16> Unexecuted instantiation: rav1e::deblock::deblock_v_size6::<u8> |
530 | | |
531 | | // Assumes slice[0] is set 3 taps back from the edge |
532 | 0 | fn deblock_h_size6<T: Pixel>( |
533 | 0 | rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize, |
534 | 0 | ) { |
535 | 0 | for x in 0..4 { |
536 | 0 | let vals = [ |
537 | 0 | rec[0][x].as_(), |
538 | 0 | rec[1][x].as_(), |
539 | 0 | rec[2][x].as_(), |
540 | 0 | rec[3][x].as_(), |
541 | 0 | rec[4][x].as_(), |
542 | 0 | rec[5][x].as_(), |
543 | 0 | ]; |
544 | 0 | if let Some(data) = deblock_size6_inner(vals, level, bd) { |
545 | 0 | copy_vertical(rec, x, 1, &data); |
546 | 0 | } |
547 | | } |
548 | 0 | } Unexecuted instantiation: rav1e::deblock::deblock_h_size6::<u16> Unexecuted instantiation: rav1e::deblock::deblock_h_size6::<u8> |
549 | | |
550 | | // Assumes rec[0] and src[0] are set 3 taps back from the edge. |
551 | | // Accesses six taps, accumulates four pixels into the tally |
552 | 0 | fn sse_size6<T: Pixel>( |
553 | 0 | rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>, |
554 | 0 | tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize, |
555 | 0 | ) { |
556 | 0 | let flat = 1 << (bd - 8); |
557 | 0 | for i in 0..4 { |
558 | 0 | let (p2, p1, p0, q0, q1, q2, a) = if horizontal_p { |
559 | | // six taps |
560 | 0 | ( |
561 | 0 | rec[0][i].as_(), |
562 | 0 | rec[1][i].as_(), |
563 | 0 | rec[2][i].as_(), |
564 | 0 | rec[3][i].as_(), |
565 | 0 | rec[4][i].as_(), |
566 | 0 | rec[5][i].as_(), |
567 | 0 | // four pixels to compare so offset one forward |
568 | 0 | [src[1][i].as_(), src[2][i].as_(), src[3][i].as_(), src[4][i].as_()], |
569 | 0 | ) |
570 | | } else { |
571 | | // six taps |
572 | 0 | ( |
573 | 0 | rec[i][0].as_(), |
574 | 0 | rec[i][1].as_(), |
575 | 0 | rec[i][2].as_(), |
576 | 0 | rec[i][3].as_(), |
577 | 0 | rec[i][4].as_(), |
578 | 0 | rec[i][5].as_(), |
579 | 0 | // four pixels to compare so offset one forward |
580 | 0 | [src[i][1].as_(), src[i][2].as_(), src[i][3].as_(), src[i][4].as_()], |
581 | 0 | ) |
582 | | }; |
583 | | |
584 | | // Four possibilities: no filter, wide6, narrow2 and narrow4 |
585 | | // All possibilities produce four outputs |
586 | 0 | let none: [_; 4] = [p1, p0, q0, q1]; |
587 | 0 | let wide6 = filter_wide6_4(p2, p1, p0, q0, q1, q2); |
588 | 0 | let narrow2 = filter_narrow2_4(p1, p0, q0, q1, bd - 8); |
589 | 0 | let narrow4 = filter_narrow4_4(p1, p0, q0, q1, bd - 8); |
590 | | |
591 | | // mask6 sets the dividing line for filter vs no filter |
592 | | // flat6 decides between wide and narrow filters (unrelated to level) |
593 | | // nhev4 sets the dividing line between narrow2 and narrow4 |
594 | 0 | let mask = |
595 | 0 | clamp(mask6(p2, p1, p0, q0, q1, q2, bd - 8), 1, MAX_LOOP_FILTER + 1); |
596 | 0 | let flatp = flat6(p2, p1, p0, q0, q1, q2) <= flat; |
597 | 0 | let nhev = clamp(nhev4(p1, p0, q0, q1, bd - 8), mask, MAX_LOOP_FILTER + 1); |
598 | | |
599 | | // sse for each; short-circuit the 'special' no-op cases. |
600 | 0 | let sse_none = stride_sse(&a, &none); |
601 | 0 | let sse_wide6 = if flatp && mask <= MAX_LOOP_FILTER { |
602 | 0 | stride_sse(&a, &wide6) |
603 | | } else { |
604 | 0 | sse_none |
605 | | }; |
606 | 0 | let sse_narrow2 = |
607 | 0 | if !flatp && nhev != mask { stride_sse(&a, &narrow2) } else { sse_none }; |
608 | 0 | let sse_narrow4 = if !flatp && nhev <= MAX_LOOP_FILTER { |
609 | 0 | stride_sse(&a, &narrow4) |
610 | | } else { |
611 | 0 | sse_none |
612 | | }; |
613 | | |
614 | | // accumulate possible filter values into the tally |
615 | 0 | tally[0] += sse_none; |
616 | 0 | tally[mask] -= sse_none; |
617 | 0 | if flatp { |
618 | 0 | tally[mask] += sse_wide6; |
619 | 0 | } else { |
620 | 0 | tally[mask] += sse_narrow2; |
621 | 0 | tally[nhev] -= sse_narrow2; |
622 | 0 | tally[nhev] += sse_narrow4; |
623 | 0 | } |
624 | | } |
625 | 0 | } Unexecuted instantiation: rav1e::deblock::sse_size6::<u16> Unexecuted instantiation: rav1e::deblock::sse_size6::<u8> |
626 | | |
627 | | #[inline] |
628 | 0 | fn mask8( |
629 | 0 | p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, q3: i32, |
630 | 0 | shift: usize, |
631 | 0 | ) -> usize { |
632 | 0 | cmp::max( |
633 | 0 | limit_to_level( |
634 | 0 | cmp::max( |
635 | 0 | (p3 - p2).abs(), |
636 | 0 | cmp::max( |
637 | 0 | (p2 - p1).abs(), |
638 | 0 | cmp::max( |
639 | 0 | (p1 - p0).abs(), |
640 | 0 | cmp::max( |
641 | 0 | (q3 - q2).abs(), |
642 | 0 | cmp::max((q2 - q1).abs(), (q1 - q0).abs()), |
643 | 0 | ), |
644 | 0 | ), |
645 | 0 | ), |
646 | 0 | ), |
647 | 0 | shift, |
648 | 0 | ), |
649 | 0 | blimit_to_level((p0 - q0).abs() * 2 + (p1 - q1).abs() / 2, shift), |
650 | 0 | ) as usize |
651 | 0 | } Unexecuted instantiation: rav1e::deblock::mask8 Unexecuted instantiation: rav1e::deblock::mask8 |
652 | | |
// Returns the largest absolute deviation of p1/p2/p3 from p0 and of
// q1/q2/q3 from q0. Callers compare it against a flatness threshold to pick
// the wide filter.
#[inline]
fn flat8(
  p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, q3: i32,
) -> usize {
  let deviation = (p1 - p0)
    .abs()
    .max((q1 - q0).abs())
    .max((p2 - p0).abs())
    .max((q2 - q0).abs())
    .max((p3 - p0).abs())
    .max((q3 - q0).abs());
  deviation as usize
}
668 | | |
669 | | #[inline] |
670 | 0 | fn deblock_size8_inner( |
671 | 0 | [p3, p2, p1, p0, q0, q1, q2, q3]: [i32; 8], level: usize, bd: usize, |
672 | 0 | ) -> Option<[i32; 6]> { |
673 | 0 | if mask8(p3, p2, p1, p0, q0, q1, q2, q3, bd - 8) <= level { |
674 | 0 | let flat = 1 << (bd - 8); |
675 | 0 | let x = if flat8(p3, p2, p1, p0, q0, q1, q2, q3) <= flat { |
676 | 0 | filter_wide8_6(p3, p2, p1, p0, q0, q1, q2, q3) |
677 | 0 | } else if nhev4(p1, p0, q0, q1, bd - 8) <= level { |
678 | 0 | filter_narrow4_6(p2, p1, p0, q0, q1, q2, bd - 8) |
679 | | } else { |
680 | 0 | filter_narrow2_6(p2, p1, p0, q0, q1, q2, bd - 8) |
681 | | }; |
682 | 0 | Some(x) |
683 | | } else { |
684 | 0 | None |
685 | | } |
686 | 0 | } Unexecuted instantiation: rav1e::deblock::deblock_size8_inner Unexecuted instantiation: rav1e::deblock::deblock_size8_inner |
687 | | |
688 | | // Assumes rec[0] is set 4 taps back from the edge |
689 | 0 | fn deblock_v_size8<T: Pixel>( |
690 | 0 | rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize, |
691 | 0 | ) { |
692 | 0 | for y in 0..4 { |
693 | 0 | let p = &rec[y]; |
694 | 0 | let vals = [ |
695 | 0 | p[0].as_(), |
696 | 0 | p[1].as_(), |
697 | 0 | p[2].as_(), |
698 | 0 | p[3].as_(), |
699 | 0 | p[4].as_(), |
700 | 0 | p[5].as_(), |
701 | 0 | p[6].as_(), |
702 | 0 | p[7].as_(), |
703 | 0 | ]; |
704 | 0 | if let Some(data) = deblock_size8_inner(vals, level, bd) { |
705 | 0 | copy_horizontal(rec, 1, y, &data); |
706 | 0 | } |
707 | | } |
708 | 0 | } Unexecuted instantiation: rav1e::deblock::deblock_v_size8::<u16> Unexecuted instantiation: rav1e::deblock::deblock_v_size8::<u8> |
709 | | |
710 | | // Assumes rec[0] is set 4 taps back from the edge |
711 | 0 | fn deblock_h_size8<T: Pixel>( |
712 | 0 | rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize, |
713 | 0 | ) { |
714 | 0 | for x in 0..4 { |
715 | 0 | let vals = [ |
716 | 0 | rec[0][x].as_(), |
717 | 0 | rec[1][x].as_(), |
718 | 0 | rec[2][x].as_(), |
719 | 0 | rec[3][x].as_(), |
720 | 0 | rec[4][x].as_(), |
721 | 0 | rec[5][x].as_(), |
722 | 0 | rec[6][x].as_(), |
723 | 0 | rec[7][x].as_(), |
724 | 0 | ]; |
725 | 0 | if let Some(data) = deblock_size8_inner(vals, level, bd) { |
726 | 0 | copy_vertical(rec, x, 1, &data); |
727 | 0 | } |
728 | | } |
729 | 0 | } Unexecuted instantiation: rav1e::deblock::deblock_h_size8::<u16> Unexecuted instantiation: rav1e::deblock::deblock_h_size8::<u8> |
730 | | |
731 | | // Assumes rec[0] and src[0] are set 4 taps back from the edge. |
732 | | // Accesses eight taps, accumulates six pixels into the tally |
733 | 0 | fn sse_size8<T: Pixel>( |
734 | 0 | rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>, |
735 | 0 | tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize, |
736 | 0 | ) { |
737 | 0 | let flat = 1 << (bd - 8); |
738 | | |
739 | 0 | for i in 0..4 { |
740 | 0 | let (p3, p2, p1, p0, q0, q1, q2, q3, a) = if horizontal_p { |
741 | | // eight taps |
742 | 0 | ( |
743 | 0 | rec[0][i].as_(), |
744 | 0 | rec[1][i].as_(), |
745 | 0 | rec[2][i].as_(), |
746 | 0 | rec[3][i].as_(), |
747 | 0 | rec[4][i].as_(), |
748 | 0 | rec[5][i].as_(), |
749 | 0 | rec[6][i].as_(), |
750 | 0 | rec[7][i].as_(), |
751 | 0 | // six pixels to compare so offset one forward |
752 | 0 | [ |
753 | 0 | src[1][i].as_(), |
754 | 0 | src[2][i].as_(), |
755 | 0 | src[3][i].as_(), |
756 | 0 | src[4][i].as_(), |
757 | 0 | src[5][i].as_(), |
758 | 0 | src[6][i].as_(), |
759 | 0 | ], |
760 | 0 | ) |
761 | | } else { |
762 | | // eight taps |
763 | 0 | ( |
764 | 0 | rec[i][0].as_(), |
765 | 0 | rec[i][1].as_(), |
766 | 0 | rec[i][2].as_(), |
767 | 0 | rec[i][3].as_(), |
768 | 0 | rec[i][4].as_(), |
769 | 0 | rec[i][5].as_(), |
770 | 0 | rec[i][6].as_(), |
771 | 0 | rec[i][7].as_(), |
772 | 0 | // six pixels to compare so offset one forward |
773 | 0 | [ |
774 | 0 | src[i][1].as_(), |
775 | 0 | src[i][2].as_(), |
776 | 0 | src[i][3].as_(), |
777 | 0 | src[i][4].as_(), |
778 | 0 | src[i][5].as_(), |
779 | 0 | src[i][6].as_(), |
780 | 0 | ], |
781 | 0 | ) |
782 | | }; |
783 | | |
784 | | // Four possibilities: no filter, wide8, narrow2 and narrow4 |
785 | 0 | let none: [_; 6] = [p2, p1, p0, q0, q1, q2]; |
786 | 0 | let wide8: [_; 6] = filter_wide8_6(p3, p2, p1, p0, q0, q1, q2, q3); |
787 | 0 | let narrow2: [_; 6] = filter_narrow2_6(p2, p1, p0, q0, q1, q2, bd - 8); |
788 | 0 | let narrow4: [_; 6] = filter_narrow4_6(p2, p1, p0, q0, q1, q2, bd - 8); |
789 | | |
790 | | // mask8 sets the dividing line for filter vs no filter |
791 | | // flat8 decides between wide and narrow filters (unrelated to level) |
792 | | // nhev4 sets the dividing line between narrow2 and narrow4 |
793 | 0 | let mask = clamp( |
794 | 0 | mask8(p3, p2, p1, p0, q0, q1, q2, q3, bd - 8), |
795 | | 1, |
796 | 0 | MAX_LOOP_FILTER + 1, |
797 | | ); |
798 | 0 | let flatp = flat8(p3, p2, p1, p0, q0, q1, q2, q3) <= flat; |
799 | 0 | let nhev = clamp(nhev4(p1, p0, q0, q1, bd - 8), mask, MAX_LOOP_FILTER + 1); |
800 | | |
801 | | // sse for each; short-circuit the 'special' no-op cases. |
802 | 0 | let sse_none = stride_sse(&a, &none); |
803 | 0 | let sse_wide8 = if flatp && mask <= MAX_LOOP_FILTER { |
804 | 0 | stride_sse(&a, &wide8) |
805 | | } else { |
806 | 0 | sse_none |
807 | | }; |
808 | 0 | let sse_narrow2 = |
809 | 0 | if !flatp && nhev != mask { stride_sse(&a, &narrow2) } else { sse_none }; |
810 | 0 | let sse_narrow4 = if !flatp && nhev <= MAX_LOOP_FILTER { |
811 | 0 | stride_sse(&a, &narrow4) |
812 | | } else { |
813 | 0 | sse_none |
814 | | }; |
815 | | |
816 | | // accumulate possible filter values into the tally |
817 | 0 | tally[0] += sse_none; |
818 | 0 | tally[mask] -= sse_none; |
819 | 0 | if flatp { |
820 | 0 | tally[mask] += sse_wide8; |
821 | 0 | } else { |
822 | 0 | tally[mask] += sse_narrow2; |
823 | 0 | tally[nhev] -= sse_narrow2; |
824 | 0 | tally[nhev] += sse_narrow4; |
825 | 0 | } |
826 | | } |
827 | 0 | } Unexecuted instantiation: rav1e::deblock::sse_size8::<u16> Unexecuted instantiation: rav1e::deblock::sse_size8::<u8> |
828 | | |
// Outer flatness measure for the 14-tap filter: the largest absolute
// difference between any of the three outermost taps on a side (p4..p6 or
// q4..q6) and that side's tap nearest the edge (p0 or q0).
#[inline]
fn flat14_outer(
  p6: i32, p5: i32, p4: i32, p0: i32, q0: i32, q4: i32, q5: i32, q6: i32,
) -> usize {
  let p_spread = (p4 - p0).abs().max((p5 - p0).abs()).max((p6 - p0).abs());
  let q_spread = (q4 - q0).abs().max((q5 - q0).abs()).max((q6 - q0).abs());
  p_spread.max(q_spread) as usize
}
844 | | |
845 | | #[inline] |
846 | 0 | fn deblock_size14_inner( |
847 | 0 | [p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6]: [i32; 14], |
848 | 0 | level: usize, bd: usize, |
849 | 0 | ) -> Option<[i32; 12]> { |
850 | | // 'mask' test |
851 | 0 | if mask8(p3, p2, p1, p0, q0, q1, q2, q3, bd - 8) <= level { |
852 | 0 | let flat = 1 << (bd - 8); |
853 | | // inner flatness test |
854 | 0 | let x = if flat8(p3, p2, p1, p0, q0, q1, q2, q3) <= flat { |
855 | | // outer flatness test |
856 | 0 | if flat14_outer(p6, p5, p4, p0, q0, q4, q5, q6) <= flat { |
857 | | // sufficient flatness across 14 pixel width; run full-width filter |
858 | 0 | filter_wide14_12( |
859 | 0 | p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, |
860 | | ) |
861 | | } else { |
862 | | // only flat in inner area, run 8-tap |
863 | 0 | filter_wide8_12(p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5) |
864 | | } |
865 | 0 | } else if nhev4(p1, p0, q0, q1, bd - 8) <= level { |
866 | | // not flat, run narrow filter |
867 | 0 | filter_narrow4_12(p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, bd - 8) |
868 | | } else { |
869 | 0 | filter_narrow2_12(p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, bd - 8) |
870 | | }; |
871 | 0 | Some(x) |
872 | | } else { |
873 | 0 | None |
874 | | } |
875 | 0 | } Unexecuted instantiation: rav1e::deblock::deblock_size14_inner Unexecuted instantiation: rav1e::deblock::deblock_size14_inner |
876 | | |
877 | | // Assumes rec[0] is set 7 taps back from the edge |
878 | 0 | fn deblock_v_size14<T: Pixel>( |
879 | 0 | rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize, |
880 | 0 | ) { |
881 | 0 | for y in 0..4 { |
882 | 0 | let p = &rec[y]; |
883 | 0 | let vals = [ |
884 | 0 | p[0].as_(), |
885 | 0 | p[1].as_(), |
886 | 0 | p[2].as_(), |
887 | 0 | p[3].as_(), |
888 | 0 | p[4].as_(), |
889 | 0 | p[5].as_(), |
890 | 0 | p[6].as_(), |
891 | 0 | p[7].as_(), |
892 | 0 | p[8].as_(), |
893 | 0 | p[9].as_(), |
894 | 0 | p[10].as_(), |
895 | 0 | p[11].as_(), |
896 | 0 | p[12].as_(), |
897 | 0 | p[13].as_(), |
898 | 0 | ]; |
899 | 0 | if let Some(data) = deblock_size14_inner(vals, level, bd) { |
900 | 0 | copy_horizontal(rec, 1, y, &data); |
901 | 0 | } |
902 | | } |
903 | 0 | } Unexecuted instantiation: rav1e::deblock::deblock_v_size14::<u16> Unexecuted instantiation: rav1e::deblock::deblock_v_size14::<u8> |
904 | | |
905 | | // Assumes rec[0] is set 7 taps back from the edge |
906 | 0 | fn deblock_h_size14<T: Pixel>( |
907 | 0 | rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize, |
908 | 0 | ) { |
909 | 0 | for x in 0..4 { |
910 | 0 | let vals = [ |
911 | 0 | rec[0][x].as_(), |
912 | 0 | rec[1][x].as_(), |
913 | 0 | rec[2][x].as_(), |
914 | 0 | rec[3][x].as_(), |
915 | 0 | rec[4][x].as_(), |
916 | 0 | rec[5][x].as_(), |
917 | 0 | rec[6][x].as_(), |
918 | 0 | rec[7][x].as_(), |
919 | 0 | rec[8][x].as_(), |
920 | 0 | rec[9][x].as_(), |
921 | 0 | rec[10][x].as_(), |
922 | 0 | rec[11][x].as_(), |
923 | 0 | rec[12][x].as_(), |
924 | 0 | rec[13][x].as_(), |
925 | 0 | ]; |
926 | 0 | if let Some(data) = deblock_size14_inner(vals, level, bd) { |
927 | 0 | copy_vertical(rec, x, 1, &data); |
928 | 0 | } |
929 | | } |
930 | 0 | } Unexecuted instantiation: rav1e::deblock::deblock_h_size14::<u16> Unexecuted instantiation: rav1e::deblock::deblock_h_size14::<u8> |
931 | | |
932 | | // Assumes rec[0] and src[0] are set 7 taps back from the edge. |
933 | | // Accesses fourteen taps, accumulates twelve pixels into the tally |
934 | 0 | fn sse_size14<T: Pixel>( |
935 | 0 | rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>, |
936 | 0 | tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize, |
937 | 0 | ) { |
938 | 0 | let flat = 1 << (bd - 8); |
939 | 0 | for i in 0..4 { |
940 | 0 | let (p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, a) = |
941 | 0 | if horizontal_p { |
942 | | // 14 taps |
943 | 0 | ( |
944 | 0 | rec[0][i].as_(), |
945 | 0 | rec[1][i].as_(), |
946 | 0 | rec[2][i].as_(), |
947 | 0 | rec[3][i].as_(), |
948 | 0 | rec[4][i].as_(), |
949 | 0 | rec[5][i].as_(), |
950 | 0 | rec[6][i].as_(), |
951 | 0 | rec[7][i].as_(), |
952 | 0 | rec[8][i].as_(), |
953 | 0 | rec[9][i].as_(), |
954 | 0 | rec[10][i].as_(), |
955 | 0 | rec[11][i].as_(), |
956 | 0 | rec[12][i].as_(), |
957 | 0 | rec[13][i].as_(), |
958 | 0 | // 12 pixels to compare so offset one forward |
959 | 0 | [ |
960 | 0 | src[1][i].as_(), |
961 | 0 | src[2][i].as_(), |
962 | 0 | src[3][i].as_(), |
963 | 0 | src[4][i].as_(), |
964 | 0 | src[5][i].as_(), |
965 | 0 | src[6][i].as_(), |
966 | 0 | src[7][i].as_(), |
967 | 0 | src[8][i].as_(), |
968 | 0 | src[9][i].as_(), |
969 | 0 | src[10][i].as_(), |
970 | 0 | src[11][i].as_(), |
971 | 0 | src[12][i].as_(), |
972 | 0 | ], |
973 | 0 | ) |
974 | | } else { |
975 | | // 14 taps |
976 | 0 | ( |
977 | 0 | rec[i][0].as_(), |
978 | 0 | rec[i][1].as_(), |
979 | 0 | rec[i][2].as_(), |
980 | 0 | rec[i][3].as_(), |
981 | 0 | rec[i][4].as_(), |
982 | 0 | rec[i][5].as_(), |
983 | 0 | rec[i][6].as_(), |
984 | 0 | rec[i][7].as_(), |
985 | 0 | rec[i][8].as_(), |
986 | 0 | rec[i][9].as_(), |
987 | 0 | rec[i][10].as_(), |
988 | 0 | rec[i][11].as_(), |
989 | 0 | rec[i][12].as_(), |
990 | 0 | rec[i][13].as_(), |
991 | 0 | // 12 pixels to compare so offset one forward |
992 | 0 | [ |
993 | 0 | src[i][1].as_(), |
994 | 0 | src[i][2].as_(), |
995 | 0 | src[i][3].as_(), |
996 | 0 | src[i][4].as_(), |
997 | 0 | src[i][5].as_(), |
998 | 0 | src[i][6].as_(), |
999 | 0 | src[i][7].as_(), |
1000 | 0 | src[i][8].as_(), |
1001 | 0 | src[i][9].as_(), |
1002 | 0 | src[i][10].as_(), |
1003 | 0 | src[i][11].as_(), |
1004 | 0 | src[i][12].as_(), |
1005 | 0 | ], |
1006 | 0 | ) |
1007 | | }; |
1008 | | |
1009 | | // Five possibilities: no filter, wide14, wide8, narrow2 and narrow4 |
1010 | 0 | let none: [i32; 12] = [p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5]; |
1011 | 0 | let wide14 = |
1012 | 0 | filter_wide14_12(p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6); |
1013 | 0 | let wide8 = |
1014 | 0 | filter_wide8_12(p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5); |
1015 | 0 | let narrow2 = filter_narrow2_12( |
1016 | 0 | p5, |
1017 | 0 | p4, |
1018 | 0 | p3, |
1019 | 0 | p2, |
1020 | 0 | p1, |
1021 | 0 | p0, |
1022 | 0 | q0, |
1023 | 0 | q1, |
1024 | 0 | q2, |
1025 | 0 | q3, |
1026 | 0 | q4, |
1027 | 0 | q5, |
1028 | 0 | bd - 8, |
1029 | | ); |
1030 | 0 | let narrow4 = filter_narrow4_12( |
1031 | 0 | p5, |
1032 | 0 | p4, |
1033 | 0 | p3, |
1034 | 0 | p2, |
1035 | 0 | p1, |
1036 | 0 | p0, |
1037 | 0 | q0, |
1038 | 0 | q1, |
1039 | 0 | q2, |
1040 | 0 | q3, |
1041 | 0 | q4, |
1042 | 0 | q5, |
1043 | 0 | bd - 8, |
1044 | | ); |
1045 | | |
1046 | | // mask8 sets the dividing line for filter vs no filter |
1047 | | // flat8 decides between wide and narrow filters (unrelated to level) |
1048 | | // flat14 decides between wide14 and wide8 filters |
1049 | | // nhev4 sets the dividing line between narrow2 and narrow4 |
1050 | 0 | let mask = clamp( |
1051 | 0 | mask8(p3, p2, p1, p0, q0, q1, q2, q3, bd - 8), |
1052 | | 1, |
1053 | 0 | MAX_LOOP_FILTER + 1, |
1054 | | ); |
1055 | 0 | let flat8p = flat8(p3, p2, p1, p0, q0, q1, q2, q3) <= flat; |
1056 | 0 | let flat14p = flat14_outer(p6, p5, p4, p0, q0, q4, q5, q6) <= flat; |
1057 | 0 | let nhev = clamp(nhev4(p1, p0, q0, q1, bd - 8), mask, MAX_LOOP_FILTER + 1); |
1058 | | |
1059 | | // sse for each; short-circuit the 'special' no-op cases. |
1060 | 0 | let sse_none = stride_sse(&a, &none); |
1061 | 0 | let sse_wide8 = if flat8p && !flat14p && mask <= MAX_LOOP_FILTER { |
1062 | 0 | stride_sse(&a, &wide8) |
1063 | | } else { |
1064 | 0 | sse_none |
1065 | | }; |
1066 | 0 | let sse_wide14 = if flat8p && flat14p && mask <= MAX_LOOP_FILTER { |
1067 | 0 | stride_sse(&a, &wide14) |
1068 | | } else { |
1069 | 0 | sse_none |
1070 | | }; |
1071 | 0 | let sse_narrow2 = if !flat8p && nhev != mask { |
1072 | 0 | stride_sse(&a, &narrow2) |
1073 | | } else { |
1074 | 0 | sse_none |
1075 | | }; |
1076 | 0 | let sse_narrow4 = if !flat8p && nhev <= MAX_LOOP_FILTER { |
1077 | 0 | stride_sse(&a, &narrow4) |
1078 | | } else { |
1079 | 0 | sse_none |
1080 | | }; |
1081 | | |
1082 | | // accumulate possible filter values into the tally |
1083 | 0 | tally[0] += sse_none; |
1084 | 0 | tally[mask] -= sse_none; |
1085 | 0 | if flat8p { |
1086 | 0 | if flat14p { |
1087 | 0 | tally[mask] += sse_wide14; |
1088 | 0 | } else { |
1089 | 0 | tally[mask] += sse_wide8; |
1090 | 0 | } |
1091 | 0 | } else { |
1092 | 0 | tally[mask] += sse_narrow2; |
1093 | 0 | tally[nhev] -= sse_narrow2; |
1094 | 0 | tally[nhev] += sse_narrow4; |
1095 | 0 | } |
1096 | | } |
1097 | 0 | } Unexecuted instantiation: rav1e::deblock::sse_size14::<u16> Unexecuted instantiation: rav1e::deblock::sse_size14::<u8> |
1098 | | |
1099 | 0 | fn filter_v_edge<T: Pixel>( |
1100 | 0 | deblock: &DeblockState, blocks: &TileBlocks, bo: TileBlockOffset, |
1101 | 0 | p: &mut PlaneRegionMut<T>, pli: usize, bd: usize, xdec: usize, ydec: usize, |
1102 | 0 | ) { |
1103 | 0 | let block = &blocks[bo]; |
1104 | 0 | let txsize = if pli == 0 { |
1105 | 0 | block.txsize |
1106 | | } else { |
1107 | 0 | block.bsize.largest_chroma_tx_size(xdec, ydec) |
1108 | | }; |
1109 | 0 | let tx_edge = (bo.0.x >> xdec) & (txsize.width_mi() - 1) == 0; |
1110 | 0 | if tx_edge { |
1111 | 0 | let prev_block = deblock_left(blocks, bo, &p.as_const()); |
1112 | 0 | let block_edge = bo.0.x & (block.n4_w as usize - 1) == 0; |
1113 | 0 | let filter_size = |
1114 | 0 | deblock_size(block, prev_block, &p.as_const(), pli, true, block_edge); |
1115 | 0 | if filter_size > 0 { |
1116 | 0 | let level = deblock_level(deblock, block, prev_block, pli, true); |
1117 | 0 | if level > 0 { |
1118 | 0 | let po = bo.plane_offset(p.plane_cfg); |
1119 | 0 | let mut plane_region = p.subregion_mut(Area::Rect { |
1120 | 0 | x: po.x - (filter_size >> 1) as isize, |
1121 | 0 | y: po.y, |
1122 | 0 | width: filter_size, |
1123 | 0 | height: 4, |
1124 | 0 | }); |
1125 | 0 | match filter_size { |
1126 | 0 | 4 => { |
1127 | 0 | deblock_v_size4(&mut plane_region, level, bd); |
1128 | 0 | } |
1129 | 0 | 6 => { |
1130 | 0 | deblock_v_size6(&mut plane_region, level, bd); |
1131 | 0 | } |
1132 | 0 | 8 => { |
1133 | 0 | deblock_v_size8(&mut plane_region, level, bd); |
1134 | 0 | } |
1135 | 0 | 14 => { |
1136 | 0 | deblock_v_size14(&mut plane_region, level, bd); |
1137 | 0 | } |
1138 | 0 | _ => unreachable!(), |
1139 | | } |
1140 | 0 | } |
1141 | 0 | } |
1142 | 0 | } |
1143 | 0 | } Unexecuted instantiation: rav1e::deblock::filter_v_edge::<u16> Unexecuted instantiation: rav1e::deblock::filter_v_edge::<u8> |
1144 | | |
1145 | 0 | fn sse_v_edge<T: Pixel>( |
1146 | 0 | blocks: &TileBlocks, bo: TileBlockOffset, rec_plane: &PlaneRegion<T>, |
1147 | 0 | src_plane: &PlaneRegion<T>, tally: &mut [i64; MAX_LOOP_FILTER + 2], |
1148 | 0 | pli: usize, bd: usize, xdec: usize, ydec: usize, |
1149 | 0 | ) { |
1150 | 0 | let block = &blocks[bo]; |
1151 | 0 | let txsize = if pli == 0 { |
1152 | 0 | block.txsize |
1153 | | } else { |
1154 | 0 | block.bsize.largest_chroma_tx_size(xdec, ydec) |
1155 | | }; |
1156 | 0 | let tx_edge = (bo.0.x >> xdec) & (txsize.width_mi() - 1) == 0; |
1157 | 0 | if tx_edge { |
1158 | 0 | let prev_block = deblock_left(blocks, bo, rec_plane); |
1159 | 0 | let block_edge = bo.0.x & (block.n4_w as usize - 1) == 0; |
1160 | 0 | let filter_size = |
1161 | 0 | deblock_size(block, prev_block, rec_plane, pli, true, block_edge); |
1162 | 0 | if filter_size > 0 { |
1163 | 0 | let po = bo.plane_offset(rec_plane.plane_cfg); // rec and src have identical subsampling |
1164 | 0 | let rec_region = rec_plane.subregion(Area::Rect { |
1165 | 0 | x: po.x - (filter_size >> 1) as isize, |
1166 | 0 | y: po.y, |
1167 | 0 | width: filter_size, |
1168 | 0 | height: 4, |
1169 | 0 | }); |
1170 | 0 | let src_region = src_plane.subregion(Area::Rect { |
1171 | 0 | x: po.x - (filter_size >> 1) as isize, |
1172 | 0 | y: po.y, |
1173 | 0 | width: filter_size, |
1174 | 0 | height: 4, |
1175 | 0 | }); |
1176 | 0 | match filter_size { |
1177 | 0 | 4 => { |
1178 | 0 | sse_size4(&rec_region, &src_region, tally, false, bd); |
1179 | 0 | } |
1180 | 0 | 6 => { |
1181 | 0 | sse_size6(&rec_region, &src_region, tally, false, bd); |
1182 | 0 | } |
1183 | 0 | 8 => { |
1184 | 0 | sse_size8(&rec_region, &src_region, tally, false, bd); |
1185 | 0 | } |
1186 | 0 | 14 => { |
1187 | 0 | sse_size14(&rec_region, &src_region, tally, false, bd); |
1188 | 0 | } |
1189 | 0 | _ => unreachable!(), |
1190 | | } |
1191 | 0 | } |
1192 | 0 | } |
1193 | 0 | } Unexecuted instantiation: rav1e::deblock::sse_v_edge::<u16> Unexecuted instantiation: rav1e::deblock::sse_v_edge::<u8> |
1194 | | |
1195 | 0 | fn filter_h_edge<T: Pixel>( |
1196 | 0 | deblock: &DeblockState, blocks: &TileBlocks, bo: TileBlockOffset, |
1197 | 0 | p: &mut PlaneRegionMut<T>, pli: usize, bd: usize, xdec: usize, ydec: usize, |
1198 | 0 | ) { |
1199 | 0 | let block = &blocks[bo]; |
1200 | 0 | let txsize = if pli == 0 { |
1201 | 0 | block.txsize |
1202 | | } else { |
1203 | 0 | block.bsize.largest_chroma_tx_size(xdec, ydec) |
1204 | | }; |
1205 | 0 | let tx_edge = (bo.0.y >> ydec) & (txsize.height_mi() - 1) == 0; |
1206 | 0 | if tx_edge { |
1207 | 0 | let prev_block = deblock_up(blocks, bo, &p.as_const()); |
1208 | 0 | let block_edge = bo.0.y & (block.n4_h as usize - 1) == 0; |
1209 | 0 | let filter_size = |
1210 | 0 | deblock_size(block, prev_block, &p.as_const(), pli, false, block_edge); |
1211 | 0 | if filter_size > 0 { |
1212 | 0 | let level = deblock_level(deblock, block, prev_block, pli, false); |
1213 | 0 | if level > 0 { |
1214 | 0 | let po = bo.plane_offset(p.plane_cfg); |
1215 | 0 | let mut plane_region = p.subregion_mut(Area::Rect { |
1216 | 0 | x: po.x, |
1217 | 0 | y: po.y - (filter_size >> 1) as isize, |
1218 | 0 | width: 4, |
1219 | 0 | height: filter_size, |
1220 | 0 | }); |
1221 | 0 | match filter_size { |
1222 | 0 | 4 => { |
1223 | 0 | deblock_h_size4(&mut plane_region, level, bd); |
1224 | 0 | } |
1225 | 0 | 6 => { |
1226 | 0 | deblock_h_size6(&mut plane_region, level, bd); |
1227 | 0 | } |
1228 | 0 | 8 => { |
1229 | 0 | deblock_h_size8(&mut plane_region, level, bd); |
1230 | 0 | } |
1231 | 0 | 14 => { |
1232 | 0 | deblock_h_size14(&mut plane_region, level, bd); |
1233 | 0 | } |
1234 | 0 | _ => unreachable!(), |
1235 | | } |
1236 | 0 | } |
1237 | 0 | } |
1238 | 0 | } |
1239 | 0 | } Unexecuted instantiation: rav1e::deblock::filter_h_edge::<u16> Unexecuted instantiation: rav1e::deblock::filter_h_edge::<u8> |
1240 | | |
1241 | 0 | fn sse_h_edge<T: Pixel>( |
1242 | 0 | blocks: &TileBlocks, bo: TileBlockOffset, rec_plane: &PlaneRegion<T>, |
1243 | 0 | src_plane: &PlaneRegion<T>, tally: &mut [i64; MAX_LOOP_FILTER + 2], |
1244 | 0 | pli: usize, bd: usize, xdec: usize, ydec: usize, |
1245 | 0 | ) { |
1246 | 0 | let block = &blocks[bo]; |
1247 | 0 | let txsize = if pli == 0 { |
1248 | 0 | block.txsize |
1249 | | } else { |
1250 | 0 | block.bsize.largest_chroma_tx_size(xdec, ydec) |
1251 | | }; |
1252 | 0 | let tx_edge = (bo.0.y >> ydec) & (txsize.height_mi() - 1) == 0; |
1253 | 0 | if tx_edge { |
1254 | 0 | let prev_block = deblock_up(blocks, bo, rec_plane); |
1255 | 0 | let block_edge = bo.0.y & (block.n4_h as usize - 1) == 0; |
1256 | 0 | let filter_size = |
1257 | 0 | deblock_size(block, prev_block, rec_plane, pli, true, block_edge); |
1258 | 0 | if filter_size > 0 { |
1259 | 0 | let po = bo.plane_offset(rec_plane.plane_cfg); // rec and src have identical subsampling |
1260 | 0 | let rec_region = rec_plane.subregion(Area::Rect { |
1261 | 0 | x: po.x, |
1262 | 0 | y: po.y - (filter_size >> 1) as isize, |
1263 | 0 | width: 4, |
1264 | 0 | height: filter_size, |
1265 | 0 | }); |
1266 | 0 | let src_region = src_plane.subregion(Area::Rect { |
1267 | 0 | x: po.x, |
1268 | 0 | y: po.y - (filter_size >> 1) as isize, |
1269 | 0 | width: 4, |
1270 | 0 | height: filter_size, |
1271 | 0 | }); |
1272 | | |
1273 | 0 | match filter_size { |
1274 | 0 | 4 => { |
1275 | 0 | sse_size4(&rec_region, &src_region, tally, true, bd); |
1276 | 0 | } |
1277 | 0 | 6 => { |
1278 | 0 | sse_size6(&rec_region, &src_region, tally, true, bd); |
1279 | 0 | } |
1280 | 0 | 8 => { |
1281 | 0 | sse_size8(&rec_region, &src_region, tally, true, bd); |
1282 | 0 | } |
1283 | 0 | 14 => { |
1284 | 0 | sse_size14(&rec_region, &src_region, tally, true, bd); |
1285 | 0 | } |
1286 | 0 | _ => unreachable!(), |
1287 | | } |
1288 | 0 | } |
1289 | 0 | } |
1290 | 0 | } Unexecuted instantiation: rav1e::deblock::sse_h_edge::<u16> Unexecuted instantiation: rav1e::deblock::sse_h_edge::<u8> |
1291 | | |
1292 | | // Deblocks all edges, vertical and horizontal, in a single plane |
1293 | | #[profiling::function] |
1294 | | pub fn deblock_plane<T: Pixel>( |
1295 | | deblock: &DeblockState, p: &mut PlaneRegionMut<T>, pli: usize, |
1296 | | blocks: &TileBlocks, crop_w: usize, crop_h: usize, bd: usize, |
1297 | | ) { |
1298 | | let xdec = p.plane_cfg.xdec; |
1299 | | let ydec = p.plane_cfg.ydec; |
1300 | | assert!(xdec <= 1 && ydec <= 1); |
1301 | | |
1302 | | match pli { |
1303 | | 0 => { |
1304 | | if deblock.levels[0] == 0 && deblock.levels[1] == 0 { |
1305 | | return; |
1306 | | } |
1307 | | } |
1308 | | 1 => { |
1309 | | if deblock.levels[2] == 0 { |
1310 | | return; |
1311 | | } |
1312 | | } |
1313 | | 2 => { |
1314 | | if deblock.levels[3] == 0 { |
1315 | | return; |
1316 | | } |
1317 | | } |
1318 | | _ => return, |
1319 | | } |
1320 | | |
1321 | | let rect = p.rect(); |
1322 | | let cols = (cmp::min( |
1323 | | blocks.cols(), |
1324 | | ((crop_w - rect.x as usize) + MI_SIZE - 1) >> MI_SIZE_LOG2, |
1325 | | ) + (1 << xdec >> 1)) |
1326 | | >> xdec |
1327 | | << xdec; // Clippy can go suck an egg |
1328 | | let rows = (cmp::min( |
1329 | | blocks.rows(), |
1330 | | ((crop_h - rect.y as usize) + MI_SIZE - 1) >> MI_SIZE_LOG2, |
1331 | | ) + (1 << ydec >> 1)) |
1332 | | >> ydec |
1333 | | << ydec; // Clippy can go suck an egg |
1334 | | |
1335 | | // vertical edge filtering leads horizontal by one full MI-sized |
1336 | | // row (and horizontal filtering doesn't happen along the upper |
1337 | | // edge). Unroll to avoid corner-cases. |
1338 | | if rows > 0 { |
1339 | | for x in (1 << xdec..cols).step_by(1 << xdec) { |
1340 | | filter_v_edge( |
1341 | | deblock, |
1342 | | blocks, |
1343 | | TileBlockOffset(BlockOffset { x, y: 0 }), |
1344 | | p, |
1345 | | pli, |
1346 | | bd, |
1347 | | xdec, |
1348 | | ydec, |
1349 | | ); |
1350 | | } |
1351 | | if rows > 1 << ydec { |
1352 | | for x in (1 << xdec..cols).step_by(1 << xdec) { |
1353 | | filter_v_edge( |
1354 | | deblock, |
1355 | | blocks, |
1356 | | TileBlockOffset(BlockOffset { x, y: 1 << ydec }), |
1357 | | p, |
1358 | | pli, |
1359 | | bd, |
1360 | | xdec, |
1361 | | ydec, |
1362 | | ); |
1363 | | } |
1364 | | } |
1365 | | } |
1366 | | |
1367 | | // filter rows where vertical and horizontal edge filtering both |
1368 | | // happen (horizontal edge filtering lags vertical by one row). |
1369 | | for y in ((2 << ydec)..rows).step_by(1 << ydec) { |
1370 | | // Check for vertical edge at first MI block boundary on this row |
1371 | | if cols > 1 << xdec { |
1372 | | filter_v_edge( |
1373 | | deblock, |
1374 | | blocks, |
1375 | | TileBlockOffset(BlockOffset { x: 1 << xdec, y }), |
1376 | | p, |
1377 | | pli, |
1378 | | bd, |
1379 | | xdec, |
1380 | | ydec, |
1381 | | ); |
1382 | | } |
1383 | | // run the rest of the row with both vertical and horizontal edge filtering. |
1384 | | // Horizontal lags vertical edge by one row and two columns. |
1385 | | for x in (2 << xdec..cols).step_by(1 << xdec) { |
1386 | | filter_v_edge( |
1387 | | deblock, |
1388 | | blocks, |
1389 | | TileBlockOffset(BlockOffset { x, y }), |
1390 | | p, |
1391 | | pli, |
1392 | | bd, |
1393 | | xdec, |
1394 | | ydec, |
1395 | | ); |
1396 | | filter_h_edge( |
1397 | | deblock, |
1398 | | blocks, |
1399 | | TileBlockOffset(BlockOffset { |
1400 | | x: x - (2 << xdec), |
1401 | | y: y - (1 << ydec), |
1402 | | }), |
1403 | | p, |
1404 | | pli, |
1405 | | bd, |
1406 | | xdec, |
1407 | | ydec, |
1408 | | ); |
1409 | | } |
1410 | | // ..and the last two horizontal edges for the row |
1411 | | if cols >= 2 << xdec { |
1412 | | filter_h_edge( |
1413 | | deblock, |
1414 | | blocks, |
1415 | | TileBlockOffset(BlockOffset { |
1416 | | x: cols - (2 << xdec), |
1417 | | y: y - (1 << ydec), |
1418 | | }), |
1419 | | p, |
1420 | | pli, |
1421 | | bd, |
1422 | | xdec, |
1423 | | ydec, |
1424 | | ); |
1425 | | } |
1426 | | if cols >= 1 << xdec { |
1427 | | filter_h_edge( |
1428 | | deblock, |
1429 | | blocks, |
1430 | | TileBlockOffset(BlockOffset { |
1431 | | x: cols - (1 << xdec), |
1432 | | y: y - (1 << ydec), |
1433 | | }), |
1434 | | p, |
1435 | | pli, |
1436 | | bd, |
1437 | | xdec, |
1438 | | ydec, |
1439 | | ); |
1440 | | } |
1441 | | } |
1442 | | |
1443 | | // Last horizontal row, vertical is already complete |
1444 | | if rows > 1 << ydec { |
1445 | | for x in (0..cols).step_by(1 << xdec) { |
1446 | | filter_h_edge( |
1447 | | deblock, |
1448 | | blocks, |
1449 | | TileBlockOffset(BlockOffset { x, y: rows - (1 << ydec) }), |
1450 | | p, |
1451 | | pli, |
1452 | | bd, |
1453 | | xdec, |
1454 | | ydec, |
1455 | | ); |
1456 | | } |
1457 | | } |
1458 | | } |
1459 | | |
1460 | | // sse count of all edges in a single plane, accumulates into vertical and horizontal counts |
1461 | 0 | fn sse_plane<T: Pixel>( |
1462 | 0 | rec: &PlaneRegion<T>, src: &PlaneRegion<T>, |
1463 | 0 | v_sse: &mut [i64; MAX_LOOP_FILTER + 2], |
1464 | 0 | h_sse: &mut [i64; MAX_LOOP_FILTER + 2], pli: usize, blocks: &TileBlocks, |
1465 | 0 | crop_w: usize, crop_h: usize, bd: usize, |
1466 | 0 | ) { |
1467 | 0 | let xdec = rec.plane_cfg.xdec; |
1468 | 0 | let ydec = rec.plane_cfg.ydec; |
1469 | 0 | assert!(xdec <= 1 && ydec <= 1); |
1470 | 0 | let rect = rec.rect(); |
1471 | 0 | let cols = (cmp::min( |
1472 | 0 | blocks.cols(), |
1473 | 0 | (crop_w - rect.x as usize + MI_SIZE - 1) >> MI_SIZE_LOG2, |
1474 | 0 | ) + (1 << xdec >> 1)) |
1475 | 0 | >> xdec |
1476 | 0 | << xdec; // Clippy can go suck an egg |
1477 | 0 | let rows = (cmp::min( |
1478 | 0 | blocks.rows(), |
1479 | 0 | (crop_h - rect.y as usize + MI_SIZE - 1) >> MI_SIZE_LOG2, |
1480 | 0 | ) + (1 << ydec >> 1)) |
1481 | 0 | >> ydec |
1482 | 0 | << ydec; // Clippy can go suck an egg |
1483 | | |
1484 | | // No horizontal edge filtering along top of frame |
1485 | 0 | for x in (1 << xdec..cols).step_by(1 << xdec) { |
1486 | 0 | sse_v_edge( |
1487 | 0 | blocks, |
1488 | 0 | TileBlockOffset(BlockOffset { x, y: 0 }), |
1489 | 0 | rec, |
1490 | 0 | src, |
1491 | 0 | v_sse, |
1492 | 0 | pli, |
1493 | 0 | bd, |
1494 | 0 | xdec, |
1495 | 0 | ydec, |
1496 | 0 | ); |
1497 | 0 | } |
1498 | | |
1499 | | // Unlike actual filtering, we're counting horizontal and vertical |
1500 | | // as separable cases. No need to lag the horizontal processing |
1501 | | // behind vertical. |
1502 | 0 | for y in (1 << ydec..rows).step_by(1 << ydec) { |
1503 | | // No vertical filtering along left edge of frame |
1504 | 0 | sse_h_edge( |
1505 | 0 | blocks, |
1506 | 0 | TileBlockOffset(BlockOffset { x: 0, y }), |
1507 | 0 | rec, |
1508 | 0 | src, |
1509 | 0 | h_sse, |
1510 | 0 | pli, |
1511 | 0 | bd, |
1512 | 0 | xdec, |
1513 | 0 | ydec, |
1514 | | ); |
1515 | 0 | for x in (1 << xdec..cols).step_by(1 << xdec) { |
1516 | 0 | sse_v_edge( |
1517 | 0 | blocks, |
1518 | 0 | TileBlockOffset(BlockOffset { x, y }), |
1519 | 0 | rec, |
1520 | 0 | src, |
1521 | 0 | v_sse, |
1522 | 0 | pli, |
1523 | 0 | bd, |
1524 | 0 | xdec, |
1525 | 0 | ydec, |
1526 | 0 | ); |
1527 | 0 | sse_h_edge( |
1528 | 0 | blocks, |
1529 | 0 | TileBlockOffset(BlockOffset { x, y }), |
1530 | 0 | rec, |
1531 | 0 | src, |
1532 | 0 | h_sse, |
1533 | 0 | pli, |
1534 | 0 | bd, |
1535 | 0 | xdec, |
1536 | 0 | ydec, |
1537 | 0 | ); |
1538 | 0 | } |
1539 | | } |
1540 | 0 | } Unexecuted instantiation: rav1e::deblock::sse_plane::<u16> Unexecuted instantiation: rav1e::deblock::sse_plane::<u8> |
1541 | | |
1542 | | // Deblocks all edges in all planes of a frame |
1543 | | #[profiling::function] |
1544 | | pub fn deblock_filter_frame<T: Pixel>( |
1545 | | deblock: &DeblockState, tile: &mut TileMut<T>, blocks: &TileBlocks, |
1546 | | crop_w: usize, crop_h: usize, bd: usize, planes: usize, |
1547 | | ) { |
1548 | 0 | tile.planes[..planes].par_iter_mut().enumerate().for_each(|(pli, plane)| { |
1549 | 0 | deblock_plane(deblock, plane, pli, blocks, crop_w, crop_h, bd); |
1550 | 0 | }); Unexecuted instantiation: rav1e::deblock::deblock_filter_frame::<u16>::{closure#0}Unexecuted instantiation: rav1e::deblock::deblock_filter_frame::<u8>::{closure#0} |
1551 | | } |
1552 | | |
1553 | 0 | fn sse_optimize<T: Pixel>( |
1554 | 0 | rec: &Tile<T>, input: &Tile<T>, blocks: &TileBlocks, crop_w: usize, |
1555 | 0 | crop_h: usize, bd: usize, monochrome: bool, |
1556 | 0 | ) -> [u8; 4] { |
1557 | | // i64 allows us to accumulate a total of ~ 35 bits worth of pixels |
1558 | 0 | assert!( |
1559 | 0 | ILog::ilog(input.planes[0].plane_cfg.width) |
1560 | 0 | + ILog::ilog(input.planes[0].plane_cfg.height) |
1561 | 0 | < 35 |
1562 | | ); |
1563 | 0 | let mut level = [0; 4]; |
1564 | 0 | let planes = if monochrome { 1 } else { MAX_PLANES }; |
1565 | | |
1566 | 0 | for pli in 0..planes { |
1567 | 0 | let mut v_tally: [i64; MAX_LOOP_FILTER + 2] = [0; MAX_LOOP_FILTER + 2]; |
1568 | 0 | let mut h_tally: [i64; MAX_LOOP_FILTER + 2] = [0; MAX_LOOP_FILTER + 2]; |
1569 | | |
1570 | 0 | sse_plane( |
1571 | 0 | &rec.planes[pli], |
1572 | 0 | &input.planes[pli], |
1573 | 0 | &mut v_tally, |
1574 | 0 | &mut h_tally, |
1575 | 0 | pli, |
1576 | 0 | blocks, |
1577 | 0 | crop_w, |
1578 | 0 | crop_h, |
1579 | 0 | bd, |
1580 | | ); |
1581 | | |
1582 | 0 | for i in 1..=MAX_LOOP_FILTER { |
1583 | 0 | v_tally[i] += v_tally[i - 1]; |
1584 | 0 | h_tally[i] += h_tally[i - 1]; |
1585 | 0 | } |
1586 | | |
1587 | 0 | match pli { |
1588 | | 0 => { |
1589 | 0 | let mut best_v = 999; |
1590 | 0 | let mut best_h = 999; |
1591 | 0 | for i in 0..=MAX_LOOP_FILTER { |
1592 | 0 | if best_v == 999 || v_tally[best_v] > v_tally[i] { |
1593 | 0 | best_v = i; |
1594 | 0 | }; |
1595 | 0 | if best_h == 999 || h_tally[best_h] > h_tally[i] { |
1596 | 0 | best_h = i; |
1597 | 0 | }; |
1598 | | } |
1599 | 0 | level[0] = best_v as u8; |
1600 | 0 | level[1] = best_h as u8; |
1601 | | } |
1602 | | 1 | 2 => { |
1603 | 0 | let mut best = 999; |
1604 | 0 | for i in 0..=MAX_LOOP_FILTER { |
1605 | 0 | if best == 999 |
1606 | 0 | || v_tally[best] + h_tally[best] > v_tally[i] + h_tally[i] |
1607 | 0 | { |
1608 | 0 | best = i; |
1609 | 0 | }; |
1610 | | } |
1611 | 0 | level[pli + 1] = best as u8; |
1612 | | } |
1613 | 0 | _ => unreachable!(), |
1614 | | } |
1615 | | } |
1616 | 0 | level |
1617 | 0 | } Unexecuted instantiation: rav1e::deblock::sse_optimize::<u16> Unexecuted instantiation: rav1e::deblock::sse_optimize::<u8> |
1618 | | |
1619 | | #[profiling::function] |
1620 | | pub fn deblock_filter_optimize<T: Pixel, U: Pixel>( |
1621 | | fi: &FrameInvariants<T>, rec: &Tile<U>, input: &Tile<U>, |
1622 | | blocks: &TileBlocks, crop_w: usize, crop_h: usize, |
1623 | | ) -> [u8; 4] { |
1624 | | if fi.config.speed_settings.fast_deblock { |
1625 | | let q = ac_q(fi.base_q_idx, 0, fi.sequence.bit_depth).get() as i32; |
1626 | | let level = clamp( |
1627 | | match fi.sequence.bit_depth { |
1628 | | 8 => { |
1629 | | if fi.frame_type == FrameType::KEY { |
1630 | | (q * 17563 - 421_574 + (1 << 18 >> 1)) >> 18 |
1631 | | } else { |
1632 | | (q * 6017 + 650_707 + (1 << 18 >> 1)) >> 18 |
1633 | | } |
1634 | | } |
1635 | | 10 => { |
1636 | | if fi.frame_type == FrameType::KEY { |
1637 | | ((q * 20723 + 4_060_632 + (1 << 20 >> 1)) >> 20) - 4 |
1638 | | } else { |
1639 | | (q * 20723 + 4_060_632 + (1 << 20 >> 1)) >> 20 |
1640 | | } |
1641 | | } |
1642 | | 12 => { |
1643 | | if fi.frame_type == FrameType::KEY { |
1644 | | ((q * 20723 + 16_242_526 + (1 << 22 >> 1)) >> 22) - 4 |
1645 | | } else { |
1646 | | (q * 20723 + 16_242_526 + (1 << 22 >> 1)) >> 22 |
1647 | | } |
1648 | | } |
1649 | | _ => unreachable!(), |
1650 | | }, |
1651 | | 0, |
1652 | | MAX_LOOP_FILTER as i32, |
1653 | | ) as u8; |
1654 | | [level; 4] |
1655 | | } else { |
1656 | | // Deblocking happens in 4x4 (luma) units; luma x,y are clipped to |
1657 | | // the *crop frame* of the entire frame by 4x4 block. |
1658 | | sse_optimize( |
1659 | | rec, |
1660 | | input, |
1661 | | blocks, |
1662 | | crop_w, |
1663 | | crop_h, |
1664 | | fi.sequence.bit_depth, |
1665 | | fi.sequence.chroma_sampling == Cs400, |
1666 | | ) |
1667 | | } |
1668 | | } |