/rust/registry/src/index.crates.io-1949cf8c6b5b557f/av-scenechange-0.14.1/src/data/mc.rs
Line | Count | Source |
1 | | #[cfg(asm_neon)] |
2 | | mod simd_neon; |
3 | | #[cfg(asm_x86_64)] |
4 | | mod simd_x86; |
5 | | |
6 | | use v_frame::{pixel::Pixel, plane::PlaneSlice}; |
7 | | |
8 | | #[cfg(not(any(asm_x86_64, asm_neon)))] |
9 | | use self::rust::*; |
10 | | #[cfg(asm_neon)] |
11 | | use self::simd_neon::*; |
12 | | #[cfg(asm_x86_64)] |
13 | | use self::simd_x86::*; |
14 | | use crate::{cpu::CpuFeatureLevel, data::plane::PlaneRegionMut}; |
15 | | |
/// Selector for the sub-pixel interpolation filter family.
///
/// The explicit discriminants matter: `get_filter` casts a `FilterMode` to
/// `usize` and uses it as the first index into `SUBPEL_FILTERS`, so
/// `REGULAR`..=`BILINEAR` map to banks 0..=3 of that table.
///
/// NOTE(review): the variant names appear to mirror the AV1 interpolation
/// filter types; confirm against the codec specification before relying on
/// that.
// The variants are deliberately all-caps (hence the clippy allow), and not
// every variant is referenced by the code in this file (hence `dead_code`).
#[allow(clippy::upper_case_acronyms, dead_code)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd)]
pub enum FilterMode {
    /// Bank 0 of `SUBPEL_FILTERS`.
    REGULAR = 0,
    /// Bank 1 of `SUBPEL_FILTERS`.
    SMOOTH = 1,
    /// Bank 2 of `SUBPEL_FILTERS`.
    SHARP = 2,
    /// Bank 3 of `SUBPEL_FILTERS`.
    BILINEAR = 3,
    /// Has no bank of its own in the code visible here: index 4 of
    /// `SUBPEL_FILTERS` is only reached through the short-block path of
    /// `get_filter`, never by casting this variant.
    SWITCHABLE = 4,
}
26 | | |
/// Number of taps in every sub-pixel interpolation kernel.
pub const SUBPEL_FILTER_SIZE: usize = 8;

/// Interpolation kernels, indexed as `SUBPEL_FILTERS[bank][frac]`.
///
/// * `bank` (0..6): banks 0..=3 are selected by casting a [`FilterMode`]
///   (`REGULAR`, `SMOOTH`, `SHARP`, `BILINEAR`) to `usize`; banks 4 and 5 are
///   the ones `get_filter` picks for block lengths of 4 or less.
/// * `frac` (0..16): the fractional sample position.
///
/// Every kernel sums to 128, so a filtered value is brought back to sample
/// range by a total right shift of 7 bits. Entry 0 of each bank is the
/// identity kernel (a single 128 on the fourth tap).
const SUBPEL_FILTERS: [[[i32; SUBPEL_FILTER_SIZE]; 16]; 6] = [
    // Bank 0: FilterMode::REGULAR
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [0, 2, -6, 126, 8, -2, 0, 0],
        [0, 2, -10, 122, 18, -4, 0, 0],
        [0, 2, -12, 116, 28, -8, 2, 0],
        [0, 2, -14, 110, 38, -10, 2, 0],
        [0, 2, -14, 102, 48, -12, 2, 0],
        [0, 2, -16, 94, 58, -12, 2, 0],
        [0, 2, -14, 84, 66, -12, 2, 0],
        [0, 2, -14, 76, 76, -14, 2, 0],
        [0, 2, -12, 66, 84, -14, 2, 0],
        [0, 2, -12, 58, 94, -16, 2, 0],
        [0, 2, -12, 48, 102, -14, 2, 0],
        [0, 2, -10, 38, 110, -14, 2, 0],
        [0, 2, -8, 28, 116, -12, 2, 0],
        [0, 0, -4, 18, 122, -10, 2, 0],
        [0, 0, -2, 8, 126, -6, 2, 0],
    ],
    // Bank 1: FilterMode::SMOOTH
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [0, 2, 28, 62, 34, 2, 0, 0],
        [0, 0, 26, 62, 36, 4, 0, 0],
        [0, 0, 22, 62, 40, 4, 0, 0],
        [0, 0, 20, 60, 42, 6, 0, 0],
        [0, 0, 18, 58, 44, 8, 0, 0],
        [0, 0, 16, 56, 46, 10, 0, 0],
        [0, -2, 16, 54, 48, 12, 0, 0],
        [0, -2, 14, 52, 52, 14, -2, 0],
        [0, 0, 12, 48, 54, 16, -2, 0],
        [0, 0, 10, 46, 56, 16, 0, 0],
        [0, 0, 8, 44, 58, 18, 0, 0],
        [0, 0, 6, 42, 60, 20, 0, 0],
        [0, 0, 4, 40, 62, 22, 0, 0],
        [0, 0, 4, 36, 62, 26, 0, 0],
        [0, 0, 2, 34, 62, 28, 2, 0],
    ],
    // Bank 2: FilterMode::SHARP
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [-2, 2, -6, 126, 8, -2, 2, 0],
        [-2, 6, -12, 124, 16, -6, 4, -2],
        [-2, 8, -18, 120, 26, -10, 6, -2],
        [-4, 10, -22, 116, 38, -14, 6, -2],
        [-4, 10, -22, 108, 48, -18, 8, -2],
        [-4, 10, -24, 100, 60, -20, 8, -2],
        [-4, 10, -24, 90, 70, -22, 10, -2],
        [-4, 12, -24, 80, 80, -24, 12, -4],
        [-2, 10, -22, 70, 90, -24, 10, -4],
        [-2, 8, -20, 60, 100, -24, 10, -4],
        [-2, 8, -18, 48, 108, -22, 10, -4],
        [-2, 6, -14, 38, 116, -22, 10, -4],
        [-2, 6, -10, 26, 120, -18, 8, -2],
        [-2, 4, -6, 16, 124, -12, 6, -2],
        [0, 2, -2, 8, 126, -6, 2, -2],
    ],
    // Bank 3: FilterMode::BILINEAR
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [0, 0, 0, 120, 8, 0, 0, 0],
        [0, 0, 0, 112, 16, 0, 0, 0],
        [0, 0, 0, 104, 24, 0, 0, 0],
        [0, 0, 0, 96, 32, 0, 0, 0],
        [0, 0, 0, 88, 40, 0, 0, 0],
        [0, 0, 0, 80, 48, 0, 0, 0],
        [0, 0, 0, 72, 56, 0, 0, 0],
        [0, 0, 0, 64, 64, 0, 0, 0],
        [0, 0, 0, 56, 72, 0, 0, 0],
        [0, 0, 0, 48, 80, 0, 0, 0],
        [0, 0, 0, 40, 88, 0, 0, 0],
        [0, 0, 0, 32, 96, 0, 0, 0],
        [0, 0, 0, 24, 104, 0, 0, 0],
        [0, 0, 0, 16, 112, 0, 0, 0],
        [0, 0, 0, 8, 120, 0, 0, 0],
    ],
    // Bank 4: used for lengths <= 4 when the mode is REGULAR
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [0, 0, -4, 126, 8, -2, 0, 0],
        [0, 0, -8, 122, 18, -4, 0, 0],
        [0, 0, -10, 116, 28, -6, 0, 0],
        [0, 0, -12, 110, 38, -8, 0, 0],
        [0, 0, -12, 102, 48, -10, 0, 0],
        [0, 0, -14, 94, 58, -10, 0, 0],
        [0, 0, -12, 84, 66, -10, 0, 0],
        [0, 0, -12, 76, 76, -12, 0, 0],
        [0, 0, -10, 66, 84, -12, 0, 0],
        [0, 0, -10, 58, 94, -14, 0, 0],
        [0, 0, -10, 48, 102, -12, 0, 0],
        [0, 0, -8, 38, 110, -12, 0, 0],
        [0, 0, -6, 28, 116, -10, 0, 0],
        [0, 0, -4, 18, 122, -8, 0, 0],
        [0, 0, -2, 8, 126, -4, 0, 0],
    ],
    // Bank 5: used for lengths <= 4 by every other non-bilinear mode
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [0, 0, 30, 62, 34, 2, 0, 0],
        [0, 0, 26, 62, 36, 4, 0, 0],
        [0, 0, 22, 62, 40, 4, 0, 0],
        [0, 0, 20, 60, 42, 6, 0, 0],
        [0, 0, 18, 58, 44, 8, 0, 0],
        [0, 0, 16, 56, 46, 10, 0, 0],
        [0, 0, 14, 54, 48, 12, 0, 0],
        [0, 0, 12, 52, 52, 12, 0, 0],
        [0, 0, 12, 48, 54, 14, 0, 0],
        [0, 0, 10, 46, 56, 16, 0, 0],
        [0, 0, 8, 44, 58, 18, 0, 0],
        [0, 0, 6, 42, 60, 20, 0, 0],
        [0, 0, 4, 40, 62, 22, 0, 0],
        [0, 0, 4, 36, 62, 26, 0, 0],
        [0, 0, 2, 34, 62, 30, 0, 0],
    ],
];
139 | | |
140 | | mod rust { |
141 | | use num_traits::AsPrimitive; |
142 | | use v_frame::{math::round_shift, pixel::Pixel, plane::PlaneSlice}; |
143 | | |
144 | | use crate::{ |
145 | | cpu::CpuFeatureLevel, |
146 | | data::{ |
147 | | mc::{FilterMode, SUBPEL_FILTERS, SUBPEL_FILTER_SIZE}, |
148 | | plane::PlaneRegionMut, |
149 | | }, |
150 | | }; |
151 | | |
152 | | #[cfg_attr( |
153 | | all(asm_x86_64, any(target_feature = "ssse3", target_feature = "avx2")), |
154 | | cold |
155 | | )] |
156 | | #[cfg_attr(asm_neon, cold)] |
157 | | #[allow(clippy::too_many_arguments)] |
158 | 0 | pub fn put_8tap_internal<T: Pixel>( |
159 | 0 | dst: &mut PlaneRegionMut<'_, T>, |
160 | 0 | src: PlaneSlice<'_, T>, |
161 | 0 | width: usize, |
162 | 0 | height: usize, |
163 | 0 | col_frac: i32, |
164 | 0 | row_frac: i32, |
165 | 0 | bit_depth: usize, |
166 | 0 | _cpu: CpuFeatureLevel, |
167 | 0 | ) { |
168 | | // The assembly only supports even heights and valid uncropped widths |
169 | 0 | assert_eq!(height & 1, 0); |
170 | 0 | assert!(width.is_power_of_two() && (2..=128).contains(&width)); |
171 | | |
172 | 0 | let ref_stride = src.plane.cfg.stride; |
173 | 0 | let y_filter = get_filter(row_frac, height); |
174 | 0 | let x_filter = get_filter(col_frac, width); |
175 | 0 | let max_sample_val = (1 << bit_depth) - 1; |
176 | 0 | let intermediate_bits = 4 - if bit_depth == 12 { 2 } else { 0 }; |
177 | 0 | match (col_frac, row_frac) { |
178 | | (0, 0) => { |
179 | 0 | for r in 0..height { |
180 | 0 | let src_slice = &src[r]; |
181 | 0 | let dst_slice = &mut dst[r]; |
182 | 0 | dst_slice[..width].copy_from_slice(&src_slice[..width]); |
183 | 0 | } |
184 | | } |
185 | | (0, _) => { |
186 | 0 | let offset_slice = src.go_up(3); |
187 | 0 | for r in 0..height { |
188 | 0 | let src_slice = &offset_slice[r]; |
189 | 0 | let dst_slice = &mut dst[r]; |
190 | 0 | for c in 0..width { |
191 | 0 | dst_slice[c] = T::cast_from( |
192 | 0 | round_shift( |
193 | 0 | // SAFETY: We pass this a raw pointer, but it's created from a |
194 | 0 | // checked slice, so we are safe. |
195 | 0 | unsafe { |
196 | 0 | run_filter(src_slice[c..].as_ptr(), ref_stride, y_filter) |
197 | 0 | }, |
198 | 0 | 7, |
199 | 0 | ) |
200 | 0 | .clamp(0, max_sample_val), |
201 | 0 | ); |
202 | 0 | } |
203 | | } |
204 | | } |
205 | | (_, 0) => { |
206 | 0 | let offset_slice = src.go_left(3); |
207 | 0 | for r in 0..height { |
208 | 0 | let src_slice = &offset_slice[r]; |
209 | 0 | let dst_slice = &mut dst[r]; |
210 | 0 | for c in 0..width { |
211 | 0 | dst_slice[c] = T::cast_from( |
212 | 0 | round_shift( |
213 | 0 | round_shift( |
214 | 0 | // SAFETY: We pass this a raw pointer, but it's created from a |
215 | 0 | // checked slice, so we are safe. |
216 | 0 | unsafe { run_filter(src_slice[c..].as_ptr(), 1, x_filter) }, |
217 | 0 | 7 - intermediate_bits, |
218 | 0 | ), |
219 | 0 | intermediate_bits, |
220 | 0 | ) |
221 | 0 | .clamp(0, max_sample_val), |
222 | 0 | ); |
223 | 0 | } |
224 | | } |
225 | | } |
226 | | (_, _) => { |
227 | 0 | let mut intermediate: [i16; 8 * (128 + 7)] = [0; 8 * (128 + 7)]; |
228 | | |
229 | 0 | let offset_slice = src.go_left(3).go_up(3); |
230 | 0 | for cg in (0..width).step_by(8) { |
231 | 0 | for r in 0..height + 7 { |
232 | 0 | let src_slice = &offset_slice[r]; |
233 | 0 | for c in cg..(cg + 8).min(width) { |
234 | 0 | intermediate[8 * r + (c - cg)] = round_shift( |
235 | 0 | // SAFETY: We pass this a raw pointer, but it's created from a |
236 | 0 | // checked slice, so we are safe. |
237 | 0 | unsafe { run_filter(src_slice[c..].as_ptr(), 1, x_filter) }, |
238 | 0 | 7 - intermediate_bits, |
239 | 0 | ) as i16; |
240 | 0 | } |
241 | | } |
242 | | |
243 | 0 | for r in 0..height { |
244 | 0 | let dst_slice = &mut dst[r]; |
245 | 0 | for c in cg..(cg + 8).min(width) { |
246 | 0 | dst_slice[c] = T::cast_from( |
247 | 0 | round_shift( |
248 | 0 | // SAFETY: We pass this a raw pointer, but it's created from a |
249 | 0 | // checked slice, so we are safe. |
250 | 0 | unsafe { |
251 | 0 | run_filter( |
252 | 0 | intermediate[8 * r + c - cg..].as_ptr(), |
253 | 0 | 8, |
254 | 0 | y_filter, |
255 | 0 | ) |
256 | 0 | }, |
257 | 0 | 7 + intermediate_bits, |
258 | 0 | ) |
259 | 0 | .clamp(0, max_sample_val), |
260 | 0 | ); |
261 | 0 | } |
262 | | } |
263 | | } |
264 | | } |
265 | | } |
266 | 0 | } Unexecuted instantiation: av_scenechange::data::mc::rust::put_8tap_internal::<u16> Unexecuted instantiation: av_scenechange::data::mc::rust::put_8tap_internal::<u8> Unexecuted instantiation: av_scenechange::data::mc::rust::put_8tap_internal::<_> |
267 | | |
268 | 0 | fn get_filter(frac: i32, length: usize) -> [i32; SUBPEL_FILTER_SIZE] { |
269 | | const MODE: FilterMode = FilterMode::REGULAR; |
270 | | |
271 | 0 | let filter_idx = if MODE == FilterMode::BILINEAR || length > 4 { |
272 | 0 | MODE as usize |
273 | | } else { |
274 | 0 | (MODE as usize).min(1) + 4 |
275 | | }; |
276 | 0 | SUBPEL_FILTERS[filter_idx][frac as usize] |
277 | 0 | } |
278 | | |
279 | 0 | unsafe fn run_filter<T: AsPrimitive<i32>>( |
280 | 0 | src: *const T, |
281 | 0 | stride: usize, |
282 | 0 | filter: [i32; 8], |
283 | 0 | ) -> i32 { |
284 | 0 | filter |
285 | 0 | .iter() |
286 | 0 | .enumerate() |
287 | 0 | .map(|(i, f)| { |
288 | 0 | let p = src.add(i * stride); |
289 | 0 | f * (*p).as_() |
290 | 0 | }) Unexecuted instantiation: av_scenechange::data::mc::rust::run_filter::<u16>::{closure#0}Unexecuted instantiation: av_scenechange::data::mc::rust::run_filter::<u8>::{closure#0}Unexecuted instantiation: av_scenechange::data::mc::rust::run_filter::<i16>::{closure#0}Unexecuted instantiation: av_scenechange::data::mc::rust::run_filter::<_>::{closure#0} |
291 | 0 | .sum::<i32>() |
292 | 0 | } Unexecuted instantiation: av_scenechange::data::mc::rust::run_filter::<u16> Unexecuted instantiation: av_scenechange::data::mc::rust::run_filter::<u8> Unexecuted instantiation: av_scenechange::data::mc::rust::run_filter::<i16> Unexecuted instantiation: av_scenechange::data::mc::rust::run_filter::<_> |
293 | | } |
294 | | |
295 | | #[allow(clippy::too_many_arguments)] |
296 | 0 | pub fn put_8tap<T: Pixel>( |
297 | 0 | dst: &mut PlaneRegionMut<'_, T>, |
298 | 0 | src: PlaneSlice<'_, T>, |
299 | 0 | width: usize, |
300 | 0 | height: usize, |
301 | 0 | col_frac: i32, |
302 | 0 | row_frac: i32, |
303 | 0 | bit_depth: usize, |
304 | 0 | cpu: CpuFeatureLevel, |
305 | 0 | ) { |
306 | 0 | put_8tap_internal(dst, src, width, height, col_frac, row_frac, bit_depth, cpu); |
307 | 0 | } Unexecuted instantiation: av_scenechange::data::mc::put_8tap::<u16> Unexecuted instantiation: av_scenechange::data::mc::put_8tap::<u8> Unexecuted instantiation: av_scenechange::data::mc::put_8tap::<_> |