/rust/registry/src/index.crates.io-1949cf8c6b5b557f/av-scenechange-0.14.1/src/analyze/inter.rs
Line | Count | Source |
1 | | use std::sync::Arc; |
2 | | |
3 | | use aligned::{Aligned, A64}; |
4 | | use arrayvec::ArrayVec; |
5 | | use num_rational::Rational32; |
6 | | use rayon::iter::{IntoParallelIterator, ParallelIterator}; |
7 | | use v_frame::{ |
8 | | frame::Frame, |
9 | | math::{clamp, ILog}, |
10 | | pixel::{ChromaSampling, Pixel}, |
11 | | plane::{Plane, PlaneConfig, PlaneOffset}, |
12 | | }; |
13 | | |
14 | | use super::importance::{ |
15 | | IMPORTANCE_BLOCK_SIZE, |
16 | | IMP_BLOCK_MV_UNITS_PER_PIXEL, |
17 | | IMP_BLOCK_SIZE_IN_MV_UNITS, |
18 | | }; |
19 | | use crate::{ |
20 | | cpu::CpuFeatureLevel, |
21 | | data::{ |
22 | | block::{BlockOffset, BlockSize, MIB_SIZE_LOG2}, |
23 | | frame::{FrameInvariants, FrameState, RefType, ALLOWED_REF_FRAMES}, |
24 | | motion::{ |
25 | | MEStats, |
26 | | MVSamplingMode, |
27 | | MotionEstimationSubsets, |
28 | | MotionVector, |
29 | | ReadGuardMEStats, |
30 | | RefMEStats, |
31 | | TileMEStats, |
32 | | MV_LOW, |
33 | | MV_UPP, |
34 | | }, |
35 | | plane::{Area, AsRegion, PlaneBlockOffset, PlaneRegion, PlaneRegionMut, Rect}, |
36 | | prediction::PredictionMode, |
37 | | sad::get_sad, |
38 | | satd::get_satd, |
39 | | superblock::{ |
40 | | SuperBlockOffset, |
41 | | TileSuperBlockOffset, |
42 | | MAX_SB_SIZE_LOG2, |
43 | | MI_SIZE, |
44 | | MI_SIZE_LOG2, |
45 | | SB_SIZE, |
46 | | }, |
47 | | tile::{TileBlockOffset, TileRect, TileStateMut, TilingInfo}, |
48 | | }, |
49 | | }; |
50 | | |
/// Builds an array of [`MotionVector`]s from two parallel component lists
/// ("structure of arrays" syntax).
///
/// The n-th vector takes its first named field from the n-th entry of the
/// first list and its second named field from the n-th entry of the second
/// list. The listed values are used verbatim.
macro_rules! search_pattern_subpel {
    ($first:ident: [$($a:expr),*], $second:ident: [$($b:expr),*]) => {
        [
            $(
                MotionVector {
                    $first: $a,
                    $second: $b,
                }
            ),*
        ]
    };
}
57 | | |
/// Builds an array of [`MotionVector`]s from two parallel component lists
/// ("structure of arrays" syntax).
///
/// Unlike [`search_pattern_subpel`], every listed component is shifted left
/// by three bits (multiplied by 8), so the lists are written in fullpel
/// units.
macro_rules! search_pattern {
    ($first:ident: [$($a:expr),*], $second:ident: [$($b:expr),*]) => {
        [
            $(
                MotionVector {
                    $first: $a << 3,
                    $second: $b << 3,
                }
            ),*
        ]
    };
}
66 | | |
67 | | /// Diamond pattern of radius 1 as shown below. For fullpel search, use |
68 | | /// `DIAMOND_R1_PATTERN_FULLPEL` since it has been scaled for fullpel search. |
69 | | /// ```text |
70 | | /// X |
71 | | /// XoX |
72 | | /// X |
73 | | /// ``` |
74 | | /// 'X's are motion candidates and the 'o' is the center. |
75 | | const DIAMOND_R1_PATTERN_SUBPEL: [MotionVector; 4] = search_pattern_subpel!( |
76 | | col: [ 0, 1, 0, -1], |
77 | | row: [ 1, 0, -1, 0] |
78 | | ); |
79 | | |
80 | | /// Diamond pattern of radius 1 as shown below. Unlike `DIAMOND_R1_PATTERN`, the |
81 | | /// vectors have been shifted fullpel scale. |
82 | | /// ```text |
83 | | /// X |
84 | | /// XoX |
85 | | /// X |
86 | | /// ``` |
87 | | /// 'X's are motion candidates and the 'o' is the center. |
88 | | const DIAMOND_R1_PATTERN: [MotionVector; 4] = search_pattern!( |
89 | | col: [ 0, 1, 0, -1], |
90 | | row: [ 1, 0, -1, 0] |
91 | | ); |
92 | | |
93 | | /// Uneven multi-hexagon search pattern around a center point. Used for locating |
94 | | /// irregular movement. |
95 | | /// ```text |
96 | | /// X |
97 | | /// X X |
98 | | /// X X |
99 | | /// X X |
100 | | /// X o X |
101 | | /// X X |
102 | | /// X X |
103 | | /// X X |
104 | | /// X |
105 | | /// ``` |
106 | | /// 'X's are motion candidates and the 'o' is the center. |
107 | | const UMH_PATTERN: [MotionVector; 16] = search_pattern!( |
108 | | col: [ -2, -1, 0, 1, 2, 3, 4, 3, 2, 1, 0, -1, -2, 3, -4, -3], |
109 | | row: [ 4, 4, 4, 4, 4, 2, 0, -2, -4, -4, -4, -4, -4, -2, 0, 2] |
110 | | ); |
111 | | |
112 | | /// A hexagon pattern around a center point. The pattern is ordered so that the |
113 | | /// offsets circle around the center. This is done to allow pruning locations |
114 | | /// covered by the last iteration. |
115 | | /// ```text |
116 | | /// 21012 |
117 | | /// 2 X X |
118 | | /// 1 |
119 | | /// 0 X o X |
120 | | /// 1 |
121 | | /// 2 X X |
122 | | /// ``` |
123 | | /// 'X's are motion candidates and the 'o' is the center. |
124 | | /// |
125 | | /// The illustration below shows the process of a hexagon search. |
126 | | /// ```text |
127 | | /// Step 1 Step 2 |
128 | | /// 1 1 1 1 2 |
129 | | /// |
130 | | /// 1(0)1 => 1 0(1)2 |
131 | | /// |
132 | | /// 1 1 1 1 2 |
133 | | /// ``` |
134 | | /// The search above has gone through the following steps. |
135 | | /// 1. Search '1' elements for better candidates than the center '0'. |
136 | | /// 2. Recenter around the best candidate ('(1)') and hexagon candidates that |
137 | | /// don't overlap with the previous search step (labeled '2'). |
138 | | const HEXAGON_PATTERN: [MotionVector; 6] = search_pattern!( |
139 | | col: [ 0, 2, 2, 0, -2, -2], |
140 | | row: [ -2, -1, 1, 2, 1, -1] |
141 | | ); |
142 | | |
143 | | /// A small square pattern around a center point. |
144 | | /// ```text |
145 | | /// 101 |
146 | | /// 1 XXX |
147 | | /// 0 XoX |
148 | | /// 1 XXX |
149 | | /// ``` |
150 | | /// 'X's are motion candidates and the 'o' is the center. |
151 | | const SQUARE_REFINE_PATTERN: [MotionVector; 8] = search_pattern!( |
152 | | col: [ -1, 0, 1, -1, 1, -1, 0, 1], |
153 | | row: [ 1, 1, 1, 0, 0, -1, -1, -1] |
154 | | ); |
155 | | |
156 | 0 | pub(crate) fn estimate_inter_costs<T: Pixel>( |
157 | 0 | frame: Arc<Frame<T>>, |
158 | 0 | ref_frame: Arc<Frame<T>>, |
159 | 0 | bit_depth: usize, |
160 | 0 | frame_rate: Rational32, |
161 | 0 | chroma_sampling: ChromaSampling, |
162 | 0 | buffer: RefMEStats, |
163 | 0 | cpu_feature_level: CpuFeatureLevel, |
164 | 0 | ) -> f64 { |
165 | 0 | let last_fi = |
166 | 0 | FrameInvariants::new_key_frame(frame.planes[0].cfg.width, frame.planes[0].cfg.height); |
167 | 0 | let fi = FrameInvariants::new_inter_frame(&last_fi, 1).unwrap(); |
168 | | |
169 | | // Compute the motion vectors. |
170 | 0 | let mut fs = FrameState::new_with_frame_and_me_stats_and_rec(Arc::clone(&frame), buffer); |
171 | 0 | let mut tiling = TilingInfo::from_target_tiles( |
172 | 0 | frame.planes[0].cfg.width, |
173 | 0 | frame.planes[0].cfg.height, |
174 | 0 | *frame_rate.numer() as f64 / *frame_rate.denom() as f64, |
175 | 0 | TilingInfo::tile_log2(1, 0).unwrap(), |
176 | 0 | TilingInfo::tile_log2(1, 0).unwrap(), |
177 | 0 | chroma_sampling == ChromaSampling::Cs422, |
178 | | ); |
179 | 0 | compute_motion_vectors(&fi, &mut fs, &mut tiling, bit_depth, cpu_feature_level); |
180 | | |
181 | | // Estimate inter costs |
182 | 0 | let plane_org = &frame.planes[0]; |
183 | 0 | let plane_ref = &ref_frame.planes[0]; |
184 | 0 | let h_in_imp_b = plane_org.cfg.height / IMPORTANCE_BLOCK_SIZE; |
185 | 0 | let w_in_imp_b = plane_org.cfg.width / IMPORTANCE_BLOCK_SIZE; |
186 | 0 | let stats = &fs.frame_me_stats.read().expect("poisoned lock")[0]; |
187 | 0 | let bsize = BlockSize::from_width_and_height(IMPORTANCE_BLOCK_SIZE, IMPORTANCE_BLOCK_SIZE); |
188 | | |
189 | 0 | let mut inter_costs = 0; |
190 | 0 | (0..h_in_imp_b).for_each(|y| { |
191 | 0 | (0..w_in_imp_b).for_each(|x| { |
192 | 0 | let mv = stats[y * 2][x * 2].mv; |
193 | | |
194 | | // Coordinates of the top-left corner of the reference block, in MV |
195 | | // units. |
196 | 0 | let reference_x = x as i64 * IMP_BLOCK_SIZE_IN_MV_UNITS + mv.col as i64; |
197 | 0 | let reference_y = y as i64 * IMP_BLOCK_SIZE_IN_MV_UNITS + mv.row as i64; |
198 | | |
199 | 0 | let region_org = plane_org.region(Area::Rect(Rect { |
200 | 0 | x: (x * IMPORTANCE_BLOCK_SIZE) as isize, |
201 | 0 | y: (y * IMPORTANCE_BLOCK_SIZE) as isize, |
202 | 0 | width: IMPORTANCE_BLOCK_SIZE, |
203 | 0 | height: IMPORTANCE_BLOCK_SIZE, |
204 | 0 | })); |
205 | | |
206 | 0 | let region_ref = plane_ref.region(Area::Rect(Rect { |
207 | 0 | x: reference_x as isize / IMP_BLOCK_MV_UNITS_PER_PIXEL as isize, |
208 | 0 | y: reference_y as isize / IMP_BLOCK_MV_UNITS_PER_PIXEL as isize, |
209 | 0 | width: IMPORTANCE_BLOCK_SIZE, |
210 | 0 | height: IMPORTANCE_BLOCK_SIZE, |
211 | 0 | })); |
212 | | |
213 | 0 | inter_costs += get_satd( |
214 | 0 | ®ion_org, |
215 | 0 | ®ion_ref, |
216 | 0 | bsize.width(), |
217 | 0 | bsize.height(), |
218 | 0 | bit_depth, |
219 | 0 | cpu_feature_level, |
220 | 0 | ) as u64; |
221 | 0 | }); Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<u16>::{closure#0}::{closure#0}Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<u8>::{closure#0}::{closure#0}Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<_>::{closure#0}::{closure#0} |
222 | 0 | }); Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<u16>::{closure#0}Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<u8>::{closure#0}Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<_>::{closure#0} |
223 | 0 | inter_costs as f64 / (w_in_imp_b * h_in_imp_b) as f64 |
224 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<_> |
225 | | |
226 | 0 | fn compute_motion_vectors<T: Pixel>( |
227 | 0 | fi: &FrameInvariants<T>, |
228 | 0 | fs: &mut FrameState<T>, |
229 | 0 | tiling_info: &mut TilingInfo, |
230 | 0 | bit_depth: usize, |
231 | 0 | cpu_feature_level: CpuFeatureLevel, |
232 | 0 | ) { |
233 | 0 | tiling_info |
234 | 0 | .tile_iter_mut(fs) |
235 | 0 | .collect::<Vec<_>>() |
236 | 0 | .into_par_iter() |
237 | 0 | .for_each(|mut ctx| { |
238 | 0 | let ts = &mut ctx.ts; |
239 | 0 | estimate_tile_motion(fi, ts, bit_depth, cpu_feature_level); |
240 | 0 | }); Unexecuted instantiation: av_scenechange::analyze::inter::compute_motion_vectors::<u16>::{closure#0}Unexecuted instantiation: av_scenechange::analyze::inter::compute_motion_vectors::<u8>::{closure#0}Unexecuted instantiation: av_scenechange::analyze::inter::compute_motion_vectors::<_>::{closure#0} |
241 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::compute_motion_vectors::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::compute_motion_vectors::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::compute_motion_vectors::<_> |
242 | | |
243 | 0 | fn estimate_tile_motion<T: Pixel>( |
244 | 0 | fi: &FrameInvariants<T>, |
245 | 0 | ts: &mut TileStateMut<'_, T>, |
246 | 0 | bit_depth: usize, |
247 | 0 | cpu_feature_level: CpuFeatureLevel, |
248 | 0 | ) { |
249 | 0 | let init_size = MIB_SIZE_LOG2; |
250 | | |
251 | 0 | let mut prev_ssdec: Option<u8> = None; |
252 | 0 | for mv_size_in_b_log2 in (2..=init_size).rev() { |
253 | 0 | let init = mv_size_in_b_log2 == init_size; |
254 | | |
255 | | // Choose subsampling. Pass one is quarter res and pass two is at half res. |
256 | 0 | let ssdec = match init_size - mv_size_in_b_log2 { |
257 | 0 | 0 => 2, |
258 | 0 | 1 => 1, |
259 | 0 | _ => 0, |
260 | | }; |
261 | | |
262 | 0 | let new_subsampling = if let Some(prev) = prev_ssdec { |
263 | 0 | prev != ssdec |
264 | | } else { |
265 | 0 | false |
266 | | }; |
267 | 0 | prev_ssdec = Some(ssdec); |
268 | | |
269 | | // 0.5 and 0.125 are a fudge factors |
270 | 0 | let lambda = 0; |
271 | | |
272 | 0 | for sby in 0..ts.sb_height { |
273 | 0 | for sbx in 0..ts.sb_width { |
274 | 0 | let mut tested_frames_flags = 0; |
275 | 0 | for &ref_frame in ALLOWED_REF_FRAMES { |
276 | 0 | let frame_flag = 1 << fi.ref_frames[ref_frame.to_index()]; |
277 | 0 | if tested_frames_flags & frame_flag == frame_flag { |
278 | 0 | continue; |
279 | 0 | } |
280 | 0 | tested_frames_flags |= frame_flag; |
281 | | |
282 | 0 | let tile_bo = TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby }) |
283 | 0 | .block_offset(0, 0); |
284 | | |
285 | 0 | if new_subsampling { |
286 | 0 | refine_subsampled_sb_motion( |
287 | 0 | fi, |
288 | 0 | ts, |
289 | 0 | ref_frame, |
290 | 0 | mv_size_in_b_log2 + 1, |
291 | 0 | tile_bo, |
292 | 0 | ssdec, |
293 | 0 | lambda, |
294 | 0 | bit_depth, |
295 | 0 | cpu_feature_level, |
296 | 0 | ); |
297 | 0 | } |
298 | | |
299 | 0 | estimate_sb_motion( |
300 | 0 | fi, |
301 | 0 | ts, |
302 | 0 | ref_frame, |
303 | 0 | mv_size_in_b_log2, |
304 | 0 | tile_bo, |
305 | 0 | init, |
306 | 0 | ssdec, |
307 | 0 | lambda, |
308 | 0 | bit_depth, |
309 | 0 | cpu_feature_level, |
310 | | ); |
311 | | } |
312 | | } |
313 | | } |
314 | | } |
315 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::estimate_tile_motion::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::estimate_tile_motion::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::estimate_tile_motion::<_> |
316 | | |
317 | | #[allow(clippy::too_many_arguments)] |
318 | 0 | fn refine_subsampled_sb_motion<T: Pixel>( |
319 | 0 | fi: &FrameInvariants<T>, |
320 | 0 | ts: &mut TileStateMut<'_, T>, |
321 | 0 | ref_frame: RefType, |
322 | 0 | mv_size_in_b_log2: usize, |
323 | 0 | tile_bo: TileBlockOffset, |
324 | 0 | ssdec: u8, |
325 | 0 | lambda: u32, |
326 | 0 | bit_depth: usize, |
327 | 0 | cpu_feature_level: CpuFeatureLevel, |
328 | 0 | ) { |
329 | 0 | let pix_offset = tile_bo.to_luma_plane_offset(); |
330 | 0 | let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize); |
331 | 0 | let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize); |
332 | | |
333 | 0 | let mv_size = MI_SIZE << mv_size_in_b_log2; |
334 | | |
335 | | // Process in blocks, cropping at edges. |
336 | 0 | for y in (0..sb_h).step_by(mv_size) { |
337 | 0 | for x in (0..sb_w).step_by(mv_size) { |
338 | 0 | let sub_bo = |
339 | 0 | tile_bo.with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2); |
340 | | |
341 | | // Clamp to frame edge, rounding up in the case of subsampling. |
342 | | // The rounding makes some assumptions about how subsampling is done. |
343 | 0 | let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec; |
344 | 0 | let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec; |
345 | | |
346 | | // Refine the existing motion estimate |
347 | 0 | if let Some(results) = refine_subsampled_motion_estimate( |
348 | 0 | fi, |
349 | 0 | ts, |
350 | 0 | w, |
351 | 0 | h, |
352 | 0 | sub_bo, |
353 | 0 | ref_frame, |
354 | 0 | ssdec, |
355 | 0 | lambda, |
356 | 0 | bit_depth, |
357 | 0 | cpu_feature_level, |
358 | 0 | ) { |
359 | 0 | // normalize sad to 128x128 block |
360 | 0 | let sad = |
361 | 0 | (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2)) / (w * h) as u64) as u32; |
362 | 0 | save_me_stats(ts, mv_size_in_b_log2, sub_bo, ref_frame, MEStats { |
363 | 0 | mv: results.mv, |
364 | 0 | normalized_sad: sad, |
365 | 0 | }); |
366 | 0 | } |
367 | | } |
368 | | } |
369 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::refine_subsampled_sb_motion::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::refine_subsampled_sb_motion::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::refine_subsampled_sb_motion::<_> |
370 | | |
371 | | /// Refine motion estimation that was computed one level of subsampling up. |
372 | | #[allow(clippy::too_many_arguments)] |
373 | 0 | fn refine_subsampled_motion_estimate<T: Pixel>( |
374 | 0 | fi: &FrameInvariants<T>, |
375 | 0 | ts: &TileStateMut<'_, T>, |
376 | 0 | w: usize, |
377 | 0 | h: usize, |
378 | 0 | tile_bo: TileBlockOffset, |
379 | 0 | ref_frame: RefType, |
380 | 0 | ssdec: u8, |
381 | 0 | lambda: u32, |
382 | 0 | bit_depth: usize, |
383 | 0 | cpu_feature_level: CpuFeatureLevel, |
384 | 0 | ) -> Option<MotionSearchResult> { |
385 | 0 | if let Some(ref rec) = fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize] { |
386 | 0 | let frame_bo = ts.to_frame_block_offset(tile_bo); |
387 | 0 | let (mvx_min, mvx_max, mvy_min, mvy_max) = |
388 | 0 | get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec); |
389 | | |
390 | 0 | let pmv = [MotionVector { row: 0, col: 0 }; 2]; |
391 | | |
392 | 0 | let po = frame_bo.to_luma_plane_offset(); |
393 | 0 | let (mvx_min, mvx_max, mvy_min, mvy_max) = ( |
394 | 0 | mvx_min >> ssdec, |
395 | 0 | mvx_max >> ssdec, |
396 | 0 | mvy_min >> ssdec, |
397 | 0 | mvy_max >> ssdec, |
398 | 0 | ); |
399 | 0 | let po = PlaneOffset { |
400 | 0 | x: po.x >> ssdec, |
401 | 0 | y: po.y >> ssdec, |
402 | 0 | }; |
403 | 0 | let p_ref = match ssdec { |
404 | 0 | 0 => &rec.frame.planes[0], |
405 | 0 | 1 => &rec.input_hres, |
406 | 0 | 2 => &rec.input_qres, |
407 | 0 | _ => unimplemented!(), |
408 | | }; |
409 | | |
410 | 0 | let org_region = &match ssdec { |
411 | 0 | 0 => ts.input_tile.planes[0].subregion(Area::BlockStartingAt { bo: tile_bo.0 }), |
412 | 0 | 1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }), |
413 | 0 | 2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }), |
414 | 0 | _ => unimplemented!(), |
415 | | }; |
416 | | |
417 | 0 | let mv = ts.me_stats[ref_frame.to_index()][tile_bo.0.y][tile_bo.0.x].mv >> ssdec; |
418 | | |
419 | | // Given a motion vector at 0 at higher subsampling: |
420 | | // | -1 | 0 | 1 | |
421 | | // then the vectors at -1 to 2 should be tested at the current subsampling. |
422 | | // |-------------| |
423 | | // | -2 -1 | 0 1 | 2 3 | |
424 | | // This corresponds to a 4x4 full search. |
425 | 0 | let x_lo = po.x + (mv.col as isize / 8 - 1).max(mvx_min / 8); |
426 | 0 | let x_hi = po.x + (mv.col as isize / 8 + 2).min(mvx_max / 8); |
427 | 0 | let y_lo = po.y + (mv.row as isize / 8 - 1).max(mvy_min / 8); |
428 | 0 | let y_hi = po.y + (mv.row as isize / 8 + 2).min(mvy_max / 8); |
429 | 0 | let mut results = full_search( |
430 | 0 | x_lo, |
431 | 0 | x_hi, |
432 | 0 | y_lo, |
433 | 0 | y_hi, |
434 | 0 | w, |
435 | 0 | h, |
436 | 0 | org_region, |
437 | 0 | p_ref, |
438 | 0 | po, |
439 | | 1, |
440 | 0 | lambda, |
441 | 0 | pmv, |
442 | 0 | bit_depth, |
443 | 0 | cpu_feature_level, |
444 | | ); |
445 | | |
446 | | // Scale motion vectors to full res size |
447 | 0 | results.mv = results.mv << ssdec; |
448 | | |
449 | 0 | Some(results) |
450 | | } else { |
451 | 0 | None |
452 | | } |
453 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::refine_subsampled_motion_estimate::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::refine_subsampled_motion_estimate::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::refine_subsampled_motion_estimate::<_> |
454 | | |
455 | 0 | fn get_mv_range( |
456 | 0 | w_in_b: usize, |
457 | 0 | h_in_b: usize, |
458 | 0 | bo: PlaneBlockOffset, |
459 | 0 | blk_w: usize, |
460 | 0 | blk_h: usize, |
461 | 0 | ) -> (isize, isize, isize, isize) { |
462 | 0 | let border_w = 128 + blk_w as isize * 8; |
463 | 0 | let border_h = 128 + blk_h as isize * 8; |
464 | 0 | let mvx_min = -(bo.0.x as isize) * (8 * MI_SIZE) as isize - border_w; |
465 | 0 | let mvx_max = ((w_in_b - bo.0.x) as isize - (blk_w / MI_SIZE) as isize) |
466 | 0 | * (8 * MI_SIZE) as isize |
467 | 0 | + border_w; |
468 | 0 | let mvy_min = -(bo.0.y as isize) * (8 * MI_SIZE) as isize - border_h; |
469 | 0 | let mvy_max = ((h_in_b - bo.0.y) as isize - (blk_h / MI_SIZE) as isize) |
470 | 0 | * (8 * MI_SIZE) as isize |
471 | 0 | + border_h; |
472 | | |
473 | | // <https://aomediacodec.github.io/av1-spec/#assign-mv-semantics> |
474 | 0 | ( |
475 | 0 | mvx_min.max(MV_LOW as isize + 1), |
476 | 0 | mvx_max.min(MV_UPP as isize - 1), |
477 | 0 | mvy_min.max(MV_LOW as isize + 1), |
478 | 0 | mvy_max.min(MV_UPP as isize - 1), |
479 | 0 | ) |
480 | 0 | } |
481 | | |
482 | | #[allow(clippy::too_many_arguments)] |
483 | 0 | fn full_search<T: Pixel>( |
484 | 0 | x_lo: isize, |
485 | 0 | x_hi: isize, |
486 | 0 | y_lo: isize, |
487 | 0 | y_hi: isize, |
488 | 0 | w: usize, |
489 | 0 | h: usize, |
490 | 0 | org_region: &PlaneRegion<T>, |
491 | 0 | p_ref: &Plane<T>, |
492 | 0 | po: PlaneOffset, |
493 | 0 | step: usize, |
494 | 0 | lambda: u32, |
495 | 0 | pmv: [MotionVector; 2], |
496 | 0 | bit_depth: usize, |
497 | 0 | cpu_feature_level: CpuFeatureLevel, |
498 | 0 | ) -> MotionSearchResult { |
499 | 0 | let search_region = p_ref.region(Area::Rect(Rect { |
500 | 0 | x: x_lo, |
501 | 0 | y: y_lo, |
502 | 0 | width: (x_hi - x_lo) as usize + w, |
503 | 0 | height: (y_hi - y_lo) as usize + h, |
504 | 0 | })); |
505 | | |
506 | 0 | let mut best: MotionSearchResult = MotionSearchResult::empty(); |
507 | | |
508 | | // Select rectangular regions within search region with vert+horz windows |
509 | 0 | for vert_window in search_region.vert_windows(h).step_by(step) { |
510 | 0 | for ref_window in vert_window.horz_windows(w).step_by(step) { |
511 | 0 | let &Rect { x, y, .. } = ref_window.rect(); |
512 | | |
513 | 0 | let mv = MotionVector { |
514 | 0 | row: 8 * (y as i16 - po.y as i16), |
515 | 0 | col: 8 * (x as i16 - po.x as i16), |
516 | 0 | }; |
517 | | |
518 | 0 | let rd = compute_mv_rd( |
519 | 0 | pmv, |
520 | 0 | lambda, |
521 | | false, |
522 | 0 | bit_depth, |
523 | 0 | w, |
524 | 0 | h, |
525 | 0 | mv, |
526 | 0 | org_region, |
527 | 0 | &ref_window, |
528 | 0 | cpu_feature_level, |
529 | | ); |
530 | | |
531 | 0 | if rd.cost < best.rd.cost { |
532 | 0 | best.rd = rd; |
533 | 0 | best.mv = mv; |
534 | 0 | } |
535 | | } |
536 | | } |
537 | | |
538 | 0 | best |
539 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::full_search::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::full_search::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::full_search::<_> |
540 | | |
541 | | /// Compute the rate distortion stats for a motion vector. |
542 | | #[allow(clippy::too_many_arguments)] |
543 | 0 | fn compute_mv_rd<T: Pixel>( |
544 | 0 | pmv: [MotionVector; 2], |
545 | 0 | lambda: u32, |
546 | 0 | use_satd: bool, |
547 | 0 | bit_depth: usize, |
548 | 0 | w: usize, |
549 | 0 | h: usize, |
550 | 0 | cand_mv: MotionVector, |
551 | 0 | plane_org: &PlaneRegion<'_, T>, |
552 | 0 | plane_ref: &PlaneRegion<'_, T>, |
553 | 0 | cpu_feature_level: CpuFeatureLevel, |
554 | 0 | ) -> MVCandidateRD { |
555 | 0 | let sad = if use_satd { |
556 | 0 | get_satd(plane_org, plane_ref, w, h, bit_depth, cpu_feature_level) |
557 | | } else { |
558 | 0 | get_sad(plane_org, plane_ref, w, h, bit_depth, cpu_feature_level) |
559 | | }; |
560 | | |
561 | 0 | let rate1 = get_mv_rate(cand_mv, pmv[0]); |
562 | 0 | let rate2 = get_mv_rate(cand_mv, pmv[1]); |
563 | 0 | let rate = rate1.min(rate2 + 1); |
564 | | |
565 | 0 | MVCandidateRD { |
566 | 0 | cost: 256 * sad as u64 + rate as u64 * lambda as u64, |
567 | 0 | sad, |
568 | 0 | } |
569 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::compute_mv_rd::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::compute_mv_rd::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::compute_mv_rd::<_> |
570 | | |
571 | 0 | fn diff_to_rate(diff: i16) -> u32 { |
572 | 0 | let d = diff >> 1; |
573 | 0 | 2 * ILog::ilog(d.abs()) as u32 |
574 | 0 | } |
575 | | |
576 | 0 | fn get_mv_rate(a: MotionVector, b: MotionVector) -> u32 { |
577 | 0 | diff_to_rate(a.row - b.row) + diff_to_rate(a.col - b.col) |
578 | 0 | } |
579 | | |
580 | | /// Result of motion search. |
581 | | #[derive(Debug, Copy, Clone)] |
582 | | pub struct MotionSearchResult { |
583 | | /// Motion vector chosen by the motion search. |
584 | | pub mv: MotionVector, |
585 | | /// Rate distortion data associated with `mv`. |
586 | | pub rd: MVCandidateRD, |
587 | | } |
588 | | |
589 | | impl MotionSearchResult { |
590 | | /// Creates an 'empty' value. |
591 | | /// |
592 | | /// To be considered empty, cost is set higher than any naturally occurring |
593 | | /// cost value. The idea is that comparing to any valid rd output, the |
594 | | /// search result will always be replaced. |
595 | 0 | pub fn empty() -> MotionSearchResult { |
596 | 0 | MotionSearchResult { |
597 | 0 | mv: MotionVector::default(), |
598 | 0 | rd: MVCandidateRD::empty(), |
599 | 0 | } |
600 | 0 | } |
601 | | |
602 | | /// Check if the value should be considered to be empty. |
603 | 0 | const fn is_empty(&self) -> bool { |
604 | 0 | self.rd.cost == u64::MAX |
605 | 0 | } |
606 | | } |
607 | | |
/// Rate distortion figures computed for one motion vector candidate.
#[derive(Debug, Copy, Clone)]
pub struct MVCandidateRD {
    /// Rate distortion cost of the candidate.
    pub cost: u64,
    /// Value of the distortion metric for the candidate.
    pub sad: u32,
}

impl MVCandidateRD {
    /// Creates a placeholder value.
    ///
    /// Both fields are set to the maximum of their type, so that any
    /// candidate with a strictly smaller cost replaces the placeholder in a
    /// "keep the cheapest" comparison.
    const fn empty() -> MVCandidateRD {
        Self {
            cost: u64::MAX,
            sad: u32::MAX,
        }
    }
}
630 | | |
631 | 0 | fn save_me_stats<T: Pixel>( |
632 | 0 | ts: &mut TileStateMut<'_, T>, |
633 | 0 | mv_size_in_b_log2: usize, |
634 | 0 | tile_bo: TileBlockOffset, |
635 | 0 | ref_frame: RefType, |
636 | 0 | stats: MEStats, |
637 | 0 | ) { |
638 | 0 | let size_in_b = 1 << mv_size_in_b_log2; |
639 | 0 | let tile_me_stats = &mut ts.me_stats[ref_frame.to_index()]; |
640 | 0 | let tile_bo_x_end = (tile_bo.0.x + size_in_b).min(ts.mi_width); |
641 | 0 | let tile_bo_y_end = (tile_bo.0.y + size_in_b).min(ts.mi_height); |
642 | 0 | for mi_y in tile_bo.0.y..tile_bo_y_end { |
643 | 0 | for a in tile_me_stats[mi_y][tile_bo.0.x..tile_bo_x_end].iter_mut() { |
644 | 0 | *a = stats; |
645 | 0 | } |
646 | | } |
647 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::save_me_stats::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::save_me_stats::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::save_me_stats::<_> |
648 | | |
649 | | #[allow(clippy::too_many_arguments)] |
650 | 0 | fn estimate_sb_motion<T: Pixel>( |
651 | 0 | fi: &FrameInvariants<T>, |
652 | 0 | ts: &mut TileStateMut<'_, T>, |
653 | 0 | ref_frame: RefType, |
654 | 0 | mv_size_in_b_log2: usize, |
655 | 0 | tile_bo: TileBlockOffset, |
656 | 0 | init: bool, |
657 | 0 | ssdec: u8, |
658 | 0 | lambda: u32, |
659 | 0 | bit_depth: usize, |
660 | 0 | cpu_feature_level: CpuFeatureLevel, |
661 | 0 | ) { |
662 | 0 | let pix_offset = tile_bo.to_luma_plane_offset(); |
663 | 0 | let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize); |
664 | 0 | let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize); |
665 | | |
666 | 0 | let mv_size = MI_SIZE << mv_size_in_b_log2; |
667 | | |
668 | | // Process in blocks, cropping at edges. |
669 | 0 | for y in (0..sb_h).step_by(mv_size) { |
670 | 0 | for x in (0..sb_w).step_by(mv_size) { |
671 | 0 | let corner: MVSamplingMode = if init { |
672 | 0 | MVSamplingMode::INIT |
673 | | } else { |
674 | | // Processing the block a size up produces data that can be used by |
675 | | // the right and bottom corners. |
676 | 0 | MVSamplingMode::CORNER { |
677 | 0 | right: x & mv_size == mv_size, |
678 | 0 | bottom: y & mv_size == mv_size, |
679 | 0 | } |
680 | | }; |
681 | | |
682 | 0 | let sub_bo = |
683 | 0 | tile_bo.with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2); |
684 | | |
685 | | // Clamp to frame edge, rounding up in the case of subsampling. |
686 | | // The rounding makes some assumptions about how subsampling is done. |
687 | 0 | let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec; |
688 | 0 | let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec; |
689 | | |
690 | | // Run motion estimation. |
691 | | // Note that the initial search (init) instructs the called function to |
692 | | // perform a more extensive search. |
693 | 0 | if let Some(results) = estimate_motion( |
694 | 0 | fi, |
695 | 0 | ts, |
696 | 0 | w, |
697 | 0 | h, |
698 | 0 | sub_bo, |
699 | 0 | ref_frame, |
700 | 0 | None, |
701 | 0 | corner, |
702 | 0 | init, |
703 | 0 | ssdec, |
704 | 0 | Some(lambda), |
705 | 0 | bit_depth, |
706 | 0 | cpu_feature_level, |
707 | 0 | ) { |
708 | 0 | // normalize sad to 128x128 block |
709 | 0 | let sad = |
710 | 0 | (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2)) / (w * h) as u64) as u32; |
711 | 0 | save_me_stats(ts, mv_size_in_b_log2, sub_bo, ref_frame, MEStats { |
712 | 0 | mv: results.mv, |
713 | 0 | normalized_sad: sad, |
714 | 0 | }); |
715 | 0 | } |
716 | | } |
717 | | } |
718 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::estimate_sb_motion::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::estimate_sb_motion::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::estimate_sb_motion::<_> |
719 | | |
720 | | #[allow(clippy::too_many_arguments)] |
721 | 0 | fn estimate_motion<T: Pixel>( |
722 | 0 | fi: &FrameInvariants<T>, |
723 | 0 | ts: &TileStateMut<'_, T>, |
724 | 0 | w: usize, |
725 | 0 | h: usize, |
726 | 0 | tile_bo: TileBlockOffset, |
727 | 0 | ref_frame: RefType, |
728 | 0 | pmv: Option<[MotionVector; 2]>, |
729 | 0 | corner: MVSamplingMode, |
730 | 0 | extensive_search: bool, |
731 | 0 | ssdec: u8, |
732 | 0 | lambda: Option<u32>, |
733 | 0 | bit_depth: usize, |
734 | 0 | cpu_feature_level: CpuFeatureLevel, |
735 | 0 | ) -> Option<MotionSearchResult> { |
736 | 0 | if let Some(ref rec) = fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize] { |
737 | 0 | let frame_bo = ts.to_frame_block_offset(tile_bo); |
738 | 0 | let (mvx_min, mvx_max, mvy_min, mvy_max) = |
739 | 0 | get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec); |
740 | | |
741 | 0 | let lambda = lambda.unwrap_or(0); |
742 | | |
743 | 0 | let global_mv = [MotionVector { row: 0, col: 0 }; 2]; |
744 | | |
745 | 0 | let po = frame_bo.to_luma_plane_offset(); |
746 | 0 | let (mvx_min, mvx_max, mvy_min, mvy_max) = ( |
747 | 0 | mvx_min >> ssdec, |
748 | 0 | mvx_max >> ssdec, |
749 | 0 | mvy_min >> ssdec, |
750 | 0 | mvy_max >> ssdec, |
751 | 0 | ); |
752 | 0 | let po = PlaneOffset { |
753 | 0 | x: po.x >> ssdec, |
754 | 0 | y: po.y >> ssdec, |
755 | 0 | }; |
756 | 0 | let p_ref = match ssdec { |
757 | 0 | 0 => &rec.frame.planes[0], |
758 | 0 | 1 => &rec.input_hres, |
759 | 0 | 2 => &rec.input_qres, |
760 | 0 | _ => unimplemented!(), |
761 | | }; |
762 | | |
763 | 0 | let org_region = &match ssdec { |
764 | 0 | 0 => ts.input_tile.planes[0].subregion(Area::BlockStartingAt { bo: tile_bo.0 }), |
765 | 0 | 1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }), |
766 | 0 | 2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }), |
767 | 0 | _ => unimplemented!(), |
768 | | }; |
769 | | |
770 | 0 | let mut best: MotionSearchResult = full_pixel_me( |
771 | 0 | fi, |
772 | 0 | ts, |
773 | 0 | org_region, |
774 | 0 | p_ref, |
775 | 0 | tile_bo, |
776 | 0 | po, |
777 | 0 | lambda, |
778 | 0 | pmv.unwrap_or(global_mv), |
779 | 0 | w, |
780 | 0 | h, |
781 | 0 | mvx_min, |
782 | 0 | mvx_max, |
783 | 0 | mvy_min, |
784 | 0 | mvy_max, |
785 | 0 | ref_frame, |
786 | 0 | corner, |
787 | 0 | extensive_search, |
788 | 0 | ssdec, |
789 | 0 | bit_depth, |
790 | 0 | cpu_feature_level, |
791 | | ); |
792 | | |
793 | 0 | if let Some(pmv) = pmv { |
794 | 0 | best.rd = get_fullpel_mv_rd( |
795 | 0 | po, |
796 | 0 | org_region, |
797 | 0 | p_ref, |
798 | 0 | bit_depth, |
799 | 0 | pmv, |
800 | 0 | lambda, |
801 | 0 | true, |
802 | 0 | mvx_min, |
803 | 0 | mvx_max, |
804 | 0 | mvy_min, |
805 | 0 | mvy_max, |
806 | 0 | w, |
807 | 0 | h, |
808 | 0 | best.mv, |
809 | 0 | cpu_feature_level, |
810 | 0 | ); |
811 | 0 |
|
812 | 0 | sub_pixel_me( |
813 | 0 | fi, |
814 | 0 | po, |
815 | 0 | org_region, |
816 | 0 | p_ref, |
817 | 0 | lambda, |
818 | 0 | pmv, |
819 | 0 | mvx_min, |
820 | 0 | mvx_max, |
821 | 0 | mvy_min, |
822 | 0 | mvy_max, |
823 | 0 | w, |
824 | 0 | h, |
825 | 0 | true, |
826 | 0 | &mut best, |
827 | 0 | ref_frame, |
828 | 0 | bit_depth, |
829 | 0 | cpu_feature_level, |
830 | 0 | ); |
831 | 0 | } |
832 | | |
833 | | // Scale motion vectors to full res size |
834 | 0 | best.mv = best.mv << ssdec; |
835 | | |
836 | 0 | Some(best) |
837 | | } else { |
838 | 0 | None |
839 | | } |
840 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::estimate_motion::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::estimate_motion::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::estimate_motion::<_> |
841 | | |
842 | | #[allow(clippy::too_many_arguments)] |
843 | 0 | fn full_pixel_me<T: Pixel>( |
844 | 0 | fi: &FrameInvariants<T>, |
845 | 0 | ts: &TileStateMut<'_, T>, |
846 | 0 | org_region: &PlaneRegion<T>, |
847 | 0 | p_ref: &Plane<T>, |
848 | 0 | tile_bo: TileBlockOffset, |
849 | 0 | po: PlaneOffset, |
850 | 0 | lambda: u32, |
851 | 0 | pmv: [MotionVector; 2], |
852 | 0 | w: usize, |
853 | 0 | h: usize, |
854 | 0 | mvx_min: isize, |
855 | 0 | mvx_max: isize, |
856 | 0 | mvy_min: isize, |
857 | 0 | mvy_max: isize, |
858 | 0 | ref_frame: RefType, |
859 | 0 | corner: MVSamplingMode, |
860 | 0 | extensive_search: bool, |
861 | 0 | ssdec: u8, |
862 | 0 | bit_depth: usize, |
863 | 0 | cpu_feature_level: CpuFeatureLevel, |
864 | 0 | ) -> MotionSearchResult { |
865 | 0 | let ref_frame_id = ref_frame.to_index(); |
866 | 0 | let tile_me_stats = &ts.me_stats[ref_frame_id].as_const(); |
867 | 0 | let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize] |
868 | 0 | .as_ref() |
869 | 0 | .map(|frame_ref| frame_ref.frame_me_stats.read().expect("poisoned lock"));Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<u16>::{closure#0}Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<u8>::{closure#0}Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<_>::{closure#0} |
870 | 0 | let subsets = get_subset_predictors( |
871 | 0 | tile_bo, |
872 | 0 | tile_me_stats, |
873 | 0 | frame_ref, |
874 | 0 | ref_frame_id, |
875 | 0 | w, |
876 | 0 | h, |
877 | 0 | mvx_min, |
878 | 0 | mvx_max, |
879 | 0 | mvy_min, |
880 | 0 | mvy_max, |
881 | 0 | corner, |
882 | 0 | ssdec, |
883 | | ); |
884 | | |
885 | 0 | let try_cands = |predictors: &[MotionVector], best: &mut MotionSearchResult| { |
886 | 0 | let mut results = get_best_predictor( |
887 | 0 | po, |
888 | 0 | org_region, |
889 | 0 | p_ref, |
890 | 0 | predictors, |
891 | 0 | bit_depth, |
892 | 0 | pmv, |
893 | 0 | lambda, |
894 | 0 | mvx_min, |
895 | 0 | mvx_max, |
896 | 0 | mvy_min, |
897 | 0 | mvy_max, |
898 | 0 | w, |
899 | 0 | h, |
900 | 0 | cpu_feature_level, |
901 | | ); |
902 | 0 | fullpel_diamond_search( |
903 | 0 | po, |
904 | 0 | org_region, |
905 | 0 | p_ref, |
906 | 0 | &mut results, |
907 | 0 | bit_depth, |
908 | 0 | pmv, |
909 | 0 | lambda, |
910 | 0 | mvx_min, |
911 | 0 | mvx_max, |
912 | 0 | mvy_min, |
913 | 0 | mvy_max, |
914 | 0 | w, |
915 | 0 | h, |
916 | 0 | cpu_feature_level, |
917 | | ); |
918 | | |
919 | 0 | if results.rd.cost < best.rd.cost { |
920 | 0 | *best = results; |
921 | 0 | } |
922 | 0 | }; Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<u16>::{closure#1}Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<u8>::{closure#1}Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<_>::{closure#1} |
923 | | |
924 | 0 | let mut best: MotionSearchResult = MotionSearchResult::empty(); |
925 | 0 | if !extensive_search { |
926 | 0 | try_cands(&subsets.all_mvs(), &mut best); |
927 | 0 | best |
928 | | } else { |
929 | | // Perform a more thorough search before resorting to full search. |
930 | | // Search the median, the best mvs of neighboring blocks, and motion vectors |
931 | | // from the previous frame. Stop once a candidate with a sad less than a |
932 | | // threshold is found. |
933 | | |
934 | 0 | let thresh = (subsets.min_sad as f32 * 1.2) as u32 + (((w * h) as u32) << (bit_depth - 8)); |
935 | | |
936 | 0 | if let Some(median) = subsets.median { |
937 | 0 | try_cands(&[median], &mut best); |
938 | | |
939 | 0 | if best.rd.sad < thresh { |
940 | 0 | return best; |
941 | 0 | } |
942 | 0 | } |
943 | | |
944 | 0 | try_cands(&subsets.subset_b, &mut best); |
945 | | |
946 | 0 | if best.rd.sad < thresh { |
947 | 0 | return best; |
948 | 0 | } |
949 | | |
950 | 0 | try_cands(&subsets.subset_c, &mut best); |
951 | | |
952 | 0 | if best.rd.sad < thresh { |
953 | 0 | return best; |
954 | 0 | } |
955 | | |
956 | | // Preform UMH search, either as the last possible search when full search |
957 | | // is disabled, or as the last search before resorting to full search. |
958 | | // Use 24 merange, since it is the largest range that x264 uses. |
959 | 0 | uneven_multi_hex_search( |
960 | 0 | po, |
961 | 0 | org_region, |
962 | 0 | p_ref, |
963 | 0 | &mut best, |
964 | 0 | bit_depth, |
965 | 0 | pmv, |
966 | 0 | lambda, |
967 | 0 | mvx_min, |
968 | 0 | mvx_max, |
969 | 0 | mvy_min, |
970 | 0 | mvy_max, |
971 | 0 | w, |
972 | 0 | h, |
973 | | 24, |
974 | 0 | cpu_feature_level, |
975 | | ); |
976 | | |
977 | 0 | best |
978 | | } |
979 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<_> |
980 | | |
981 | | #[allow(clippy::too_many_arguments)] |
982 | 0 | fn sub_pixel_me<T: Pixel>( |
983 | 0 | fi: &FrameInvariants<T>, |
984 | 0 | po: PlaneOffset, |
985 | 0 | org_region: &PlaneRegion<T>, |
986 | 0 | p_ref: &Plane<T>, |
987 | 0 | lambda: u32, |
988 | 0 | pmv: [MotionVector; 2], |
989 | 0 | mvx_min: isize, |
990 | 0 | mvx_max: isize, |
991 | 0 | mvy_min: isize, |
992 | 0 | mvy_max: isize, |
993 | 0 | w: usize, |
994 | 0 | h: usize, |
995 | 0 | use_satd: bool, |
996 | 0 | best: &mut MotionSearchResult, |
997 | 0 | ref_frame: RefType, |
998 | 0 | bit_depth: usize, |
999 | 0 | cpu_feature_level: CpuFeatureLevel, |
1000 | 0 | ) { |
1001 | 0 | subpel_diamond_search( |
1002 | 0 | fi, |
1003 | 0 | po, |
1004 | 0 | org_region, |
1005 | 0 | p_ref, |
1006 | 0 | bit_depth, |
1007 | 0 | pmv, |
1008 | 0 | lambda, |
1009 | 0 | mvx_min, |
1010 | 0 | mvx_max, |
1011 | 0 | mvy_min, |
1012 | 0 | mvy_max, |
1013 | 0 | w, |
1014 | 0 | h, |
1015 | 0 | use_satd, |
1016 | 0 | best, |
1017 | 0 | ref_frame, |
1018 | 0 | cpu_feature_level, |
1019 | | ); |
1020 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::sub_pixel_me::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::sub_pixel_me::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::sub_pixel_me::<_> |
1021 | | |
1022 | | /// Run a subpixel diamond search. The search is run on multiple step sizes. |
1023 | | /// |
1024 | | /// For each step size, candidate motion vectors are examined for improvement |
1025 | | /// to the current search location. The search location is moved to the best |
1026 | | /// candidate (if any). This is repeated until the search location stops moving. |
1027 | | #[allow(clippy::too_many_arguments)] |
1028 | 0 | fn subpel_diamond_search<T: Pixel>( |
1029 | 0 | fi: &FrameInvariants<T>, |
1030 | 0 | po: PlaneOffset, |
1031 | 0 | org_region: &PlaneRegion<T>, |
1032 | 0 | _p_ref: &Plane<T>, |
1033 | 0 | bit_depth: usize, |
1034 | 0 | pmv: [MotionVector; 2], |
1035 | 0 | lambda: u32, |
1036 | 0 | mvx_min: isize, |
1037 | 0 | mvx_max: isize, |
1038 | 0 | mvy_min: isize, |
1039 | 0 | mvy_max: isize, |
1040 | 0 | w: usize, |
1041 | 0 | h: usize, |
1042 | 0 | use_satd: bool, |
1043 | 0 | current: &mut MotionSearchResult, |
1044 | 0 | ref_frame: RefType, |
1045 | 0 | cpu_feature_level: CpuFeatureLevel, |
1046 | 0 | ) { |
1047 | | // Motion compensation assembly has special requirements for edges |
1048 | 0 | let mc_w = w.next_power_of_two(); |
1049 | 0 | let mc_h = (h + 1) & !1; |
1050 | | |
1051 | | // Metadata for subpel scratch pad. |
1052 | 0 | let cfg = PlaneConfig::new(mc_w, mc_h, 0, 0, 0, 0, std::mem::size_of::<T>()); |
1053 | | // Stack allocation for subpel scratch pad. |
1054 | | // SAFETY: We write to the array below before reading from it. |
1055 | 0 | let mut buf: Aligned<A64, [T; 128 * 128]> = Aligned([T::cast_from(0); 128 * 128]); |
1056 | 0 | let mut tmp_region = PlaneRegionMut::from_slice(buf.as_mut(), &cfg, Rect { |
1057 | 0 | x: 0, |
1058 | 0 | y: 0, |
1059 | 0 | width: cfg.width, |
1060 | 0 | height: cfg.height, |
1061 | 0 | }); |
1062 | | |
1063 | | // start at 1/2 pel and end at 1/4 or 1/8 pel |
1064 | 0 | let (mut diamond_radius_log2, diamond_radius_end_log2) = (2u8, 1u8); |
1065 | | |
1066 | | loop { |
1067 | | // Find the best candidate from the diamond pattern. |
1068 | 0 | let mut best_cand: MotionSearchResult = MotionSearchResult::empty(); |
1069 | 0 | for &offset in &DIAMOND_R1_PATTERN_SUBPEL { |
1070 | 0 | let cand_mv = current.mv + (offset << diamond_radius_log2); |
1071 | | |
1072 | 0 | let rd = get_subpel_mv_rd( |
1073 | 0 | fi, |
1074 | 0 | po, |
1075 | 0 | org_region, |
1076 | 0 | bit_depth, |
1077 | 0 | pmv, |
1078 | 0 | lambda, |
1079 | 0 | use_satd, |
1080 | 0 | mvx_min, |
1081 | 0 | mvx_max, |
1082 | 0 | mvy_min, |
1083 | 0 | mvy_max, |
1084 | 0 | w, |
1085 | 0 | h, |
1086 | 0 | cand_mv, |
1087 | 0 | &mut tmp_region, |
1088 | 0 | ref_frame, |
1089 | 0 | cpu_feature_level, |
1090 | | ); |
1091 | | |
1092 | 0 | if rd.cost < best_cand.rd.cost { |
1093 | 0 | best_cand.mv = cand_mv; |
1094 | 0 | best_cand.rd = rd; |
1095 | 0 | } |
1096 | | } |
1097 | | |
1098 | | // Continue the search at this scale until a better candidate isn't found. |
1099 | 0 | if current.rd.cost <= best_cand.rd.cost { |
1100 | 0 | if diamond_radius_log2 == diamond_radius_end_log2 { |
1101 | 0 | break; |
1102 | 0 | } else { |
1103 | 0 | diamond_radius_log2 -= 1; |
1104 | 0 | } |
1105 | 0 | } else { |
1106 | 0 | *current = best_cand; |
1107 | 0 | } |
1108 | | } |
1109 | | |
1110 | 0 | assert!(!current.is_empty()); |
1111 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::subpel_diamond_search::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::subpel_diamond_search::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::subpel_diamond_search::<_> |
1112 | | |
1113 | | #[allow(clippy::too_many_arguments)] |
1114 | 0 | fn get_subpel_mv_rd<T: Pixel>( |
1115 | 0 | fi: &FrameInvariants<T>, |
1116 | 0 | po: PlaneOffset, |
1117 | 0 | org_region: &PlaneRegion<T>, |
1118 | 0 | bit_depth: usize, |
1119 | 0 | pmv: [MotionVector; 2], |
1120 | 0 | lambda: u32, |
1121 | 0 | use_satd: bool, |
1122 | 0 | mvx_min: isize, |
1123 | 0 | mvx_max: isize, |
1124 | 0 | mvy_min: isize, |
1125 | 0 | mvy_max: isize, |
1126 | 0 | w: usize, |
1127 | 0 | h: usize, |
1128 | 0 | cand_mv: MotionVector, |
1129 | 0 | tmp_region: &mut PlaneRegionMut<T>, |
1130 | 0 | ref_frame: RefType, |
1131 | 0 | cpu_feature_level: CpuFeatureLevel, |
1132 | 0 | ) -> MVCandidateRD { |
1133 | 0 | if (cand_mv.col as isize) < mvx_min |
1134 | 0 | || (cand_mv.col as isize) > mvx_max |
1135 | 0 | || (cand_mv.row as isize) < mvy_min |
1136 | 0 | || (cand_mv.row as isize) > mvy_max |
1137 | | { |
1138 | 0 | return MVCandidateRD::empty(); |
1139 | 0 | } |
1140 | | |
1141 | 0 | let tmp_width = tmp_region.rect().width; |
1142 | 0 | let tmp_height = tmp_region.rect().height; |
1143 | 0 | let tile_rect = TileRect { |
1144 | 0 | x: 0, |
1145 | 0 | y: 0, |
1146 | 0 | width: tmp_width, |
1147 | 0 | height: tmp_height, |
1148 | 0 | }; |
1149 | | |
1150 | 0 | PredictionMode::NEWMV.predict_inter_single( |
1151 | 0 | fi, |
1152 | 0 | tile_rect, |
1153 | | 0, |
1154 | 0 | po, |
1155 | 0 | tmp_region, |
1156 | | // motion comp's w & h on edges can be different from distortion's |
1157 | 0 | tmp_width, |
1158 | 0 | tmp_height, |
1159 | 0 | ref_frame, |
1160 | 0 | cand_mv, |
1161 | 0 | bit_depth, |
1162 | 0 | cpu_feature_level, |
1163 | | ); |
1164 | 0 | let plane_ref = tmp_region.as_const(); |
1165 | 0 | compute_mv_rd( |
1166 | 0 | pmv, |
1167 | 0 | lambda, |
1168 | 0 | use_satd, |
1169 | 0 | bit_depth, |
1170 | 0 | w, |
1171 | 0 | h, |
1172 | 0 | cand_mv, |
1173 | 0 | org_region, |
1174 | 0 | &plane_ref, |
1175 | 0 | cpu_feature_level, |
1176 | | ) |
1177 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::get_subpel_mv_rd::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::get_subpel_mv_rd::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::get_subpel_mv_rd::<_> |
1178 | | |
1179 | | /// Perform an uneven multi-hexagon search. There are 4 stages: |
1180 | | /// 1. Unsymmetrical-cross search: Search the horizontal and vertical directions |
1181 | | /// for the general direction of the motion. |
1182 | | /// 2. A 5x5 full search is done to refine the current candidate. |
1183 | | /// 3. Uneven multi-hexagon search. See [`UMH_PATTERN`]. |
1184 | | /// 4. Refinement using standard hexagon search. |
1185 | | /// |
1186 | | /// `current` provides the initial search location and serves as |
1187 | | /// the output for the final search results. |
1188 | | /// |
1189 | | /// `me_range` parameter determines how far these stages can search. |
1190 | | #[allow(clippy::too_many_arguments)] |
1191 | 0 | fn uneven_multi_hex_search<T: Pixel>( |
1192 | 0 | po: PlaneOffset, |
1193 | 0 | org_region: &PlaneRegion<T>, |
1194 | 0 | p_ref: &Plane<T>, |
1195 | 0 | current: &mut MotionSearchResult, |
1196 | 0 | bit_depth: usize, |
1197 | 0 | pmv: [MotionVector; 2], |
1198 | 0 | lambda: u32, |
1199 | 0 | mvx_min: isize, |
1200 | 0 | mvx_max: isize, |
1201 | 0 | mvy_min: isize, |
1202 | 0 | mvy_max: isize, |
1203 | 0 | w: usize, |
1204 | 0 | h: usize, |
1205 | 0 | me_range: i16, |
1206 | 0 | cpu_feature_level: CpuFeatureLevel, |
1207 | 0 | ) { |
1208 | 0 | assert!(!current.is_empty()); |
1209 | | |
1210 | | // Search in a cross pattern to obtain a rough approximate of motion. |
1211 | | // The cross is split into a horizontal and vertical component. Video content |
1212 | | // tends to have more horizontal motion, so the horizontal part of the cross |
1213 | | // is twice as large as the vertical half. |
1214 | | // X - |
1215 | | // | <- me_range/2 |
1216 | | // X | |
1217 | | // X X X XoX X X X - |
1218 | | // X |
1219 | | // |
1220 | | // X |
1221 | | // |------| |
1222 | | // \ |
1223 | | // me_range |
1224 | 0 | let center = current.mv; |
1225 | | |
1226 | | // The larger, horizontal, part of the cross search. |
1227 | 0 | for i in (1..=me_range).step_by(2) { |
1228 | | const HORIZONTAL_LINE: [MotionVector; 2] = search_pattern!( |
1229 | | col: [ 0, 0], |
1230 | | row: [-1, 1] |
1231 | | ); |
1232 | | |
1233 | 0 | for &offset in &HORIZONTAL_LINE { |
1234 | 0 | let cand_mv = center + offset * i; |
1235 | 0 | let rd = get_fullpel_mv_rd( |
1236 | 0 | po, |
1237 | 0 | org_region, |
1238 | 0 | p_ref, |
1239 | 0 | bit_depth, |
1240 | 0 | pmv, |
1241 | 0 | lambda, |
1242 | | false, |
1243 | 0 | mvx_min, |
1244 | 0 | mvx_max, |
1245 | 0 | mvy_min, |
1246 | 0 | mvy_max, |
1247 | 0 | w, |
1248 | 0 | h, |
1249 | 0 | cand_mv, |
1250 | 0 | cpu_feature_level, |
1251 | | ); |
1252 | | |
1253 | 0 | if rd.cost < current.rd.cost { |
1254 | 0 | current.mv = cand_mv; |
1255 | 0 | current.rd = rd; |
1256 | 0 | } |
1257 | | } |
1258 | | } |
1259 | | |
1260 | | // The smaller, vertical, part of the cross search |
1261 | 0 | for i in (1..=me_range >> 1).step_by(2) { |
1262 | | const VERTICAL_LINE: [MotionVector; 2] = search_pattern!( |
1263 | | col: [-1, 1], |
1264 | | row: [ 0, 0] |
1265 | | ); |
1266 | | |
1267 | 0 | for &offset in &VERTICAL_LINE { |
1268 | 0 | let cand_mv = center + offset * i; |
1269 | 0 | let rd = get_fullpel_mv_rd( |
1270 | 0 | po, |
1271 | 0 | org_region, |
1272 | 0 | p_ref, |
1273 | 0 | bit_depth, |
1274 | 0 | pmv, |
1275 | 0 | lambda, |
1276 | | false, |
1277 | 0 | mvx_min, |
1278 | 0 | mvx_max, |
1279 | 0 | mvy_min, |
1280 | 0 | mvy_max, |
1281 | 0 | w, |
1282 | 0 | h, |
1283 | 0 | cand_mv, |
1284 | 0 | cpu_feature_level, |
1285 | | ); |
1286 | | |
1287 | 0 | if rd.cost < current.rd.cost { |
1288 | 0 | current.mv = cand_mv; |
1289 | 0 | current.rd = rd; |
1290 | 0 | } |
1291 | | } |
1292 | | } |
1293 | | |
1294 | | // 5x5 full search. Search a 5x5 square region around the current best mv. |
1295 | 0 | let center = current.mv; |
1296 | 0 | for row in -2..=2 { |
1297 | 0 | for col in -2..=2 { |
1298 | 0 | if row == 0 && col == 0 { |
1299 | 0 | continue; |
1300 | 0 | } |
1301 | 0 | let cand_mv = center + MotionVector { row, col }; |
1302 | 0 | let rd = get_fullpel_mv_rd( |
1303 | 0 | po, |
1304 | 0 | org_region, |
1305 | 0 | p_ref, |
1306 | 0 | bit_depth, |
1307 | 0 | pmv, |
1308 | 0 | lambda, |
1309 | | false, |
1310 | 0 | mvx_min, |
1311 | 0 | mvx_max, |
1312 | 0 | mvy_min, |
1313 | 0 | mvy_max, |
1314 | 0 | w, |
1315 | 0 | h, |
1316 | 0 | cand_mv, |
1317 | 0 | cpu_feature_level, |
1318 | | ); |
1319 | | |
1320 | 0 | if rd.cost < current.rd.cost { |
1321 | 0 | current.mv = cand_mv; |
1322 | 0 | current.rd = rd; |
1323 | 0 | } |
1324 | | } |
1325 | | } |
1326 | | |
1327 | | // Run the hexagons in uneven multi-hexagon. The hexagonal pattern is tested |
1328 | | // around the best vector at multiple scales. |
1329 | | // Example of the UMH pattern run on a scale of 1 and 2: |
1330 | | // 2 - |
1331 | | // | <- me_range |
1332 | | // 2 2 | |
1333 | | // | |
1334 | | // 2 1 2 | |
1335 | | // 1 1 | |
1336 | | // 2 1 1 2 | |
1337 | | // 1 1 | |
1338 | | // 2 1 o 1 2 | |
1339 | | // 1 1 | |
1340 | | // 2 1 1 2 | |
1341 | | // 1 1 | |
1342 | | // 2 1 2 | |
1343 | | // | |
1344 | | // 2 2 | |
1345 | | // | |
1346 | | // 2 - |
1347 | | // |---------------| |
1348 | | // \ |
1349 | | // me_range |
1350 | 0 | let center = current.mv; |
1351 | | |
1352 | | // Divide by 4, the radius of the UMH's hexagon. |
1353 | 0 | let iterations = me_range >> 2; |
1354 | 0 | for i in 1..=iterations { |
1355 | 0 | for &offset in &UMH_PATTERN { |
1356 | 0 | let cand_mv = center + offset * i; |
1357 | 0 | let rd = get_fullpel_mv_rd( |
1358 | 0 | po, |
1359 | 0 | org_region, |
1360 | 0 | p_ref, |
1361 | 0 | bit_depth, |
1362 | 0 | pmv, |
1363 | 0 | lambda, |
1364 | | false, |
1365 | 0 | mvx_min, |
1366 | 0 | mvx_max, |
1367 | 0 | mvy_min, |
1368 | 0 | mvy_max, |
1369 | 0 | w, |
1370 | 0 | h, |
1371 | 0 | cand_mv, |
1372 | 0 | cpu_feature_level, |
1373 | | ); |
1374 | | |
1375 | 0 | if rd.cost < current.rd.cost { |
1376 | 0 | current.mv = cand_mv; |
1377 | 0 | current.rd = rd; |
1378 | 0 | } |
1379 | | } |
1380 | | } |
1381 | | |
1382 | | // Refine the search results using a 'normal' hexagon search. |
1383 | 0 | hexagon_search( |
1384 | 0 | po, |
1385 | 0 | org_region, |
1386 | 0 | p_ref, |
1387 | 0 | current, |
1388 | 0 | bit_depth, |
1389 | 0 | pmv, |
1390 | 0 | lambda, |
1391 | 0 | mvx_min, |
1392 | 0 | mvx_max, |
1393 | 0 | mvy_min, |
1394 | 0 | mvy_max, |
1395 | 0 | w, |
1396 | 0 | h, |
1397 | 0 | cpu_feature_level, |
1398 | | ); |
1399 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::uneven_multi_hex_search::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::uneven_multi_hex_search::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::uneven_multi_hex_search::<_> |
1400 | | |
1401 | | #[allow(clippy::too_many_arguments)] |
1402 | 0 | fn get_subset_predictors( |
1403 | 0 | tile_bo: TileBlockOffset, |
1404 | 0 | tile_me_stats: &TileMEStats<'_>, |
1405 | 0 | frame_ref_opt: Option<ReadGuardMEStats<'_>>, |
1406 | 0 | ref_frame_id: usize, |
1407 | 0 | pix_w: usize, |
1408 | 0 | pix_h: usize, |
1409 | 0 | mvx_min: isize, |
1410 | 0 | mvx_max: isize, |
1411 | 0 | mvy_min: isize, |
1412 | 0 | mvy_max: isize, |
1413 | 0 | corner: MVSamplingMode, |
1414 | 0 | ssdec: u8, |
1415 | 0 | ) -> MotionEstimationSubsets { |
1416 | 0 | let mut min_sad: u32 = u32::MAX; |
1417 | 0 | let mut subset_b = ArrayVec::<MotionVector, 5>::new(); |
1418 | 0 | let mut subset_c = ArrayVec::<MotionVector, 5>::new(); |
1419 | | |
1420 | | // rounded up width in blocks |
1421 | 0 | let w = ((pix_w << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2; |
1422 | 0 | let h = ((pix_h << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2; |
1423 | | |
1424 | | // Get predictors from the same frame. |
1425 | | |
1426 | 0 | let clipped_half_w = (w >> 1).min(tile_me_stats.cols() - 1 - tile_bo.0.x); |
1427 | 0 | let clipped_half_h = (h >> 1).min(tile_me_stats.rows() - 1 - tile_bo.0.y); |
1428 | | |
1429 | 0 | let mut process_cand = |stats: MEStats| -> MotionVector { |
1430 | 0 | min_sad = min_sad.min(stats.normalized_sad); |
1431 | 0 | let mv = stats.mv.quantize_to_fullpel(); |
1432 | 0 | MotionVector { |
1433 | 0 | col: clamp(mv.col as isize, mvx_min, mvx_max) as i16, |
1434 | 0 | row: clamp(mv.row as isize, mvy_min, mvy_max) as i16, |
1435 | 0 | } |
1436 | 0 | }; |
1437 | | |
1438 | | // Sample the middle of all block edges bordering this one. |
1439 | | // Note: If motion vectors haven't been precomputed to a given blocksize, then |
1440 | | // the right and bottom edges will be duplicates of the center predictor when |
1441 | | // processing in raster order. |
1442 | | |
1443 | | // left |
1444 | 0 | if tile_bo.0.x > 0 { |
1445 | 0 | subset_b.push(process_cand( |
1446 | 0 | tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x - 1], |
1447 | 0 | )); |
1448 | 0 | } |
1449 | | // top |
1450 | 0 | if tile_bo.0.y > 0 { |
1451 | 0 | subset_b.push(process_cand( |
1452 | 0 | tile_me_stats[tile_bo.0.y - 1][tile_bo.0.x + clipped_half_w], |
1453 | 0 | )); |
1454 | 0 | } |
1455 | | |
1456 | | // Sampling far right and far bottom edges was tested, but had worse results |
1457 | | // without an extensive threshold test (with threshold being applied after |
1458 | | // checking median and the best of each subset). |
1459 | | |
1460 | | // right |
1461 | | if let MVSamplingMode::CORNER { |
1462 | | right: true, |
1463 | | bottom: _, |
1464 | 0 | } = corner |
1465 | | { |
1466 | 0 | if tile_bo.0.x + w < tile_me_stats.cols() { |
1467 | 0 | subset_b.push(process_cand( |
1468 | 0 | tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x + w], |
1469 | 0 | )); |
1470 | 0 | } |
1471 | 0 | } |
1472 | | // bottom |
1473 | | if let MVSamplingMode::CORNER { |
1474 | | right: _, |
1475 | | bottom: true, |
1476 | 0 | } = corner |
1477 | | { |
1478 | 0 | if tile_bo.0.y + h < tile_me_stats.rows() { |
1479 | 0 | subset_b.push(process_cand( |
1480 | 0 | tile_me_stats[tile_bo.0.y + h][tile_bo.0.x + clipped_half_w], |
1481 | 0 | )); |
1482 | 0 | } |
1483 | 0 | } |
1484 | | |
1485 | 0 | let median = if corner != MVSamplingMode::INIT { |
1486 | | // Sample the center of the current block. |
1487 | 0 | Some(process_cand( |
1488 | 0 | tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x + clipped_half_w], |
1489 | 0 | )) |
1490 | 0 | } else if subset_b.len() != 3 { |
1491 | 0 | None |
1492 | | } else { |
1493 | 0 | let mut rows: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.row).collect(); |
1494 | 0 | let mut cols: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.col).collect(); |
1495 | 0 | rows.as_mut_slice().sort_unstable(); |
1496 | 0 | cols.as_mut_slice().sort_unstable(); |
1497 | 0 | Some(MotionVector { |
1498 | 0 | row: rows[1], |
1499 | 0 | col: cols[1], |
1500 | 0 | }) |
1501 | | }; |
1502 | | |
1503 | | // Zero motion vector, don't use add_cand since it skips zero vectors. |
1504 | 0 | subset_b.push(MotionVector::default()); |
1505 | | |
1506 | | // EPZS subset C predictors. |
1507 | | // Sample the middle of bordering side of the left, right, top and bottom |
1508 | | // blocks of the previous frame. |
1509 | | // Sample the middle of this block in the previous frame. |
1510 | | |
1511 | 0 | if let Some(frame_me_stats) = frame_ref_opt { |
1512 | 0 | let prev_frame = &frame_me_stats[ref_frame_id]; |
1513 | | |
1514 | 0 | let frame_bo = PlaneBlockOffset(BlockOffset { |
1515 | 0 | x: tile_me_stats.x() + tile_bo.0.x, |
1516 | 0 | y: tile_me_stats.y() + tile_bo.0.y, |
1517 | 0 | }); |
1518 | 0 | let clipped_half_w = (w >> 1).min(prev_frame.cols - 1 - frame_bo.0.x); |
1519 | 0 | let clipped_half_h = (h >> 1).min(prev_frame.rows - 1 - frame_bo.0.y); |
1520 | | |
1521 | | // left |
1522 | 0 | if frame_bo.0.x > 0 { |
1523 | 0 | subset_c.push(process_cand( |
1524 | 0 | prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x - 1], |
1525 | 0 | )); |
1526 | 0 | } |
1527 | | // top |
1528 | 0 | if frame_bo.0.y > 0 { |
1529 | 0 | subset_c.push(process_cand( |
1530 | 0 | prev_frame[frame_bo.0.y - 1][frame_bo.0.x + clipped_half_w], |
1531 | 0 | )); |
1532 | 0 | } |
1533 | | // right |
1534 | 0 | if frame_bo.0.x + w < prev_frame.cols { |
1535 | 0 | subset_c.push(process_cand( |
1536 | 0 | prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + w], |
1537 | 0 | )); |
1538 | 0 | } |
1539 | | // bottom |
1540 | 0 | if frame_bo.0.y + h < prev_frame.rows { |
1541 | 0 | subset_c.push(process_cand( |
1542 | 0 | prev_frame[frame_bo.0.y + h][frame_bo.0.x + clipped_half_w], |
1543 | 0 | )); |
1544 | 0 | } |
1545 | | |
1546 | 0 | subset_c.push(process_cand( |
1547 | 0 | prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + clipped_half_w], |
1548 | 0 | )); |
1549 | 0 | } |
1550 | | |
1551 | | // Undo normalization to 128x128 block size |
1552 | 0 | let min_sad = ((min_sad as u64 * (pix_w * pix_h) as u64) >> (MAX_SB_SIZE_LOG2 * 2)) as u32; |
1553 | | |
1554 | 0 | let dec_mv = |mv: MotionVector| MotionVector { |
1555 | 0 | col: mv.col >> ssdec, |
1556 | 0 | row: mv.row >> ssdec, |
1557 | 0 | }; |
1558 | 0 | let median = median.map(dec_mv); |
1559 | 0 | for mv in subset_b.iter_mut() { |
1560 | 0 | *mv = dec_mv(*mv); |
1561 | 0 | } |
1562 | 0 | for mv in subset_c.iter_mut() { |
1563 | 0 | *mv = dec_mv(*mv); |
1564 | 0 | } |
1565 | | |
1566 | 0 | MotionEstimationSubsets { |
1567 | 0 | min_sad, |
1568 | 0 | median, |
1569 | 0 | subset_b, |
1570 | 0 | subset_c, |
1571 | 0 | } |
1572 | 0 | } |
1573 | | |
1574 | | #[allow(clippy::too_many_arguments)] |
1575 | 0 | fn get_best_predictor<T: Pixel>( |
1576 | 0 | po: PlaneOffset, |
1577 | 0 | org_region: &PlaneRegion<T>, |
1578 | 0 | p_ref: &Plane<T>, |
1579 | 0 | predictors: &[MotionVector], |
1580 | 0 | bit_depth: usize, |
1581 | 0 | pmv: [MotionVector; 2], |
1582 | 0 | lambda: u32, |
1583 | 0 | mvx_min: isize, |
1584 | 0 | mvx_max: isize, |
1585 | 0 | mvy_min: isize, |
1586 | 0 | mvy_max: isize, |
1587 | 0 | w: usize, |
1588 | 0 | h: usize, |
1589 | 0 | cpu_feature_level: CpuFeatureLevel, |
1590 | 0 | ) -> MotionSearchResult { |
1591 | 0 | let mut best: MotionSearchResult = MotionSearchResult::empty(); |
1592 | | |
1593 | 0 | for &init_mv in predictors.iter() { |
1594 | 0 | let rd = get_fullpel_mv_rd( |
1595 | 0 | po, |
1596 | 0 | org_region, |
1597 | 0 | p_ref, |
1598 | 0 | bit_depth, |
1599 | 0 | pmv, |
1600 | 0 | lambda, |
1601 | | false, |
1602 | 0 | mvx_min, |
1603 | 0 | mvx_max, |
1604 | 0 | mvy_min, |
1605 | 0 | mvy_max, |
1606 | 0 | w, |
1607 | 0 | h, |
1608 | 0 | init_mv, |
1609 | 0 | cpu_feature_level, |
1610 | | ); |
1611 | | |
1612 | 0 | if rd.cost < best.rd.cost { |
1613 | 0 | best.mv = init_mv; |
1614 | 0 | best.rd = rd; |
1615 | 0 | } |
1616 | | } |
1617 | | |
1618 | 0 | best |
1619 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::get_best_predictor::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::get_best_predictor::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::get_best_predictor::<_> |
1620 | | |
1621 | | #[allow(clippy::too_many_arguments)] |
1622 | 0 | fn get_fullpel_mv_rd<T: Pixel>( |
1623 | 0 | po: PlaneOffset, |
1624 | 0 | org_region: &PlaneRegion<T>, |
1625 | 0 | p_ref: &Plane<T>, |
1626 | 0 | bit_depth: usize, |
1627 | 0 | pmv: [MotionVector; 2], |
1628 | 0 | lambda: u32, |
1629 | 0 | use_satd: bool, |
1630 | 0 | mvx_min: isize, |
1631 | 0 | mvx_max: isize, |
1632 | 0 | mvy_min: isize, |
1633 | 0 | mvy_max: isize, |
1634 | 0 | w: usize, |
1635 | 0 | h: usize, |
1636 | 0 | cand_mv: MotionVector, |
1637 | 0 | cpu_feature_level: CpuFeatureLevel, |
1638 | 0 | ) -> MVCandidateRD { |
1639 | 0 | if (cand_mv.col as isize) < mvx_min |
1640 | 0 | || (cand_mv.col as isize) > mvx_max |
1641 | 0 | || (cand_mv.row as isize) < mvy_min |
1642 | 0 | || (cand_mv.row as isize) > mvy_max |
1643 | | { |
1644 | 0 | return MVCandidateRD::empty(); |
1645 | 0 | } |
1646 | | |
1647 | | // Convert the motion vector into an full pixel offset. |
1648 | 0 | let plane_ref = p_ref.region(Area::StartingAt { |
1649 | 0 | x: po.x + (cand_mv.col / 8) as isize, |
1650 | 0 | y: po.y + (cand_mv.row / 8) as isize, |
1651 | 0 | }); |
1652 | 0 | compute_mv_rd( |
1653 | 0 | pmv, |
1654 | 0 | lambda, |
1655 | 0 | use_satd, |
1656 | 0 | bit_depth, |
1657 | 0 | w, |
1658 | 0 | h, |
1659 | 0 | cand_mv, |
1660 | 0 | org_region, |
1661 | 0 | &plane_ref, |
1662 | 0 | cpu_feature_level, |
1663 | | ) |
1664 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::get_fullpel_mv_rd::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::get_fullpel_mv_rd::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::get_fullpel_mv_rd::<_> |
1665 | | |
1666 | | /// Perform hexagon search and refine afterwards. |
1667 | | /// |
1668 | | /// In the hexagon search stage, candidate motion vectors are examined for |
1669 | | /// improvement to the current search location. The search location is moved to |
1670 | | /// the best candidate (if any). This is repeated until the search location |
1671 | | /// stops moving. |
1672 | | /// |
1673 | | /// Refinement uses a square pattern that fits between the hexagon candidates. |
1674 | | /// |
1675 | | /// The hexagon pattern is defined by [`HEXAGON_PATTERN`] and the refinement |
1676 | | /// is defined by [`SQUARE_REFINE_PATTERN`]. |
1677 | | /// |
1678 | | /// `current` provides the initial search location and serves as |
1679 | | /// the output for the final search results. |
1680 | | #[allow(clippy::too_many_arguments)] |
1681 | 0 | fn hexagon_search<T: Pixel>( |
1682 | 0 | po: PlaneOffset, |
1683 | 0 | org_region: &PlaneRegion<T>, |
1684 | 0 | p_ref: &Plane<T>, |
1685 | 0 | current: &mut MotionSearchResult, |
1686 | 0 | bit_depth: usize, |
1687 | 0 | pmv: [MotionVector; 2], |
1688 | 0 | lambda: u32, |
1689 | 0 | mvx_min: isize, |
1690 | 0 | mvx_max: isize, |
1691 | 0 | mvy_min: isize, |
1692 | 0 | mvy_max: isize, |
1693 | 0 | w: usize, |
1694 | 0 | h: usize, |
1695 | 0 | cpu_feature_level: CpuFeatureLevel, |
1696 | 0 | ) { |
1697 | | // The first iteration of hexagon search is implemented separate from |
1698 | | // subsequent iterations, which overlap with previous iterations. |
1699 | | |
1700 | | // Holds what candidate is used (if any). This is used to determine which |
1701 | | // candidates have already been tested in a previous iteration and can be |
1702 | | // skipped. |
1703 | 0 | let mut best_cand_idx: usize = 0; |
1704 | 0 | let mut best_cand: MotionSearchResult = MotionSearchResult::empty(); |
1705 | | |
1706 | | // First iteration of hexagon search. There are six candidates to consider. |
1707 | 0 | for (i, &pattern_mv) in HEXAGON_PATTERN.iter().enumerate() { |
1708 | 0 | let cand_mv = current.mv + pattern_mv; |
1709 | 0 | let rd = get_fullpel_mv_rd( |
1710 | 0 | po, |
1711 | 0 | org_region, |
1712 | 0 | p_ref, |
1713 | 0 | bit_depth, |
1714 | 0 | pmv, |
1715 | 0 | lambda, |
1716 | | false, |
1717 | 0 | mvx_min, |
1718 | 0 | mvx_max, |
1719 | 0 | mvy_min, |
1720 | 0 | mvy_max, |
1721 | 0 | w, |
1722 | 0 | h, |
1723 | 0 | cand_mv, |
1724 | 0 | cpu_feature_level, |
1725 | | ); |
1726 | | |
1727 | 0 | if rd.cost < best_cand.rd.cost { |
1728 | 0 | best_cand_idx = i; |
1729 | 0 | best_cand.mv = cand_mv; |
1730 | 0 | best_cand.rd = rd; |
1731 | 0 | } |
1732 | | } |
1733 | | |
1734 | | // Run additional iterations of hexagon search until the search location |
1735 | | // doesn't update. |
1736 | 0 | while best_cand.rd.cost < current.rd.cost { |
1737 | | // Update the search location. |
1738 | 0 | *current = best_cand; |
1739 | 0 | best_cand = MotionSearchResult::empty(); |
1740 | | |
1741 | | // Save the index/direction taken in the previous iteration to the current |
1742 | | // search location. |
1743 | 0 | let center_cand_idx = best_cand_idx; |
1744 | | |
1745 | | // Look only at candidates that don't overlap with previous iterations. This |
1746 | | // corresponds with the three offsets (2D) with the closest direction to |
1747 | | // that traveled by the previous iteration. HEXAGON_PATTERN has clockwise |
1748 | | // order, so the last direction -1, +0, and +1 (mod 6) give the indices for |
1749 | | // these offsets. |
1750 | 0 | for idx_offset_mod6 in 5..=7 { |
1751 | 0 | let i = (center_cand_idx + idx_offset_mod6) % 6; |
1752 | 0 | let cand_mv = current.mv + HEXAGON_PATTERN[i]; |
1753 | | |
1754 | 0 | let rd = get_fullpel_mv_rd( |
1755 | 0 | po, |
1756 | 0 | org_region, |
1757 | 0 | p_ref, |
1758 | 0 | bit_depth, |
1759 | 0 | pmv, |
1760 | 0 | lambda, |
1761 | | false, |
1762 | 0 | mvx_min, |
1763 | 0 | mvx_max, |
1764 | 0 | mvy_min, |
1765 | 0 | mvy_max, |
1766 | 0 | w, |
1767 | 0 | h, |
1768 | 0 | cand_mv, |
1769 | 0 | cpu_feature_level, |
1770 | | ); |
1771 | | |
1772 | 0 | if rd.cost < best_cand.rd.cost { |
1773 | 0 | best_cand_idx = i; |
1774 | 0 | best_cand.mv = cand_mv; |
1775 | 0 | best_cand.rd = rd; |
1776 | 0 | } |
1777 | | } |
1778 | | } |
1779 | | |
1780 | | // Refine the motion after completing hexagon search. |
1781 | 0 | let mut best_cand: MotionSearchResult = MotionSearchResult::empty(); |
1782 | 0 | for &offset in &SQUARE_REFINE_PATTERN { |
1783 | 0 | let cand_mv = current.mv + offset; |
1784 | 0 | let rd = get_fullpel_mv_rd( |
1785 | 0 | po, |
1786 | 0 | org_region, |
1787 | 0 | p_ref, |
1788 | 0 | bit_depth, |
1789 | 0 | pmv, |
1790 | 0 | lambda, |
1791 | | false, |
1792 | 0 | mvx_min, |
1793 | 0 | mvx_max, |
1794 | 0 | mvy_min, |
1795 | 0 | mvy_max, |
1796 | 0 | w, |
1797 | 0 | h, |
1798 | 0 | cand_mv, |
1799 | 0 | cpu_feature_level, |
1800 | | ); |
1801 | | |
1802 | 0 | if rd.cost < best_cand.rd.cost { |
1803 | 0 | best_cand.mv = cand_mv; |
1804 | 0 | best_cand.rd = rd; |
1805 | 0 | } |
1806 | | } |
1807 | 0 | if best_cand.rd.cost < current.rd.cost { |
1808 | 0 | *current = best_cand; |
1809 | 0 | } |
1810 | | |
1811 | 0 | assert!(!current.is_empty()); |
1812 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::hexagon_search::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::hexagon_search::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::hexagon_search::<_> |
1813 | | |
1814 | | /// Run a full pixel diamond search. The search is run on multiple step sizes. |
1815 | | /// |
1816 | | /// For each step size, candidate motion vectors are examined for improvement |
1817 | | /// to the current search location. The search location is moved to the best |
1818 | | /// candidate (if any). This is repeated until the search location stops moving. |
1819 | | #[allow(clippy::too_many_arguments)] |
1820 | 0 | fn fullpel_diamond_search<T: Pixel>( |
1821 | 0 | po: PlaneOffset, |
1822 | 0 | org_region: &PlaneRegion<T>, |
1823 | 0 | p_ref: &Plane<T>, |
1824 | 0 | current: &mut MotionSearchResult, |
1825 | 0 | bit_depth: usize, |
1826 | 0 | pmv: [MotionVector; 2], |
1827 | 0 | lambda: u32, |
1828 | 0 | mvx_min: isize, |
1829 | 0 | mvx_max: isize, |
1830 | 0 | mvy_min: isize, |
1831 | 0 | mvy_max: isize, |
1832 | 0 | w: usize, |
1833 | 0 | h: usize, |
1834 | 0 | cpu_feature_level: CpuFeatureLevel, |
1835 | 0 | ) { |
1836 | | // Define the initial and the final scale (log2) of the diamond. |
1837 | 0 | let (mut diamond_radius_log2, diamond_radius_end_log2) = (1u8, 0u8); |
1838 | | |
1839 | | loop { |
1840 | | // Find the best candidate from the diamond pattern. |
1841 | 0 | let mut best_cand: MotionSearchResult = MotionSearchResult::empty(); |
1842 | 0 | for &offset in &DIAMOND_R1_PATTERN { |
1843 | 0 | let cand_mv = current.mv + (offset << diamond_radius_log2); |
1844 | 0 | let rd = get_fullpel_mv_rd( |
1845 | 0 | po, |
1846 | 0 | org_region, |
1847 | 0 | p_ref, |
1848 | 0 | bit_depth, |
1849 | 0 | pmv, |
1850 | 0 | lambda, |
1851 | | false, |
1852 | 0 | mvx_min, |
1853 | 0 | mvx_max, |
1854 | 0 | mvy_min, |
1855 | 0 | mvy_max, |
1856 | 0 | w, |
1857 | 0 | h, |
1858 | 0 | cand_mv, |
1859 | 0 | cpu_feature_level, |
1860 | | ); |
1861 | | |
1862 | 0 | if rd.cost < best_cand.rd.cost { |
1863 | 0 | best_cand.mv = cand_mv; |
1864 | 0 | best_cand.rd = rd; |
1865 | 0 | } |
1866 | | } |
1867 | | |
1868 | | // Continue the search at this scale until the can't find a better candidate |
1869 | | // to use. |
1870 | 0 | if current.rd.cost <= best_cand.rd.cost { |
1871 | 0 | if diamond_radius_log2 == diamond_radius_end_log2 { |
1872 | 0 | break; |
1873 | 0 | } else { |
1874 | 0 | diamond_radius_log2 -= 1; |
1875 | 0 | } |
1876 | 0 | } else { |
1877 | 0 | *current = best_cand; |
1878 | 0 | } |
1879 | | } |
1880 | | |
1881 | 0 | assert!(!current.is_empty()); |
1882 | 0 | } Unexecuted instantiation: av_scenechange::analyze::inter::fullpel_diamond_search::<u16> Unexecuted instantiation: av_scenechange::analyze::inter::fullpel_diamond_search::<u8> Unexecuted instantiation: av_scenechange::analyze::inter::fullpel_diamond_search::<_> |