Coverage Report

Created: 2025-12-05 07:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/av-scenechange-0.14.1/src/analyze/inter.rs
Line
Count
Source
1
use std::sync::Arc;
2
3
use aligned::{Aligned, A64};
4
use arrayvec::ArrayVec;
5
use num_rational::Rational32;
6
use rayon::iter::{IntoParallelIterator, ParallelIterator};
7
use v_frame::{
8
    frame::Frame,
9
    math::{clamp, ILog},
10
    pixel::{ChromaSampling, Pixel},
11
    plane::{Plane, PlaneConfig, PlaneOffset},
12
};
13
14
use super::importance::{
15
    IMPORTANCE_BLOCK_SIZE,
16
    IMP_BLOCK_MV_UNITS_PER_PIXEL,
17
    IMP_BLOCK_SIZE_IN_MV_UNITS,
18
};
19
use crate::{
20
    cpu::CpuFeatureLevel,
21
    data::{
22
        block::{BlockOffset, BlockSize, MIB_SIZE_LOG2},
23
        frame::{FrameInvariants, FrameState, RefType, ALLOWED_REF_FRAMES},
24
        motion::{
25
            MEStats,
26
            MVSamplingMode,
27
            MotionEstimationSubsets,
28
            MotionVector,
29
            ReadGuardMEStats,
30
            RefMEStats,
31
            TileMEStats,
32
            MV_LOW,
33
            MV_UPP,
34
        },
35
        plane::{Area, AsRegion, PlaneBlockOffset, PlaneRegion, PlaneRegionMut, Rect},
36
        prediction::PredictionMode,
37
        sad::get_sad,
38
        satd::get_satd,
39
        superblock::{
40
            SuperBlockOffset,
41
            TileSuperBlockOffset,
42
            MAX_SB_SIZE_LOG2,
43
            MI_SIZE,
44
            MI_SIZE_LOG2,
45
            SB_SIZE,
46
        },
47
        tile::{TileBlockOffset, TileRect, TileStateMut, TilingInfo},
48
    },
49
};
50
51
/// Builds an array of [`MotionVector`]s from two parallel component lists
/// ("structure of arrays" syntax).
///
/// Each list is introduced by the name of the `MotionVector` field it fills;
/// the n-th entries of both lists are paired into the n-th vector. The
/// component values are used exactly as written.
macro_rules! search_pattern_subpel {
    ($first:ident: [$($a:expr),*], $second:ident: [$($b:expr),*]) => {
        [$(MotionVector { $first: $a, $second: $b }),*]
    };
}
57
58
/// Builds an array of [`MotionVector`]s from two parallel component lists
/// ("structure of arrays" syntax), scaling every component to fullpel
/// resolution.
///
/// Works like [`search_pattern_subpel`], except that each component is
/// shifted left by 3 bits (multiplied by 8) before being stored.
macro_rules! search_pattern {
    ($first:ident: [$($a:expr),*], $second:ident: [$($b:expr),*]) => {
        [$(MotionVector { $first: $a << 3, $second: $b << 3 }),*]
    };
}
66
67
/// Diamond pattern of radius 1 as shown below. For fullpel search, use
68
/// `DIAMOND_R1_PATTERN_FULLPEL` since it has been scaled for fullpel search.
69
/// ```text
70
///  X
71
/// XoX
72
///  X
73
/// ```
74
/// 'X's are motion candidates and the 'o' is the center.
75
const DIAMOND_R1_PATTERN_SUBPEL: [MotionVector; 4] = search_pattern_subpel!(
76
  col: [  0,  1,  0, -1],
77
  row: [  1,  0, -1,  0]
78
);
79
80
/// Diamond pattern of radius 1 as shown below. Unlike `DIAMOND_R1_PATTERN`, the
81
/// vectors have been shifted fullpel scale.
82
/// ```text
83
///  X
84
/// XoX
85
///  X
86
/// ```
87
/// 'X's are motion candidates and the 'o' is the center.
88
const DIAMOND_R1_PATTERN: [MotionVector; 4] = search_pattern!(
89
  col: [  0,  1,  0, -1],
90
  row: [  1,  0, -1,  0]
91
);
92
93
/// Uneven multi-hexagon search pattern around a center point. Used for locating
94
/// irregular movement.
95
/// ```text
96
///      X
97
///    X   X
98
///  X       X
99
///  X       X
100
///  X   o   X
101
///  X       X
102
///  X       X
103
///    X   X
104
///      X
105
/// ```
106
/// 'X's are motion candidates and the 'o' is the center.
107
const UMH_PATTERN: [MotionVector; 16] = search_pattern!(
108
  col: [ -2, -1,  0,  1,  2,  3,  4,  3,  2,  1,  0, -1, -2,  3, -4, -3],
109
  row: [  4,  4,  4,  4,  4,  2,  0, -2, -4, -4, -4, -4, -4, -2,  0,  2]
110
);
111
112
/// A hexagon pattern around a center point. The pattern is ordered so that the
113
/// offsets circle around the center. This is done to allow pruning locations
114
/// covered by the last iteration.
115
/// ```text
116
///   21012
117
/// 2  X X
118
/// 1
119
/// 0 X o X
120
/// 1
121
/// 2  X X
122
/// ```
123
/// 'X's are motion candidates and the 'o' is the center.
124
///
125
/// The illustration below shows the process of a hexagon search.
126
/// ```text
127
/// Step 1    Step 2
128
///  1 1       1 1 2
129
///
130
/// 1(0)1  => 1 0(1)2
131
///
132
///  1 1       1 1 2
133
/// ```
134
/// The search above has gone through the following steps.
135
/// 1. Search '1' elements for better candidates than the center '0'.
136
/// 2. Recenter around the best candidate ('(1)') and hexagon candidates that
137
///    don't overlap with the previous search step (labeled '2').
138
const HEXAGON_PATTERN: [MotionVector; 6] = search_pattern!(
139
  col: [  0,  2,  2,  0, -2, -2],
140
  row: [ -2, -1,  1,  2,  1, -1]
141
);
142
143
/// A small square pattern around a center point.
144
/// ```text
145
///   101
146
/// 1 XXX
147
/// 0 XoX
148
/// 1 XXX
149
/// ```
150
/// 'X's are motion candidates and the 'o' is the center.
151
const SQUARE_REFINE_PATTERN: [MotionVector; 8] = search_pattern!(
152
  col: [ -1,  0,  1, -1,  1, -1,  0,  1],
153
  row: [  1,  1,  1,  0,  0, -1, -1, -1]
154
);
155
156
0
pub(crate) fn estimate_inter_costs<T: Pixel>(
157
0
    frame: Arc<Frame<T>>,
158
0
    ref_frame: Arc<Frame<T>>,
159
0
    bit_depth: usize,
160
0
    frame_rate: Rational32,
161
0
    chroma_sampling: ChromaSampling,
162
0
    buffer: RefMEStats,
163
0
    cpu_feature_level: CpuFeatureLevel,
164
0
) -> f64 {
165
0
    let last_fi =
166
0
        FrameInvariants::new_key_frame(frame.planes[0].cfg.width, frame.planes[0].cfg.height);
167
0
    let fi = FrameInvariants::new_inter_frame(&last_fi, 1).unwrap();
168
169
    // Compute the motion vectors.
170
0
    let mut fs = FrameState::new_with_frame_and_me_stats_and_rec(Arc::clone(&frame), buffer);
171
0
    let mut tiling = TilingInfo::from_target_tiles(
172
0
        frame.planes[0].cfg.width,
173
0
        frame.planes[0].cfg.height,
174
0
        *frame_rate.numer() as f64 / *frame_rate.denom() as f64,
175
0
        TilingInfo::tile_log2(1, 0).unwrap(),
176
0
        TilingInfo::tile_log2(1, 0).unwrap(),
177
0
        chroma_sampling == ChromaSampling::Cs422,
178
    );
179
0
    compute_motion_vectors(&fi, &mut fs, &mut tiling, bit_depth, cpu_feature_level);
180
181
    // Estimate inter costs
182
0
    let plane_org = &frame.planes[0];
183
0
    let plane_ref = &ref_frame.planes[0];
184
0
    let h_in_imp_b = plane_org.cfg.height / IMPORTANCE_BLOCK_SIZE;
185
0
    let w_in_imp_b = plane_org.cfg.width / IMPORTANCE_BLOCK_SIZE;
186
0
    let stats = &fs.frame_me_stats.read().expect("poisoned lock")[0];
187
0
    let bsize = BlockSize::from_width_and_height(IMPORTANCE_BLOCK_SIZE, IMPORTANCE_BLOCK_SIZE);
188
189
0
    let mut inter_costs = 0;
190
0
    (0..h_in_imp_b).for_each(|y| {
191
0
        (0..w_in_imp_b).for_each(|x| {
192
0
            let mv = stats[y * 2][x * 2].mv;
193
194
            // Coordinates of the top-left corner of the reference block, in MV
195
            // units.
196
0
            let reference_x = x as i64 * IMP_BLOCK_SIZE_IN_MV_UNITS + mv.col as i64;
197
0
            let reference_y = y as i64 * IMP_BLOCK_SIZE_IN_MV_UNITS + mv.row as i64;
198
199
0
            let region_org = plane_org.region(Area::Rect(Rect {
200
0
                x: (x * IMPORTANCE_BLOCK_SIZE) as isize,
201
0
                y: (y * IMPORTANCE_BLOCK_SIZE) as isize,
202
0
                width: IMPORTANCE_BLOCK_SIZE,
203
0
                height: IMPORTANCE_BLOCK_SIZE,
204
0
            }));
205
206
0
            let region_ref = plane_ref.region(Area::Rect(Rect {
207
0
                x: reference_x as isize / IMP_BLOCK_MV_UNITS_PER_PIXEL as isize,
208
0
                y: reference_y as isize / IMP_BLOCK_MV_UNITS_PER_PIXEL as isize,
209
0
                width: IMPORTANCE_BLOCK_SIZE,
210
0
                height: IMPORTANCE_BLOCK_SIZE,
211
0
            }));
212
213
0
            inter_costs += get_satd(
214
0
                &region_org,
215
0
                &region_ref,
216
0
                bsize.width(),
217
0
                bsize.height(),
218
0
                bit_depth,
219
0
                cpu_feature_level,
220
0
            ) as u64;
221
0
        });
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<u16>::{closure#0}::{closure#0}
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<u8>::{closure#0}::{closure#0}
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<_>::{closure#0}::{closure#0}
222
0
    });
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<u16>::{closure#0}
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<u8>::{closure#0}
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<_>::{closure#0}
223
0
    inter_costs as f64 / (w_in_imp_b * h_in_imp_b) as f64
224
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_inter_costs::<_>
225
226
0
fn compute_motion_vectors<T: Pixel>(
227
0
    fi: &FrameInvariants<T>,
228
0
    fs: &mut FrameState<T>,
229
0
    tiling_info: &mut TilingInfo,
230
0
    bit_depth: usize,
231
0
    cpu_feature_level: CpuFeatureLevel,
232
0
) {
233
0
    tiling_info
234
0
        .tile_iter_mut(fs)
235
0
        .collect::<Vec<_>>()
236
0
        .into_par_iter()
237
0
        .for_each(|mut ctx| {
238
0
            let ts = &mut ctx.ts;
239
0
            estimate_tile_motion(fi, ts, bit_depth, cpu_feature_level);
240
0
        });
Unexecuted instantiation: av_scenechange::analyze::inter::compute_motion_vectors::<u16>::{closure#0}
Unexecuted instantiation: av_scenechange::analyze::inter::compute_motion_vectors::<u8>::{closure#0}
Unexecuted instantiation: av_scenechange::analyze::inter::compute_motion_vectors::<_>::{closure#0}
241
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::compute_motion_vectors::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::compute_motion_vectors::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::compute_motion_vectors::<_>
242
243
0
fn estimate_tile_motion<T: Pixel>(
244
0
    fi: &FrameInvariants<T>,
245
0
    ts: &mut TileStateMut<'_, T>,
246
0
    bit_depth: usize,
247
0
    cpu_feature_level: CpuFeatureLevel,
248
0
) {
249
0
    let init_size = MIB_SIZE_LOG2;
250
251
0
    let mut prev_ssdec: Option<u8> = None;
252
0
    for mv_size_in_b_log2 in (2..=init_size).rev() {
253
0
        let init = mv_size_in_b_log2 == init_size;
254
255
        // Choose subsampling. Pass one is quarter res and pass two is at half res.
256
0
        let ssdec = match init_size - mv_size_in_b_log2 {
257
0
            0 => 2,
258
0
            1 => 1,
259
0
            _ => 0,
260
        };
261
262
0
        let new_subsampling = if let Some(prev) = prev_ssdec {
263
0
            prev != ssdec
264
        } else {
265
0
            false
266
        };
267
0
        prev_ssdec = Some(ssdec);
268
269
        // 0.5 and 0.125 are a fudge factors
270
0
        let lambda = 0;
271
272
0
        for sby in 0..ts.sb_height {
273
0
            for sbx in 0..ts.sb_width {
274
0
                let mut tested_frames_flags = 0;
275
0
                for &ref_frame in ALLOWED_REF_FRAMES {
276
0
                    let frame_flag = 1 << fi.ref_frames[ref_frame.to_index()];
277
0
                    if tested_frames_flags & frame_flag == frame_flag {
278
0
                        continue;
279
0
                    }
280
0
                    tested_frames_flags |= frame_flag;
281
282
0
                    let tile_bo = TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby })
283
0
                        .block_offset(0, 0);
284
285
0
                    if new_subsampling {
286
0
                        refine_subsampled_sb_motion(
287
0
                            fi,
288
0
                            ts,
289
0
                            ref_frame,
290
0
                            mv_size_in_b_log2 + 1,
291
0
                            tile_bo,
292
0
                            ssdec,
293
0
                            lambda,
294
0
                            bit_depth,
295
0
                            cpu_feature_level,
296
0
                        );
297
0
                    }
298
299
0
                    estimate_sb_motion(
300
0
                        fi,
301
0
                        ts,
302
0
                        ref_frame,
303
0
                        mv_size_in_b_log2,
304
0
                        tile_bo,
305
0
                        init,
306
0
                        ssdec,
307
0
                        lambda,
308
0
                        bit_depth,
309
0
                        cpu_feature_level,
310
                    );
311
                }
312
            }
313
        }
314
    }
315
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_tile_motion::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_tile_motion::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_tile_motion::<_>
316
317
#[allow(clippy::too_many_arguments)]
318
0
fn refine_subsampled_sb_motion<T: Pixel>(
319
0
    fi: &FrameInvariants<T>,
320
0
    ts: &mut TileStateMut<'_, T>,
321
0
    ref_frame: RefType,
322
0
    mv_size_in_b_log2: usize,
323
0
    tile_bo: TileBlockOffset,
324
0
    ssdec: u8,
325
0
    lambda: u32,
326
0
    bit_depth: usize,
327
0
    cpu_feature_level: CpuFeatureLevel,
328
0
) {
329
0
    let pix_offset = tile_bo.to_luma_plane_offset();
330
0
    let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
331
0
    let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);
332
333
0
    let mv_size = MI_SIZE << mv_size_in_b_log2;
334
335
    // Process in blocks, cropping at edges.
336
0
    for y in (0..sb_h).step_by(mv_size) {
337
0
        for x in (0..sb_w).step_by(mv_size) {
338
0
            let sub_bo =
339
0
                tile_bo.with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);
340
341
            // Clamp to frame edge, rounding up in the case of subsampling.
342
            // The rounding makes some assumptions about how subsampling is done.
343
0
            let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec;
344
0
            let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec;
345
346
            // Refine the existing motion estimate
347
0
            if let Some(results) = refine_subsampled_motion_estimate(
348
0
                fi,
349
0
                ts,
350
0
                w,
351
0
                h,
352
0
                sub_bo,
353
0
                ref_frame,
354
0
                ssdec,
355
0
                lambda,
356
0
                bit_depth,
357
0
                cpu_feature_level,
358
0
            ) {
359
0
                // normalize sad to 128x128 block
360
0
                let sad =
361
0
                    (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2)) / (w * h) as u64) as u32;
362
0
                save_me_stats(ts, mv_size_in_b_log2, sub_bo, ref_frame, MEStats {
363
0
                    mv: results.mv,
364
0
                    normalized_sad: sad,
365
0
                });
366
0
            }
367
        }
368
    }
369
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::refine_subsampled_sb_motion::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::refine_subsampled_sb_motion::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::refine_subsampled_sb_motion::<_>
370
371
/// Refine motion estimation that was computed one level of subsampling up.
372
#[allow(clippy::too_many_arguments)]
373
0
fn refine_subsampled_motion_estimate<T: Pixel>(
374
0
    fi: &FrameInvariants<T>,
375
0
    ts: &TileStateMut<'_, T>,
376
0
    w: usize,
377
0
    h: usize,
378
0
    tile_bo: TileBlockOffset,
379
0
    ref_frame: RefType,
380
0
    ssdec: u8,
381
0
    lambda: u32,
382
0
    bit_depth: usize,
383
0
    cpu_feature_level: CpuFeatureLevel,
384
0
) -> Option<MotionSearchResult> {
385
0
    if let Some(ref rec) = fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize] {
386
0
        let frame_bo = ts.to_frame_block_offset(tile_bo);
387
0
        let (mvx_min, mvx_max, mvy_min, mvy_max) =
388
0
            get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
389
390
0
        let pmv = [MotionVector { row: 0, col: 0 }; 2];
391
392
0
        let po = frame_bo.to_luma_plane_offset();
393
0
        let (mvx_min, mvx_max, mvy_min, mvy_max) = (
394
0
            mvx_min >> ssdec,
395
0
            mvx_max >> ssdec,
396
0
            mvy_min >> ssdec,
397
0
            mvy_max >> ssdec,
398
0
        );
399
0
        let po = PlaneOffset {
400
0
            x: po.x >> ssdec,
401
0
            y: po.y >> ssdec,
402
0
        };
403
0
        let p_ref = match ssdec {
404
0
            0 => &rec.frame.planes[0],
405
0
            1 => &rec.input_hres,
406
0
            2 => &rec.input_qres,
407
0
            _ => unimplemented!(),
408
        };
409
410
0
        let org_region = &match ssdec {
411
0
            0 => ts.input_tile.planes[0].subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
412
0
            1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
413
0
            2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
414
0
            _ => unimplemented!(),
415
        };
416
417
0
        let mv = ts.me_stats[ref_frame.to_index()][tile_bo.0.y][tile_bo.0.x].mv >> ssdec;
418
419
        // Given a motion vector at 0 at higher subsampling:
420
        // |  -1   |   0   |   1   |
421
        // then the vectors at -1 to 2 should be tested at the current subsampling.
422
        //      |-------------|
423
        // | -2 -1 |  0  1 |  2  3 |
424
        // This corresponds to a 4x4 full search.
425
0
        let x_lo = po.x + (mv.col as isize / 8 - 1).max(mvx_min / 8);
426
0
        let x_hi = po.x + (mv.col as isize / 8 + 2).min(mvx_max / 8);
427
0
        let y_lo = po.y + (mv.row as isize / 8 - 1).max(mvy_min / 8);
428
0
        let y_hi = po.y + (mv.row as isize / 8 + 2).min(mvy_max / 8);
429
0
        let mut results = full_search(
430
0
            x_lo,
431
0
            x_hi,
432
0
            y_lo,
433
0
            y_hi,
434
0
            w,
435
0
            h,
436
0
            org_region,
437
0
            p_ref,
438
0
            po,
439
            1,
440
0
            lambda,
441
0
            pmv,
442
0
            bit_depth,
443
0
            cpu_feature_level,
444
        );
445
446
        // Scale motion vectors to full res size
447
0
        results.mv = results.mv << ssdec;
448
449
0
        Some(results)
450
    } else {
451
0
        None
452
    }
453
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::refine_subsampled_motion_estimate::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::refine_subsampled_motion_estimate::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::refine_subsampled_motion_estimate::<_>
454
455
0
fn get_mv_range(
456
0
    w_in_b: usize,
457
0
    h_in_b: usize,
458
0
    bo: PlaneBlockOffset,
459
0
    blk_w: usize,
460
0
    blk_h: usize,
461
0
) -> (isize, isize, isize, isize) {
462
0
    let border_w = 128 + blk_w as isize * 8;
463
0
    let border_h = 128 + blk_h as isize * 8;
464
0
    let mvx_min = -(bo.0.x as isize) * (8 * MI_SIZE) as isize - border_w;
465
0
    let mvx_max = ((w_in_b - bo.0.x) as isize - (blk_w / MI_SIZE) as isize)
466
0
        * (8 * MI_SIZE) as isize
467
0
        + border_w;
468
0
    let mvy_min = -(bo.0.y as isize) * (8 * MI_SIZE) as isize - border_h;
469
0
    let mvy_max = ((h_in_b - bo.0.y) as isize - (blk_h / MI_SIZE) as isize)
470
0
        * (8 * MI_SIZE) as isize
471
0
        + border_h;
472
473
    // <https://aomediacodec.github.io/av1-spec/#assign-mv-semantics>
474
0
    (
475
0
        mvx_min.max(MV_LOW as isize + 1),
476
0
        mvx_max.min(MV_UPP as isize - 1),
477
0
        mvy_min.max(MV_LOW as isize + 1),
478
0
        mvy_max.min(MV_UPP as isize - 1),
479
0
    )
480
0
}
481
482
#[allow(clippy::too_many_arguments)]
483
0
fn full_search<T: Pixel>(
484
0
    x_lo: isize,
485
0
    x_hi: isize,
486
0
    y_lo: isize,
487
0
    y_hi: isize,
488
0
    w: usize,
489
0
    h: usize,
490
0
    org_region: &PlaneRegion<T>,
491
0
    p_ref: &Plane<T>,
492
0
    po: PlaneOffset,
493
0
    step: usize,
494
0
    lambda: u32,
495
0
    pmv: [MotionVector; 2],
496
0
    bit_depth: usize,
497
0
    cpu_feature_level: CpuFeatureLevel,
498
0
) -> MotionSearchResult {
499
0
    let search_region = p_ref.region(Area::Rect(Rect {
500
0
        x: x_lo,
501
0
        y: y_lo,
502
0
        width: (x_hi - x_lo) as usize + w,
503
0
        height: (y_hi - y_lo) as usize + h,
504
0
    }));
505
506
0
    let mut best: MotionSearchResult = MotionSearchResult::empty();
507
508
    // Select rectangular regions within search region with vert+horz windows
509
0
    for vert_window in search_region.vert_windows(h).step_by(step) {
510
0
        for ref_window in vert_window.horz_windows(w).step_by(step) {
511
0
            let &Rect { x, y, .. } = ref_window.rect();
512
513
0
            let mv = MotionVector {
514
0
                row: 8 * (y as i16 - po.y as i16),
515
0
                col: 8 * (x as i16 - po.x as i16),
516
0
            };
517
518
0
            let rd = compute_mv_rd(
519
0
                pmv,
520
0
                lambda,
521
                false,
522
0
                bit_depth,
523
0
                w,
524
0
                h,
525
0
                mv,
526
0
                org_region,
527
0
                &ref_window,
528
0
                cpu_feature_level,
529
            );
530
531
0
            if rd.cost < best.rd.cost {
532
0
                best.rd = rd;
533
0
                best.mv = mv;
534
0
            }
535
        }
536
    }
537
538
0
    best
539
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::full_search::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::full_search::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::full_search::<_>
540
541
/// Compute the rate distortion stats for a motion vector.
542
#[allow(clippy::too_many_arguments)]
543
0
fn compute_mv_rd<T: Pixel>(
544
0
    pmv: [MotionVector; 2],
545
0
    lambda: u32,
546
0
    use_satd: bool,
547
0
    bit_depth: usize,
548
0
    w: usize,
549
0
    h: usize,
550
0
    cand_mv: MotionVector,
551
0
    plane_org: &PlaneRegion<'_, T>,
552
0
    plane_ref: &PlaneRegion<'_, T>,
553
0
    cpu_feature_level: CpuFeatureLevel,
554
0
) -> MVCandidateRD {
555
0
    let sad = if use_satd {
556
0
        get_satd(plane_org, plane_ref, w, h, bit_depth, cpu_feature_level)
557
    } else {
558
0
        get_sad(plane_org, plane_ref, w, h, bit_depth, cpu_feature_level)
559
    };
560
561
0
    let rate1 = get_mv_rate(cand_mv, pmv[0]);
562
0
    let rate2 = get_mv_rate(cand_mv, pmv[1]);
563
0
    let rate = rate1.min(rate2 + 1);
564
565
0
    MVCandidateRD {
566
0
        cost: 256 * sad as u64 + rate as u64 * lambda as u64,
567
0
        sad,
568
0
    }
569
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::compute_mv_rd::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::compute_mv_rd::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::compute_mv_rd::<_>
570
571
0
fn diff_to_rate(diff: i16) -> u32 {
572
0
    let d = diff >> 1;
573
0
    2 * ILog::ilog(d.abs()) as u32
574
0
}
575
576
0
fn get_mv_rate(a: MotionVector, b: MotionVector) -> u32 {
577
0
    diff_to_rate(a.row - b.row) + diff_to_rate(a.col - b.col)
578
0
}
579
580
/// Result of motion search.
581
#[derive(Debug, Copy, Clone)]
582
pub struct MotionSearchResult {
583
    /// Motion vector chosen by the motion search.
584
    pub mv: MotionVector,
585
    /// Rate distortion data associated with `mv`.
586
    pub rd: MVCandidateRD,
587
}
588
589
impl MotionSearchResult {
590
    /// Creates an 'empty' value.
591
    ///
592
    /// To be considered empty, cost is set higher than any naturally occurring
593
    /// cost value. The idea is that comparing to any valid rd output, the
594
    /// search result will always be replaced.
595
0
    pub fn empty() -> MotionSearchResult {
596
0
        MotionSearchResult {
597
0
            mv: MotionVector::default(),
598
0
            rd: MVCandidateRD::empty(),
599
0
        }
600
0
    }
601
602
    /// Check if the value should be considered to be empty.
603
0
    const fn is_empty(&self) -> bool {
604
0
        self.rd.cost == u64::MAX
605
0
    }
606
}
607
608
/// Rate-distortion data computed for a single motion vector.
#[derive(Debug, Copy, Clone)]
pub struct MVCandidateRD {
    /// Rate-distortion cost of the motion vector.
    pub cost: u64,
    /// Value of the distortion metric for the motion vector.
    pub sad: u32,
}

impl MVCandidateRD {
    /// Creates an 'empty' value.
    ///
    /// Both fields are set to the maximum of their type, so any candidate
    /// with a lower cost replaces it in a lowest-cost comparison.
    const fn empty() -> Self {
        Self {
            cost: u64::MAX,
            sad: u32::MAX,
        }
    }
}
630
631
0
fn save_me_stats<T: Pixel>(
632
0
    ts: &mut TileStateMut<'_, T>,
633
0
    mv_size_in_b_log2: usize,
634
0
    tile_bo: TileBlockOffset,
635
0
    ref_frame: RefType,
636
0
    stats: MEStats,
637
0
) {
638
0
    let size_in_b = 1 << mv_size_in_b_log2;
639
0
    let tile_me_stats = &mut ts.me_stats[ref_frame.to_index()];
640
0
    let tile_bo_x_end = (tile_bo.0.x + size_in_b).min(ts.mi_width);
641
0
    let tile_bo_y_end = (tile_bo.0.y + size_in_b).min(ts.mi_height);
642
0
    for mi_y in tile_bo.0.y..tile_bo_y_end {
643
0
        for a in tile_me_stats[mi_y][tile_bo.0.x..tile_bo_x_end].iter_mut() {
644
0
            *a = stats;
645
0
        }
646
    }
647
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::save_me_stats::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::save_me_stats::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::save_me_stats::<_>
648
649
#[allow(clippy::too_many_arguments)]
650
0
fn estimate_sb_motion<T: Pixel>(
651
0
    fi: &FrameInvariants<T>,
652
0
    ts: &mut TileStateMut<'_, T>,
653
0
    ref_frame: RefType,
654
0
    mv_size_in_b_log2: usize,
655
0
    tile_bo: TileBlockOffset,
656
0
    init: bool,
657
0
    ssdec: u8,
658
0
    lambda: u32,
659
0
    bit_depth: usize,
660
0
    cpu_feature_level: CpuFeatureLevel,
661
0
) {
662
0
    let pix_offset = tile_bo.to_luma_plane_offset();
663
0
    let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
664
0
    let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);
665
666
0
    let mv_size = MI_SIZE << mv_size_in_b_log2;
667
668
    // Process in blocks, cropping at edges.
669
0
    for y in (0..sb_h).step_by(mv_size) {
670
0
        for x in (0..sb_w).step_by(mv_size) {
671
0
            let corner: MVSamplingMode = if init {
672
0
                MVSamplingMode::INIT
673
            } else {
674
                // Processing the block a size up produces data that can be used by
675
                // the right and bottom corners.
676
0
                MVSamplingMode::CORNER {
677
0
                    right: x & mv_size == mv_size,
678
0
                    bottom: y & mv_size == mv_size,
679
0
                }
680
            };
681
682
0
            let sub_bo =
683
0
                tile_bo.with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);
684
685
            // Clamp to frame edge, rounding up in the case of subsampling.
686
            // The rounding makes some assumptions about how subsampling is done.
687
0
            let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec;
688
0
            let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec;
689
690
            // Run motion estimation.
691
            // Note that the initial search (init) instructs the called function to
692
            // perform a more extensive search.
693
0
            if let Some(results) = estimate_motion(
694
0
                fi,
695
0
                ts,
696
0
                w,
697
0
                h,
698
0
                sub_bo,
699
0
                ref_frame,
700
0
                None,
701
0
                corner,
702
0
                init,
703
0
                ssdec,
704
0
                Some(lambda),
705
0
                bit_depth,
706
0
                cpu_feature_level,
707
0
            ) {
708
0
                // normalize sad to 128x128 block
709
0
                let sad =
710
0
                    (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2)) / (w * h) as u64) as u32;
711
0
                save_me_stats(ts, mv_size_in_b_log2, sub_bo, ref_frame, MEStats {
712
0
                    mv: results.mv,
713
0
                    normalized_sad: sad,
714
0
                });
715
0
            }
716
        }
717
    }
718
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_sb_motion::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_sb_motion::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_sb_motion::<_>
719
720
#[allow(clippy::too_many_arguments)]
721
0
fn estimate_motion<T: Pixel>(
722
0
    fi: &FrameInvariants<T>,
723
0
    ts: &TileStateMut<'_, T>,
724
0
    w: usize,
725
0
    h: usize,
726
0
    tile_bo: TileBlockOffset,
727
0
    ref_frame: RefType,
728
0
    pmv: Option<[MotionVector; 2]>,
729
0
    corner: MVSamplingMode,
730
0
    extensive_search: bool,
731
0
    ssdec: u8,
732
0
    lambda: Option<u32>,
733
0
    bit_depth: usize,
734
0
    cpu_feature_level: CpuFeatureLevel,
735
0
) -> Option<MotionSearchResult> {
736
0
    if let Some(ref rec) = fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize] {
737
0
        let frame_bo = ts.to_frame_block_offset(tile_bo);
738
0
        let (mvx_min, mvx_max, mvy_min, mvy_max) =
739
0
            get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
740
741
0
        let lambda = lambda.unwrap_or(0);
742
743
0
        let global_mv = [MotionVector { row: 0, col: 0 }; 2];
744
745
0
        let po = frame_bo.to_luma_plane_offset();
746
0
        let (mvx_min, mvx_max, mvy_min, mvy_max) = (
747
0
            mvx_min >> ssdec,
748
0
            mvx_max >> ssdec,
749
0
            mvy_min >> ssdec,
750
0
            mvy_max >> ssdec,
751
0
        );
752
0
        let po = PlaneOffset {
753
0
            x: po.x >> ssdec,
754
0
            y: po.y >> ssdec,
755
0
        };
756
0
        let p_ref = match ssdec {
757
0
            0 => &rec.frame.planes[0],
758
0
            1 => &rec.input_hres,
759
0
            2 => &rec.input_qres,
760
0
            _ => unimplemented!(),
761
        };
762
763
0
        let org_region = &match ssdec {
764
0
            0 => ts.input_tile.planes[0].subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
765
0
            1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
766
0
            2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
767
0
            _ => unimplemented!(),
768
        };
769
770
0
        let mut best: MotionSearchResult = full_pixel_me(
771
0
            fi,
772
0
            ts,
773
0
            org_region,
774
0
            p_ref,
775
0
            tile_bo,
776
0
            po,
777
0
            lambda,
778
0
            pmv.unwrap_or(global_mv),
779
0
            w,
780
0
            h,
781
0
            mvx_min,
782
0
            mvx_max,
783
0
            mvy_min,
784
0
            mvy_max,
785
0
            ref_frame,
786
0
            corner,
787
0
            extensive_search,
788
0
            ssdec,
789
0
            bit_depth,
790
0
            cpu_feature_level,
791
        );
792
793
0
        if let Some(pmv) = pmv {
794
0
            best.rd = get_fullpel_mv_rd(
795
0
                po,
796
0
                org_region,
797
0
                p_ref,
798
0
                bit_depth,
799
0
                pmv,
800
0
                lambda,
801
0
                true,
802
0
                mvx_min,
803
0
                mvx_max,
804
0
                mvy_min,
805
0
                mvy_max,
806
0
                w,
807
0
                h,
808
0
                best.mv,
809
0
                cpu_feature_level,
810
0
            );
811
0
812
0
            sub_pixel_me(
813
0
                fi,
814
0
                po,
815
0
                org_region,
816
0
                p_ref,
817
0
                lambda,
818
0
                pmv,
819
0
                mvx_min,
820
0
                mvx_max,
821
0
                mvy_min,
822
0
                mvy_max,
823
0
                w,
824
0
                h,
825
0
                true,
826
0
                &mut best,
827
0
                ref_frame,
828
0
                bit_depth,
829
0
                cpu_feature_level,
830
0
            );
831
0
        }
832
833
        // Scale motion vectors to full res size
834
0
        best.mv = best.mv << ssdec;
835
836
0
        Some(best)
837
    } else {
838
0
        None
839
    }
840
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_motion::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_motion::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::estimate_motion::<_>
841
842
#[allow(clippy::too_many_arguments)]
843
0
fn full_pixel_me<T: Pixel>(
844
0
    fi: &FrameInvariants<T>,
845
0
    ts: &TileStateMut<'_, T>,
846
0
    org_region: &PlaneRegion<T>,
847
0
    p_ref: &Plane<T>,
848
0
    tile_bo: TileBlockOffset,
849
0
    po: PlaneOffset,
850
0
    lambda: u32,
851
0
    pmv: [MotionVector; 2],
852
0
    w: usize,
853
0
    h: usize,
854
0
    mvx_min: isize,
855
0
    mvx_max: isize,
856
0
    mvy_min: isize,
857
0
    mvy_max: isize,
858
0
    ref_frame: RefType,
859
0
    corner: MVSamplingMode,
860
0
    extensive_search: bool,
861
0
    ssdec: u8,
862
0
    bit_depth: usize,
863
0
    cpu_feature_level: CpuFeatureLevel,
864
0
) -> MotionSearchResult {
865
0
    let ref_frame_id = ref_frame.to_index();
866
0
    let tile_me_stats = &ts.me_stats[ref_frame_id].as_const();
867
0
    let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize]
868
0
        .as_ref()
869
0
        .map(|frame_ref| frame_ref.frame_me_stats.read().expect("poisoned lock"));
Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<u16>::{closure#0}
Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<u8>::{closure#0}
Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<_>::{closure#0}
870
0
    let subsets = get_subset_predictors(
871
0
        tile_bo,
872
0
        tile_me_stats,
873
0
        frame_ref,
874
0
        ref_frame_id,
875
0
        w,
876
0
        h,
877
0
        mvx_min,
878
0
        mvx_max,
879
0
        mvy_min,
880
0
        mvy_max,
881
0
        corner,
882
0
        ssdec,
883
    );
884
885
0
    let try_cands = |predictors: &[MotionVector], best: &mut MotionSearchResult| {
886
0
        let mut results = get_best_predictor(
887
0
            po,
888
0
            org_region,
889
0
            p_ref,
890
0
            predictors,
891
0
            bit_depth,
892
0
            pmv,
893
0
            lambda,
894
0
            mvx_min,
895
0
            mvx_max,
896
0
            mvy_min,
897
0
            mvy_max,
898
0
            w,
899
0
            h,
900
0
            cpu_feature_level,
901
        );
902
0
        fullpel_diamond_search(
903
0
            po,
904
0
            org_region,
905
0
            p_ref,
906
0
            &mut results,
907
0
            bit_depth,
908
0
            pmv,
909
0
            lambda,
910
0
            mvx_min,
911
0
            mvx_max,
912
0
            mvy_min,
913
0
            mvy_max,
914
0
            w,
915
0
            h,
916
0
            cpu_feature_level,
917
        );
918
919
0
        if results.rd.cost < best.rd.cost {
920
0
            *best = results;
921
0
        }
922
0
    };
Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<u16>::{closure#1}
Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<u8>::{closure#1}
Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<_>::{closure#1}
923
924
0
    let mut best: MotionSearchResult = MotionSearchResult::empty();
925
0
    if !extensive_search {
926
0
        try_cands(&subsets.all_mvs(), &mut best);
927
0
        best
928
    } else {
929
        // Perform a more thorough search before resorting to full search.
930
        // Search the median, the best mvs of neighboring blocks, and motion vectors
931
        // from the previous frame. Stop once a candidate with a sad less than a
932
        // threshold is found.
933
934
0
        let thresh = (subsets.min_sad as f32 * 1.2) as u32 + (((w * h) as u32) << (bit_depth - 8));
935
936
0
        if let Some(median) = subsets.median {
937
0
            try_cands(&[median], &mut best);
938
939
0
            if best.rd.sad < thresh {
940
0
                return best;
941
0
            }
942
0
        }
943
944
0
        try_cands(&subsets.subset_b, &mut best);
945
946
0
        if best.rd.sad < thresh {
947
0
            return best;
948
0
        }
949
950
0
        try_cands(&subsets.subset_c, &mut best);
951
952
0
        if best.rd.sad < thresh {
953
0
            return best;
954
0
        }
955
956
        // Perform UMH search, either as the last possible search when full search
957
        // is disabled, or as the last search before resorting to full search.
958
        // Use 24 merange, since it is the largest range that x264 uses.
959
0
        uneven_multi_hex_search(
960
0
            po,
961
0
            org_region,
962
0
            p_ref,
963
0
            &mut best,
964
0
            bit_depth,
965
0
            pmv,
966
0
            lambda,
967
0
            mvx_min,
968
0
            mvx_max,
969
0
            mvy_min,
970
0
            mvy_max,
971
0
            w,
972
0
            h,
973
            24,
974
0
            cpu_feature_level,
975
        );
976
977
0
        best
978
    }
979
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::full_pixel_me::<_>
980
981
#[allow(clippy::too_many_arguments)]
982
0
fn sub_pixel_me<T: Pixel>(
983
0
    fi: &FrameInvariants<T>,
984
0
    po: PlaneOffset,
985
0
    org_region: &PlaneRegion<T>,
986
0
    p_ref: &Plane<T>,
987
0
    lambda: u32,
988
0
    pmv: [MotionVector; 2],
989
0
    mvx_min: isize,
990
0
    mvx_max: isize,
991
0
    mvy_min: isize,
992
0
    mvy_max: isize,
993
0
    w: usize,
994
0
    h: usize,
995
0
    use_satd: bool,
996
0
    best: &mut MotionSearchResult,
997
0
    ref_frame: RefType,
998
0
    bit_depth: usize,
999
0
    cpu_feature_level: CpuFeatureLevel,
1000
0
) {
1001
0
    subpel_diamond_search(
1002
0
        fi,
1003
0
        po,
1004
0
        org_region,
1005
0
        p_ref,
1006
0
        bit_depth,
1007
0
        pmv,
1008
0
        lambda,
1009
0
        mvx_min,
1010
0
        mvx_max,
1011
0
        mvy_min,
1012
0
        mvy_max,
1013
0
        w,
1014
0
        h,
1015
0
        use_satd,
1016
0
        best,
1017
0
        ref_frame,
1018
0
        cpu_feature_level,
1019
    );
1020
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::sub_pixel_me::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::sub_pixel_me::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::sub_pixel_me::<_>
1021
1022
/// Run a subpixel diamond search. The search is run on multiple step sizes.
1023
///
1024
/// For each step size, candidate motion vectors are examined for improvement
1025
/// to the current search location. The search location is moved to the best
1026
/// candidate (if any). This is repeated until the search location stops moving.
1027
#[allow(clippy::too_many_arguments)]
1028
0
fn subpel_diamond_search<T: Pixel>(
1029
0
    fi: &FrameInvariants<T>,
1030
0
    po: PlaneOffset,
1031
0
    org_region: &PlaneRegion<T>,
1032
0
    _p_ref: &Plane<T>,
1033
0
    bit_depth: usize,
1034
0
    pmv: [MotionVector; 2],
1035
0
    lambda: u32,
1036
0
    mvx_min: isize,
1037
0
    mvx_max: isize,
1038
0
    mvy_min: isize,
1039
0
    mvy_max: isize,
1040
0
    w: usize,
1041
0
    h: usize,
1042
0
    use_satd: bool,
1043
0
    current: &mut MotionSearchResult,
1044
0
    ref_frame: RefType,
1045
0
    cpu_feature_level: CpuFeatureLevel,
1046
0
) {
1047
    // Motion compensation assembly has special requirements for edges
1048
0
    let mc_w = w.next_power_of_two();
1049
0
    let mc_h = (h + 1) & !1;
1050
1051
    // Metadata for subpel scratch pad.
1052
0
    let cfg = PlaneConfig::new(mc_w, mc_h, 0, 0, 0, 0, std::mem::size_of::<T>());
1053
    // Stack allocation for subpel scratch pad.
1054
    // No `unsafe` is involved: the array is zero-initialized here, and we write to it below before reading from it.
1055
0
    let mut buf: Aligned<A64, [T; 128 * 128]> = Aligned([T::cast_from(0); 128 * 128]);
1056
0
    let mut tmp_region = PlaneRegionMut::from_slice(buf.as_mut(), &cfg, Rect {
1057
0
        x: 0,
1058
0
        y: 0,
1059
0
        width: cfg.width,
1060
0
        height: cfg.height,
1061
0
    });
1062
1063
    // start at 1/2 pel and end at 1/4 or 1/8 pel
1064
0
    let (mut diamond_radius_log2, diamond_radius_end_log2) = (2u8, 1u8);
1065
1066
    loop {
1067
        // Find the best candidate from the diamond pattern.
1068
0
        let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1069
0
        for &offset in &DIAMOND_R1_PATTERN_SUBPEL {
1070
0
            let cand_mv = current.mv + (offset << diamond_radius_log2);
1071
1072
0
            let rd = get_subpel_mv_rd(
1073
0
                fi,
1074
0
                po,
1075
0
                org_region,
1076
0
                bit_depth,
1077
0
                pmv,
1078
0
                lambda,
1079
0
                use_satd,
1080
0
                mvx_min,
1081
0
                mvx_max,
1082
0
                mvy_min,
1083
0
                mvy_max,
1084
0
                w,
1085
0
                h,
1086
0
                cand_mv,
1087
0
                &mut tmp_region,
1088
0
                ref_frame,
1089
0
                cpu_feature_level,
1090
            );
1091
1092
0
            if rd.cost < best_cand.rd.cost {
1093
0
                best_cand.mv = cand_mv;
1094
0
                best_cand.rd = rd;
1095
0
            }
1096
        }
1097
1098
        // Continue the search at this scale until a better candidate isn't found.
1099
0
        if current.rd.cost <= best_cand.rd.cost {
1100
0
            if diamond_radius_log2 == diamond_radius_end_log2 {
1101
0
                break;
1102
0
            } else {
1103
0
                diamond_radius_log2 -= 1;
1104
0
            }
1105
0
        } else {
1106
0
            *current = best_cand;
1107
0
        }
1108
    }
1109
1110
0
    assert!(!current.is_empty());
1111
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::subpel_diamond_search::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::subpel_diamond_search::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::subpel_diamond_search::<_>
1112
1113
#[allow(clippy::too_many_arguments)]
1114
0
fn get_subpel_mv_rd<T: Pixel>(
1115
0
    fi: &FrameInvariants<T>,
1116
0
    po: PlaneOffset,
1117
0
    org_region: &PlaneRegion<T>,
1118
0
    bit_depth: usize,
1119
0
    pmv: [MotionVector; 2],
1120
0
    lambda: u32,
1121
0
    use_satd: bool,
1122
0
    mvx_min: isize,
1123
0
    mvx_max: isize,
1124
0
    mvy_min: isize,
1125
0
    mvy_max: isize,
1126
0
    w: usize,
1127
0
    h: usize,
1128
0
    cand_mv: MotionVector,
1129
0
    tmp_region: &mut PlaneRegionMut<T>,
1130
0
    ref_frame: RefType,
1131
0
    cpu_feature_level: CpuFeatureLevel,
1132
0
) -> MVCandidateRD {
1133
0
    if (cand_mv.col as isize) < mvx_min
1134
0
        || (cand_mv.col as isize) > mvx_max
1135
0
        || (cand_mv.row as isize) < mvy_min
1136
0
        || (cand_mv.row as isize) > mvy_max
1137
    {
1138
0
        return MVCandidateRD::empty();
1139
0
    }
1140
1141
0
    let tmp_width = tmp_region.rect().width;
1142
0
    let tmp_height = tmp_region.rect().height;
1143
0
    let tile_rect = TileRect {
1144
0
        x: 0,
1145
0
        y: 0,
1146
0
        width: tmp_width,
1147
0
        height: tmp_height,
1148
0
    };
1149
1150
0
    PredictionMode::NEWMV.predict_inter_single(
1151
0
        fi,
1152
0
        tile_rect,
1153
        0,
1154
0
        po,
1155
0
        tmp_region,
1156
        // motion comp's w & h on edges can be different from distortion's
1157
0
        tmp_width,
1158
0
        tmp_height,
1159
0
        ref_frame,
1160
0
        cand_mv,
1161
0
        bit_depth,
1162
0
        cpu_feature_level,
1163
    );
1164
0
    let plane_ref = tmp_region.as_const();
1165
0
    compute_mv_rd(
1166
0
        pmv,
1167
0
        lambda,
1168
0
        use_satd,
1169
0
        bit_depth,
1170
0
        w,
1171
0
        h,
1172
0
        cand_mv,
1173
0
        org_region,
1174
0
        &plane_ref,
1175
0
        cpu_feature_level,
1176
    )
1177
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::get_subpel_mv_rd::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::get_subpel_mv_rd::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::get_subpel_mv_rd::<_>
1178
1179
/// Perform an uneven multi-hexagon search. There are 4 stages:
1180
/// 1. Unsymmetrical-cross search: Search the horizontal and vertical directions
1181
///    for the general direction of the motion.
1182
/// 2. A 5x5 full search is done to refine the current candidate.
1183
/// 3. Uneven multi-hexagon search. See [`UMH_PATTERN`].
1184
/// 4. Refinement using standard hexagon search.
1185
///
1186
/// `current` provides the initial search location and serves as
1187
/// the output for the final search results.
1188
///
1189
/// `me_range` parameter determines how far these stages can search.
1190
#[allow(clippy::too_many_arguments)]
1191
0
fn uneven_multi_hex_search<T: Pixel>(
1192
0
    po: PlaneOffset,
1193
0
    org_region: &PlaneRegion<T>,
1194
0
    p_ref: &Plane<T>,
1195
0
    current: &mut MotionSearchResult,
1196
0
    bit_depth: usize,
1197
0
    pmv: [MotionVector; 2],
1198
0
    lambda: u32,
1199
0
    mvx_min: isize,
1200
0
    mvx_max: isize,
1201
0
    mvy_min: isize,
1202
0
    mvy_max: isize,
1203
0
    w: usize,
1204
0
    h: usize,
1205
0
    me_range: i16,
1206
0
    cpu_feature_level: CpuFeatureLevel,
1207
0
) {
1208
0
    assert!(!current.is_empty());
1209
1210
    // Search in a cross pattern to obtain a rough approximation of the motion.
1211
    // The cross is split into a horizontal and vertical component. Video content
1212
    // tends to have more horizontal motion, so the horizontal part of the cross
1213
    // is twice as large as the vertical half.
1214
    //        X        -
1215
    //                 | <- me_range/2
1216
    //        X        |
1217
    // X X X XoX X X X -
1218
    //        X
1219
    //
1220
    //        X
1221
    // |------|
1222
    //     \
1223
    //    me_range
1224
0
    let center = current.mv;
1225
1226
    // The larger, horizontal, part of the cross search.
1227
0
    for i in (1..=me_range).step_by(2) {
1228
        const HORIZONTAL_LINE: [MotionVector; 2] = search_pattern!(
1229
          col: [ 0, 0],
1230
          row: [-1, 1]
1231
        );
1232
1233
0
        for &offset in &HORIZONTAL_LINE {
1234
0
            let cand_mv = center + offset * i;
1235
0
            let rd = get_fullpel_mv_rd(
1236
0
                po,
1237
0
                org_region,
1238
0
                p_ref,
1239
0
                bit_depth,
1240
0
                pmv,
1241
0
                lambda,
1242
                false,
1243
0
                mvx_min,
1244
0
                mvx_max,
1245
0
                mvy_min,
1246
0
                mvy_max,
1247
0
                w,
1248
0
                h,
1249
0
                cand_mv,
1250
0
                cpu_feature_level,
1251
            );
1252
1253
0
            if rd.cost < current.rd.cost {
1254
0
                current.mv = cand_mv;
1255
0
                current.rd = rd;
1256
0
            }
1257
        }
1258
    }
1259
1260
    // The smaller, vertical, part of the cross search
1261
0
    for i in (1..=me_range >> 1).step_by(2) {
1262
        const VERTICAL_LINE: [MotionVector; 2] = search_pattern!(
1263
          col: [-1, 1],
1264
          row: [ 0, 0]
1265
        );
1266
1267
0
        for &offset in &VERTICAL_LINE {
1268
0
            let cand_mv = center + offset * i;
1269
0
            let rd = get_fullpel_mv_rd(
1270
0
                po,
1271
0
                org_region,
1272
0
                p_ref,
1273
0
                bit_depth,
1274
0
                pmv,
1275
0
                lambda,
1276
                false,
1277
0
                mvx_min,
1278
0
                mvx_max,
1279
0
                mvy_min,
1280
0
                mvy_max,
1281
0
                w,
1282
0
                h,
1283
0
                cand_mv,
1284
0
                cpu_feature_level,
1285
            );
1286
1287
0
            if rd.cost < current.rd.cost {
1288
0
                current.mv = cand_mv;
1289
0
                current.rd = rd;
1290
0
            }
1291
        }
1292
    }
1293
1294
    // 5x5 full search. Search a 5x5 square region around the current best mv.
1295
0
    let center = current.mv;
1296
0
    for row in -2..=2 {
1297
0
        for col in -2..=2 {
1298
0
            if row == 0 && col == 0 {
1299
0
                continue;
1300
0
            }
1301
0
            let cand_mv = center + MotionVector { row, col };
1302
0
            let rd = get_fullpel_mv_rd(
1303
0
                po,
1304
0
                org_region,
1305
0
                p_ref,
1306
0
                bit_depth,
1307
0
                pmv,
1308
0
                lambda,
1309
                false,
1310
0
                mvx_min,
1311
0
                mvx_max,
1312
0
                mvy_min,
1313
0
                mvy_max,
1314
0
                w,
1315
0
                h,
1316
0
                cand_mv,
1317
0
                cpu_feature_level,
1318
            );
1319
1320
0
            if rd.cost < current.rd.cost {
1321
0
                current.mv = cand_mv;
1322
0
                current.rd = rd;
1323
0
            }
1324
        }
1325
    }
1326
1327
    // Run the hexagons in uneven multi-hexagon. The hexagonal pattern is tested
1328
    // around the best vector at multiple scales.
1329
    // Example of the UMH pattern run on a scale of 1 and 2:
1330
    //         2         -
1331
    //                   | <- me_range
1332
    //     2       2     |
1333
    //                   |
1334
    // 2       1       2 |
1335
    //       1   1       |
1336
    // 2   1       1   2 |
1337
    //     1       1     |
1338
    // 2   1   o   1   2 |
1339
    //     1       1     |
1340
    // 2   1       1   2 |
1341
    //       1   1       |
1342
    // 2       1       2 |
1343
    //                   |
1344
    //     2       2     |
1345
    //                   |
1346
    //         2         -
1347
    // |---------------|
1348
    //        \
1349
    //       me_range
1350
0
    let center = current.mv;
1351
1352
    // Divide by 4, the radius of the UMH's hexagon.
1353
0
    let iterations = me_range >> 2;
1354
0
    for i in 1..=iterations {
1355
0
        for &offset in &UMH_PATTERN {
1356
0
            let cand_mv = center + offset * i;
1357
0
            let rd = get_fullpel_mv_rd(
1358
0
                po,
1359
0
                org_region,
1360
0
                p_ref,
1361
0
                bit_depth,
1362
0
                pmv,
1363
0
                lambda,
1364
                false,
1365
0
                mvx_min,
1366
0
                mvx_max,
1367
0
                mvy_min,
1368
0
                mvy_max,
1369
0
                w,
1370
0
                h,
1371
0
                cand_mv,
1372
0
                cpu_feature_level,
1373
            );
1374
1375
0
            if rd.cost < current.rd.cost {
1376
0
                current.mv = cand_mv;
1377
0
                current.rd = rd;
1378
0
            }
1379
        }
1380
    }
1381
1382
    // Refine the search results using a 'normal' hexagon search.
1383
0
    hexagon_search(
1384
0
        po,
1385
0
        org_region,
1386
0
        p_ref,
1387
0
        current,
1388
0
        bit_depth,
1389
0
        pmv,
1390
0
        lambda,
1391
0
        mvx_min,
1392
0
        mvx_max,
1393
0
        mvy_min,
1394
0
        mvy_max,
1395
0
        w,
1396
0
        h,
1397
0
        cpu_feature_level,
1398
    );
1399
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::uneven_multi_hex_search::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::uneven_multi_hex_search::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::uneven_multi_hex_search::<_>
1400
1401
#[allow(clippy::too_many_arguments)]
1402
0
fn get_subset_predictors(
1403
0
    tile_bo: TileBlockOffset,
1404
0
    tile_me_stats: &TileMEStats<'_>,
1405
0
    frame_ref_opt: Option<ReadGuardMEStats<'_>>,
1406
0
    ref_frame_id: usize,
1407
0
    pix_w: usize,
1408
0
    pix_h: usize,
1409
0
    mvx_min: isize,
1410
0
    mvx_max: isize,
1411
0
    mvy_min: isize,
1412
0
    mvy_max: isize,
1413
0
    corner: MVSamplingMode,
1414
0
    ssdec: u8,
1415
0
) -> MotionEstimationSubsets {
1416
0
    let mut min_sad: u32 = u32::MAX;
1417
0
    let mut subset_b = ArrayVec::<MotionVector, 5>::new();
1418
0
    let mut subset_c = ArrayVec::<MotionVector, 5>::new();
1419
1420
    // rounded up width in blocks
1421
0
    let w = ((pix_w << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;
1422
0
    let h = ((pix_h << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;
1423
1424
    // Get predictors from the same frame.
1425
1426
0
    let clipped_half_w = (w >> 1).min(tile_me_stats.cols() - 1 - tile_bo.0.x);
1427
0
    let clipped_half_h = (h >> 1).min(tile_me_stats.rows() - 1 - tile_bo.0.y);
1428
1429
0
    let mut process_cand = |stats: MEStats| -> MotionVector {
1430
0
        min_sad = min_sad.min(stats.normalized_sad);
1431
0
        let mv = stats.mv.quantize_to_fullpel();
1432
0
        MotionVector {
1433
0
            col: clamp(mv.col as isize, mvx_min, mvx_max) as i16,
1434
0
            row: clamp(mv.row as isize, mvy_min, mvy_max) as i16,
1435
0
        }
1436
0
    };
1437
1438
    // Sample the middle of all block edges bordering this one.
1439
    // Note: If motion vectors haven't been precomputed to a given blocksize, then
1440
    // the right and bottom edges will be duplicates of the center predictor when
1441
    // processing in raster order.
1442
1443
    // left
1444
0
    if tile_bo.0.x > 0 {
1445
0
        subset_b.push(process_cand(
1446
0
            tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x - 1],
1447
0
        ));
1448
0
    }
1449
    // top
1450
0
    if tile_bo.0.y > 0 {
1451
0
        subset_b.push(process_cand(
1452
0
            tile_me_stats[tile_bo.0.y - 1][tile_bo.0.x + clipped_half_w],
1453
0
        ));
1454
0
    }
1455
1456
    // Sampling far right and far bottom edges was tested, but had worse results
1457
    // without an extensive threshold test (with threshold being applied after
1458
    // checking median and the best of each subset).
1459
1460
    // right
1461
    if let MVSamplingMode::CORNER {
1462
        right: true,
1463
        bottom: _,
1464
0
    } = corner
1465
    {
1466
0
        if tile_bo.0.x + w < tile_me_stats.cols() {
1467
0
            subset_b.push(process_cand(
1468
0
                tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x + w],
1469
0
            ));
1470
0
        }
1471
0
    }
1472
    // bottom
1473
    if let MVSamplingMode::CORNER {
1474
        right: _,
1475
        bottom: true,
1476
0
    } = corner
1477
    {
1478
0
        if tile_bo.0.y + h < tile_me_stats.rows() {
1479
0
            subset_b.push(process_cand(
1480
0
                tile_me_stats[tile_bo.0.y + h][tile_bo.0.x + clipped_half_w],
1481
0
            ));
1482
0
        }
1483
0
    }
1484
1485
0
    let median = if corner != MVSamplingMode::INIT {
1486
        // Sample the center of the current block.
1487
0
        Some(process_cand(
1488
0
            tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x + clipped_half_w],
1489
0
        ))
1490
0
    } else if subset_b.len() != 3 {
1491
0
        None
1492
    } else {
1493
0
        let mut rows: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.row).collect();
1494
0
        let mut cols: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.col).collect();
1495
0
        rows.as_mut_slice().sort_unstable();
1496
0
        cols.as_mut_slice().sort_unstable();
1497
0
        Some(MotionVector {
1498
0
            row: rows[1],
1499
0
            col: cols[1],
1500
0
        })
1501
    };
1502
1503
    // Zero motion vector, don't use add_cand since it skips zero vectors.
1504
0
    subset_b.push(MotionVector::default());
1505
1506
    // EPZS subset C predictors.
1507
    // Sample the middle of bordering side of the left, right, top and bottom
1508
    // blocks of the previous frame.
1509
    // Sample the middle of this block in the previous frame.
1510
1511
0
    if let Some(frame_me_stats) = frame_ref_opt {
1512
0
        let prev_frame = &frame_me_stats[ref_frame_id];
1513
1514
0
        let frame_bo = PlaneBlockOffset(BlockOffset {
1515
0
            x: tile_me_stats.x() + tile_bo.0.x,
1516
0
            y: tile_me_stats.y() + tile_bo.0.y,
1517
0
        });
1518
0
        let clipped_half_w = (w >> 1).min(prev_frame.cols - 1 - frame_bo.0.x);
1519
0
        let clipped_half_h = (h >> 1).min(prev_frame.rows - 1 - frame_bo.0.y);
1520
1521
        // left
1522
0
        if frame_bo.0.x > 0 {
1523
0
            subset_c.push(process_cand(
1524
0
                prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x - 1],
1525
0
            ));
1526
0
        }
1527
        // top
1528
0
        if frame_bo.0.y > 0 {
1529
0
            subset_c.push(process_cand(
1530
0
                prev_frame[frame_bo.0.y - 1][frame_bo.0.x + clipped_half_w],
1531
0
            ));
1532
0
        }
1533
        // right
1534
0
        if frame_bo.0.x + w < prev_frame.cols {
1535
0
            subset_c.push(process_cand(
1536
0
                prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + w],
1537
0
            ));
1538
0
        }
1539
        // bottom
1540
0
        if frame_bo.0.y + h < prev_frame.rows {
1541
0
            subset_c.push(process_cand(
1542
0
                prev_frame[frame_bo.0.y + h][frame_bo.0.x + clipped_half_w],
1543
0
            ));
1544
0
        }
1545
1546
0
        subset_c.push(process_cand(
1547
0
            prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + clipped_half_w],
1548
0
        ));
1549
0
    }
1550
1551
    // Undo normalization to 128x128 block size
1552
0
    let min_sad = ((min_sad as u64 * (pix_w * pix_h) as u64) >> (MAX_SB_SIZE_LOG2 * 2)) as u32;
1553
1554
0
    let dec_mv = |mv: MotionVector| MotionVector {
1555
0
        col: mv.col >> ssdec,
1556
0
        row: mv.row >> ssdec,
1557
0
    };
1558
0
    let median = median.map(dec_mv);
1559
0
    for mv in subset_b.iter_mut() {
1560
0
        *mv = dec_mv(*mv);
1561
0
    }
1562
0
    for mv in subset_c.iter_mut() {
1563
0
        *mv = dec_mv(*mv);
1564
0
    }
1565
1566
0
    MotionEstimationSubsets {
1567
0
        min_sad,
1568
0
        median,
1569
0
        subset_b,
1570
0
        subset_c,
1571
0
    }
1572
0
}
1573
1574
#[allow(clippy::too_many_arguments)]
1575
0
fn get_best_predictor<T: Pixel>(
1576
0
    po: PlaneOffset,
1577
0
    org_region: &PlaneRegion<T>,
1578
0
    p_ref: &Plane<T>,
1579
0
    predictors: &[MotionVector],
1580
0
    bit_depth: usize,
1581
0
    pmv: [MotionVector; 2],
1582
0
    lambda: u32,
1583
0
    mvx_min: isize,
1584
0
    mvx_max: isize,
1585
0
    mvy_min: isize,
1586
0
    mvy_max: isize,
1587
0
    w: usize,
1588
0
    h: usize,
1589
0
    cpu_feature_level: CpuFeatureLevel,
1590
0
) -> MotionSearchResult {
1591
0
    let mut best: MotionSearchResult = MotionSearchResult::empty();
1592
1593
0
    for &init_mv in predictors.iter() {
1594
0
        let rd = get_fullpel_mv_rd(
1595
0
            po,
1596
0
            org_region,
1597
0
            p_ref,
1598
0
            bit_depth,
1599
0
            pmv,
1600
0
            lambda,
1601
            false,
1602
0
            mvx_min,
1603
0
            mvx_max,
1604
0
            mvy_min,
1605
0
            mvy_max,
1606
0
            w,
1607
0
            h,
1608
0
            init_mv,
1609
0
            cpu_feature_level,
1610
        );
1611
1612
0
        if rd.cost < best.rd.cost {
1613
0
            best.mv = init_mv;
1614
0
            best.rd = rd;
1615
0
        }
1616
    }
1617
1618
0
    best
1619
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::get_best_predictor::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::get_best_predictor::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::get_best_predictor::<_>
1620
1621
#[allow(clippy::too_many_arguments)]
1622
0
fn get_fullpel_mv_rd<T: Pixel>(
1623
0
    po: PlaneOffset,
1624
0
    org_region: &PlaneRegion<T>,
1625
0
    p_ref: &Plane<T>,
1626
0
    bit_depth: usize,
1627
0
    pmv: [MotionVector; 2],
1628
0
    lambda: u32,
1629
0
    use_satd: bool,
1630
0
    mvx_min: isize,
1631
0
    mvx_max: isize,
1632
0
    mvy_min: isize,
1633
0
    mvy_max: isize,
1634
0
    w: usize,
1635
0
    h: usize,
1636
0
    cand_mv: MotionVector,
1637
0
    cpu_feature_level: CpuFeatureLevel,
1638
0
) -> MVCandidateRD {
1639
0
    if (cand_mv.col as isize) < mvx_min
1640
0
        || (cand_mv.col as isize) > mvx_max
1641
0
        || (cand_mv.row as isize) < mvy_min
1642
0
        || (cand_mv.row as isize) > mvy_max
1643
    {
1644
0
        return MVCandidateRD::empty();
1645
0
    }
1646
1647
    // Convert the motion vector into a full-pixel offset.
1648
0
    let plane_ref = p_ref.region(Area::StartingAt {
1649
0
        x: po.x + (cand_mv.col / 8) as isize,
1650
0
        y: po.y + (cand_mv.row / 8) as isize,
1651
0
    });
1652
0
    compute_mv_rd(
1653
0
        pmv,
1654
0
        lambda,
1655
0
        use_satd,
1656
0
        bit_depth,
1657
0
        w,
1658
0
        h,
1659
0
        cand_mv,
1660
0
        org_region,
1661
0
        &plane_ref,
1662
0
        cpu_feature_level,
1663
    )
1664
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::get_fullpel_mv_rd::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::get_fullpel_mv_rd::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::get_fullpel_mv_rd::<_>
1665
1666
/// Perform hexagon search and refine afterwards.
1667
///
1668
/// In the hexagon search stage, candidate motion vectors are examined for
1669
/// improvement to the current search location. The search location is moved to
1670
/// the best candidate (if any). This is repeated until the search location
1671
/// stops moving.
1672
///
1673
/// Refinement uses a square pattern that fits between the hexagon candidates.
1674
///
1675
/// The hexagon pattern is defined by [`HEXAGON_PATTERN`] and the refinement
1676
/// is defined by [`SQUARE_REFINE_PATTERN`].
1677
///
1678
/// `current` provides the initial search location and serves as
1679
/// the output for the final search results.
1680
#[allow(clippy::too_many_arguments)]
1681
0
fn hexagon_search<T: Pixel>(
1682
0
    po: PlaneOffset,
1683
0
    org_region: &PlaneRegion<T>,
1684
0
    p_ref: &Plane<T>,
1685
0
    current: &mut MotionSearchResult,
1686
0
    bit_depth: usize,
1687
0
    pmv: [MotionVector; 2],
1688
0
    lambda: u32,
1689
0
    mvx_min: isize,
1690
0
    mvx_max: isize,
1691
0
    mvy_min: isize,
1692
0
    mvy_max: isize,
1693
0
    w: usize,
1694
0
    h: usize,
1695
0
    cpu_feature_level: CpuFeatureLevel,
1696
0
) {
1697
    // The first iteration of hexagon search is implemented separate from
1698
    // subsequent iterations, which overlap with previous iterations.
1699
1700
    // Holds what candidate is used (if any). This is used to determine which
1701
    // candidates have already been tested in a previous iteration and can be
1702
    // skipped.
1703
0
    let mut best_cand_idx: usize = 0;
1704
0
    let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1705
1706
    // First iteration of hexagon search. There are six candidates to consider.
1707
0
    for (i, &pattern_mv) in HEXAGON_PATTERN.iter().enumerate() {
1708
0
        let cand_mv = current.mv + pattern_mv;
1709
0
        let rd = get_fullpel_mv_rd(
1710
0
            po,
1711
0
            org_region,
1712
0
            p_ref,
1713
0
            bit_depth,
1714
0
            pmv,
1715
0
            lambda,
1716
            false,
1717
0
            mvx_min,
1718
0
            mvx_max,
1719
0
            mvy_min,
1720
0
            mvy_max,
1721
0
            w,
1722
0
            h,
1723
0
            cand_mv,
1724
0
            cpu_feature_level,
1725
        );
1726
1727
0
        if rd.cost < best_cand.rd.cost {
1728
0
            best_cand_idx = i;
1729
0
            best_cand.mv = cand_mv;
1730
0
            best_cand.rd = rd;
1731
0
        }
1732
    }
1733
1734
    // Run additional iterations of hexagon search until the search location
1735
    // doesn't update.
1736
0
    while best_cand.rd.cost < current.rd.cost {
1737
        // Update the search location.
1738
0
        *current = best_cand;
1739
0
        best_cand = MotionSearchResult::empty();
1740
1741
        // Save the index/direction taken in the previous iteration to the current
1742
        // search location.
1743
0
        let center_cand_idx = best_cand_idx;
1744
1745
        // Look only at candidates that don't overlap with previous iterations. This
1746
        // corresponds with the three offsets (2D) with the closest direction to
1747
        // that traveled by the previous iteration. HEXAGON_PATTERN has clockwise
1748
        // order, so the last direction -1, +0, and +1 (mod 6) give the indices for
1749
        // these offsets.
1750
0
        for idx_offset_mod6 in 5..=7 {
1751
0
            let i = (center_cand_idx + idx_offset_mod6) % 6;
1752
0
            let cand_mv = current.mv + HEXAGON_PATTERN[i];
1753
1754
0
            let rd = get_fullpel_mv_rd(
1755
0
                po,
1756
0
                org_region,
1757
0
                p_ref,
1758
0
                bit_depth,
1759
0
                pmv,
1760
0
                lambda,
1761
                false,
1762
0
                mvx_min,
1763
0
                mvx_max,
1764
0
                mvy_min,
1765
0
                mvy_max,
1766
0
                w,
1767
0
                h,
1768
0
                cand_mv,
1769
0
                cpu_feature_level,
1770
            );
1771
1772
0
            if rd.cost < best_cand.rd.cost {
1773
0
                best_cand_idx = i;
1774
0
                best_cand.mv = cand_mv;
1775
0
                best_cand.rd = rd;
1776
0
            }
1777
        }
1778
    }
1779
1780
    // Refine the motion after completing hexagon search.
1781
0
    let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1782
0
    for &offset in &SQUARE_REFINE_PATTERN {
1783
0
        let cand_mv = current.mv + offset;
1784
0
        let rd = get_fullpel_mv_rd(
1785
0
            po,
1786
0
            org_region,
1787
0
            p_ref,
1788
0
            bit_depth,
1789
0
            pmv,
1790
0
            lambda,
1791
            false,
1792
0
            mvx_min,
1793
0
            mvx_max,
1794
0
            mvy_min,
1795
0
            mvy_max,
1796
0
            w,
1797
0
            h,
1798
0
            cand_mv,
1799
0
            cpu_feature_level,
1800
        );
1801
1802
0
        if rd.cost < best_cand.rd.cost {
1803
0
            best_cand.mv = cand_mv;
1804
0
            best_cand.rd = rd;
1805
0
        }
1806
    }
1807
0
    if best_cand.rd.cost < current.rd.cost {
1808
0
        *current = best_cand;
1809
0
    }
1810
1811
0
    assert!(!current.is_empty());
1812
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::hexagon_search::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::hexagon_search::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::hexagon_search::<_>
1813
1814
/// Run a full pixel diamond search. The search is run on multiple step sizes.
1815
///
1816
/// For each step size, candidate motion vectors are examined for improvement
1817
/// to the current search location. The search location is moved to the best
1818
/// candidate (if any). This is repeated until the search location stops moving.
1819
#[allow(clippy::too_many_arguments)]
1820
0
fn fullpel_diamond_search<T: Pixel>(
1821
0
    po: PlaneOffset,
1822
0
    org_region: &PlaneRegion<T>,
1823
0
    p_ref: &Plane<T>,
1824
0
    current: &mut MotionSearchResult,
1825
0
    bit_depth: usize,
1826
0
    pmv: [MotionVector; 2],
1827
0
    lambda: u32,
1828
0
    mvx_min: isize,
1829
0
    mvx_max: isize,
1830
0
    mvy_min: isize,
1831
0
    mvy_max: isize,
1832
0
    w: usize,
1833
0
    h: usize,
1834
0
    cpu_feature_level: CpuFeatureLevel,
1835
0
) {
1836
    // Define the initial and the final scale (log2) of the diamond.
1837
0
    let (mut diamond_radius_log2, diamond_radius_end_log2) = (1u8, 0u8);
1838
1839
    loop {
1840
        // Find the best candidate from the diamond pattern.
1841
0
        let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1842
0
        for &offset in &DIAMOND_R1_PATTERN {
1843
0
            let cand_mv = current.mv + (offset << diamond_radius_log2);
1844
0
            let rd = get_fullpel_mv_rd(
1845
0
                po,
1846
0
                org_region,
1847
0
                p_ref,
1848
0
                bit_depth,
1849
0
                pmv,
1850
0
                lambda,
1851
                false,
1852
0
                mvx_min,
1853
0
                mvx_max,
1854
0
                mvy_min,
1855
0
                mvy_max,
1856
0
                w,
1857
0
                h,
1858
0
                cand_mv,
1859
0
                cpu_feature_level,
1860
            );
1861
1862
0
            if rd.cost < best_cand.rd.cost {
1863
0
                best_cand.mv = cand_mv;
1864
0
                best_cand.rd = rd;
1865
0
            }
1866
        }
1867
1868
        // Continue the search at this scale until the can't find a better candidate
1869
        // to use.
1870
0
        if current.rd.cost <= best_cand.rd.cost {
1871
0
            if diamond_radius_log2 == diamond_radius_end_log2 {
1872
0
                break;
1873
0
            } else {
1874
0
                diamond_radius_log2 -= 1;
1875
0
            }
1876
0
        } else {
1877
0
            *current = best_cand;
1878
0
        }
1879
    }
1880
1881
0
    assert!(!current.is_empty());
1882
0
}
Unexecuted instantiation: av_scenechange::analyze::inter::fullpel_diamond_search::<u16>
Unexecuted instantiation: av_scenechange::analyze::inter::fullpel_diamond_search::<u8>
Unexecuted instantiation: av_scenechange::analyze::inter::fullpel_diamond_search::<_>