Coverage Report

Created: 2025-12-20 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/rav1e-0.8.1/src/me.rs
Line
Count
Source
1
// Copyright (c) 2017-2022, The rav1e contributors. All rights reserved
2
//
3
// This source code is subject to the terms of the BSD 2 Clause License and
4
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5
// was not distributed with this source code in the LICENSE file, you can
6
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7
// Media Patent License 1.0 was not distributed with this source code in the
8
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10
use crate::api::InterConfig;
11
use crate::context::{
12
  BlockOffset, PlaneBlockOffset, SuperBlockOffset, TileBlockOffset,
13
  TileSuperBlockOffset, MAX_SB_SIZE_LOG2, MIB_SIZE_LOG2, MI_SIZE,
14
  MI_SIZE_LOG2, SB_SIZE,
15
};
16
use crate::dist::*;
17
use crate::frame::*;
18
use crate::mc::MotionVector;
19
use crate::partition::*;
20
use crate::predict::PredictionMode;
21
use crate::tiling::*;
22
use crate::util::ILog;
23
use crate::util::{clamp, Pixel};
24
use crate::FrameInvariants;
25
26
use arrayvec::*;
27
use std::ops::{Index, IndexMut};
28
use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
29
30
#[derive(Debug, Copy, Clone, Default)]
31
pub struct MEStats {
32
  pub mv: MotionVector,
33
  /// sad value, on the scale of a 128x128 block
34
  pub normalized_sad: u32,
35
}
36
37
#[derive(Debug, Clone)]
38
pub struct FrameMEStats {
39
  stats: Box<[MEStats]>,
40
  pub cols: usize,
41
  pub rows: usize,
42
}
43
44
/// cbindgen:ignore
45
pub type RefMEStats = Arc<RwLock<[FrameMEStats; REF_FRAMES]>>;
46
/// cbindgen:ignore
47
pub type ReadGuardMEStats<'a> =
48
  RwLockReadGuard<'a, [FrameMEStats; REF_FRAMES]>;
49
/// cbindgen:ignore
50
pub type WriteGuardMEStats<'a> =
51
  RwLockWriteGuard<'a, [FrameMEStats; REF_FRAMES]>;
52
53
impl FrameMEStats {
54
  #[inline]
55
0
  pub fn rows_iter(&self) -> std::slice::ChunksExact<'_, MEStats> {
56
0
    self.stats.chunks_exact(self.cols)
57
0
  }
Unexecuted instantiation: <rav1e::me::FrameMEStats>::rows_iter
Unexecuted instantiation: <rav1e::me::FrameMEStats>::rows_iter
58
59
0
  pub fn new(cols: usize, rows: usize) -> Self {
60
0
    Self {
61
0
      // dynamic allocation: once per frame
62
0
      stats: vec![MEStats::default(); cols * rows].into_boxed_slice(),
63
0
      cols,
64
0
      rows,
65
0
    }
66
0
  }
67
0
  pub fn new_arc_array(cols: usize, rows: usize) -> RefMEStats {
68
0
    Arc::new(RwLock::new([
69
0
      FrameMEStats::new(cols, rows),
70
0
      FrameMEStats::new(cols, rows),
71
0
      FrameMEStats::new(cols, rows),
72
0
      FrameMEStats::new(cols, rows),
73
0
      FrameMEStats::new(cols, rows),
74
0
      FrameMEStats::new(cols, rows),
75
0
      FrameMEStats::new(cols, rows),
76
0
      FrameMEStats::new(cols, rows),
77
0
    ]))
78
0
  }
79
}
80
81
impl Index<usize> for FrameMEStats {
82
  type Output = [MEStats];
83
  #[inline]
84
0
  fn index(&self, index: usize) -> &Self::Output {
85
0
    &self.stats[index * self.cols..(index + 1) * self.cols]
86
0
  }
87
}
88
89
impl IndexMut<usize> for FrameMEStats {
90
  #[inline]
91
0
  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
92
0
    &mut self.stats[index * self.cols..(index + 1) * self.cols]
93
0
  }
Unexecuted instantiation: <rav1e::me::FrameMEStats as core::ops::index::IndexMut<usize>>::index_mut
Unexecuted instantiation: <rav1e::me::FrameMEStats as core::ops::index::IndexMut<usize>>::index_mut
94
}
95
96
/// Result of motion search.
97
#[derive(Debug, Copy, Clone)]
98
pub struct MotionSearchResult {
99
  /// Motion vector chosen by the motion search.
100
  pub mv: MotionVector,
101
  /// Rate distortion data associated with `mv`.
102
  pub rd: MVCandidateRD,
103
}
104
105
impl MotionSearchResult {
106
  /// Creates an 'empty' value.
107
  ///
108
  /// To be considered empty, cost is set higher than any naturally occurring
109
  /// cost value. The idea is that comparing to any valid rd output, the search
110
  /// result will always be replaced.
111
  #[inline(always)]
112
0
  pub fn empty() -> MotionSearchResult {
113
0
    MotionSearchResult {
114
0
      mv: MotionVector::default(),
115
0
      rd: MVCandidateRD::empty(),
116
0
    }
117
0
  }
118
119
  /// Check if the value should be considered to be empty.
120
  #[inline(always)]
121
0
  const fn is_empty(&self) -> bool {
122
0
    self.rd.cost == u64::MAX
123
0
  }
124
}
125
126
/// Holds data from computing rate distortion of a motion vector.
127
#[derive(Debug, Copy, Clone)]
128
pub struct MVCandidateRD {
129
  /// Rate distortion cost of the motion vector.
130
  pub cost: u64,
131
  /// Distortion metric value for the motion vector.
132
  pub sad: u32,
133
}
134
135
impl MVCandidateRD {
136
  /// Creates an 'empty' value.
137
  ///
138
  /// To be considered empty, cost is set higher than any naturally occurring
139
  /// cost value. The idea is that comparing to any valid rd output, the search
140
  /// result will always be replaced.
141
  #[inline(always)]
142
0
  const fn empty() -> MVCandidateRD {
143
0
    MVCandidateRD { sad: u32::MAX, cost: u64::MAX }
144
0
  }
145
}
146
147
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
148
pub enum MVSamplingMode {
149
  INIT,
150
  CORNER { right: bool, bottom: bool },
151
}
152
153
0
pub fn estimate_tile_motion<T: Pixel>(
154
0
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
155
0
  inter_cfg: &InterConfig,
156
0
) {
157
0
  let init_size = MIB_SIZE_LOG2;
158
159
0
  let mut prev_ssdec: Option<u8> = None;
160
0
  for mv_size_in_b_log2 in (2..=init_size).rev() {
161
0
    let init = mv_size_in_b_log2 == init_size;
162
163
    // Choose subsampling. Pass one is quarter res and pass two is at half res.
164
0
    let ssdec = match init_size - mv_size_in_b_log2 {
165
0
      0 => 2,
166
0
      1 => 1,
167
0
      _ => 0,
168
    };
169
170
0
    let new_subsampling =
171
0
      if let Some(prev) = prev_ssdec { prev != ssdec } else { false };
172
0
    prev_ssdec = Some(ssdec);
173
174
    // 0.5 and 0.125 are a fudge factors
175
0
    let lambda = (fi.me_lambda * 256.0 / (1 << (2 * ssdec)) as f64
176
0
      * if ssdec == 0 { 0.5 } else { 0.125 }) as u32;
177
178
0
    for sby in 0..ts.sb_height {
179
0
      for sbx in 0..ts.sb_width {
180
0
        let mut tested_frames_flags = 0;
181
0
        for &ref_frame in inter_cfg.allowed_ref_frames() {
182
0
          let frame_flag = 1 << fi.ref_frames[ref_frame.to_index()];
183
0
          if tested_frames_flags & frame_flag == frame_flag {
184
0
            continue;
185
0
          }
186
0
          tested_frames_flags |= frame_flag;
187
188
0
          let tile_bo =
189
0
            TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby })
190
0
              .block_offset(0, 0);
191
192
0
          if new_subsampling {
193
0
            refine_subsampled_sb_motion(
194
0
              fi,
195
0
              ts,
196
0
              ref_frame,
197
0
              mv_size_in_b_log2 + 1,
198
0
              tile_bo,
199
0
              ssdec,
200
0
              lambda,
201
0
            );
202
0
          }
203
204
0
          estimate_sb_motion(
205
0
            fi,
206
0
            ts,
207
0
            ref_frame,
208
0
            mv_size_in_b_log2,
209
0
            tile_bo,
210
0
            init,
211
0
            ssdec,
212
0
            lambda,
213
          );
214
        }
215
      }
216
    }
217
  }
218
0
}
Unexecuted instantiation: rav1e::me::estimate_tile_motion::<u16>
Unexecuted instantiation: rav1e::me::estimate_tile_motion::<u8>
219
220
0
fn estimate_sb_motion<T: Pixel>(
221
0
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, ref_frame: RefType,
222
0
  mv_size_in_b_log2: usize, tile_bo: TileBlockOffset, init: bool, ssdec: u8,
223
0
  lambda: u32,
224
0
) {
225
0
  let pix_offset = tile_bo.to_luma_plane_offset();
226
0
  let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
227
0
  let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);
228
229
0
  let mv_size = MI_SIZE << mv_size_in_b_log2;
230
231
  // Process in blocks, cropping at edges.
232
0
  for y in (0..sb_h).step_by(mv_size) {
233
0
    for x in (0..sb_w).step_by(mv_size) {
234
0
      let corner: MVSamplingMode = if init {
235
0
        MVSamplingMode::INIT
236
      } else {
237
        // Processing the block a size up produces data that can be used by
238
        // the right and bottom corners.
239
0
        MVSamplingMode::CORNER {
240
0
          right: x & mv_size == mv_size,
241
0
          bottom: y & mv_size == mv_size,
242
0
        }
243
      };
244
245
0
      let sub_bo = tile_bo
246
0
        .with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);
247
248
      // Clamp to frame edge, rounding up in the case of subsampling.
249
      // The rounding makes some assumptions about how subsampling is done.
250
0
      let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec;
251
0
      let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec;
252
253
      // Run motion estimation.
254
      // Note that the initial search (init) instructs the called function to
255
      // perform a more extensive search.
256
0
      if let Some(results) = estimate_motion(
257
0
        fi,
258
0
        ts,
259
0
        w,
260
0
        h,
261
0
        sub_bo,
262
0
        ref_frame,
263
0
        None,
264
0
        corner,
265
0
        init,
266
0
        ssdec,
267
0
        Some(lambda),
268
0
      ) {
269
0
        // normalize sad to 128x128 block
270
0
        let sad = (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2))
271
0
          / (w * h) as u64) as u32;
272
0
        save_me_stats(
273
0
          ts,
274
0
          mv_size_in_b_log2,
275
0
          sub_bo,
276
0
          ref_frame,
277
0
          MEStats { mv: results.mv, normalized_sad: sad },
278
0
        );
279
0
      }
280
    }
281
  }
282
0
}
Unexecuted instantiation: rav1e::me::estimate_sb_motion::<u16>
Unexecuted instantiation: rav1e::me::estimate_sb_motion::<u8>
283
284
0
fn refine_subsampled_sb_motion<T: Pixel>(
285
0
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, ref_frame: RefType,
286
0
  mv_size_in_b_log2: usize, tile_bo: TileBlockOffset, ssdec: u8, lambda: u32,
287
0
) {
288
0
  let pix_offset = tile_bo.to_luma_plane_offset();
289
0
  let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
290
0
  let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);
291
292
0
  let mv_size = MI_SIZE << mv_size_in_b_log2;
293
294
  // Process in blocks, cropping at edges.
295
0
  for y in (0..sb_h).step_by(mv_size) {
296
0
    for x in (0..sb_w).step_by(mv_size) {
297
0
      let sub_bo = tile_bo
298
0
        .with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);
299
300
      // Clamp to frame edge, rounding up in the case of subsampling.
301
      // The rounding makes some assumptions about how subsampling is done.
302
0
      let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec;
303
0
      let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec;
304
305
      // Refine the existing motion estimate
306
0
      if let Some(results) = refine_subsampled_motion_estimate(
307
0
        fi, ts, w, h, sub_bo, ref_frame, ssdec, lambda,
308
0
      ) {
309
0
        // normalize sad to 128x128 block
310
0
        let sad = (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2))
311
0
          / (w * h) as u64) as u32;
312
0
        save_me_stats(
313
0
          ts,
314
0
          mv_size_in_b_log2,
315
0
          sub_bo,
316
0
          ref_frame,
317
0
          MEStats { mv: results.mv, normalized_sad: sad },
318
0
        );
319
0
      }
320
    }
321
  }
322
0
}
Unexecuted instantiation: rav1e::me::refine_subsampled_sb_motion::<u16>
Unexecuted instantiation: rav1e::me::refine_subsampled_sb_motion::<u8>
323
324
0
fn save_me_stats<T: Pixel>(
325
0
  ts: &mut TileStateMut<'_, T>, mv_size_in_b_log2: usize,
326
0
  tile_bo: TileBlockOffset, ref_frame: RefType, stats: MEStats,
327
0
) {
328
0
  let size_in_b = 1 << mv_size_in_b_log2;
329
0
  let tile_me_stats = &mut ts.me_stats[ref_frame.to_index()];
330
0
  let tile_bo_x_end = (tile_bo.0.x + size_in_b).min(ts.mi_width);
331
0
  let tile_bo_y_end = (tile_bo.0.y + size_in_b).min(ts.mi_height);
332
0
  for mi_y in tile_bo.0.y..tile_bo_y_end {
333
0
    for a in tile_me_stats[mi_y][tile_bo.0.x..tile_bo_x_end].iter_mut() {
334
0
      *a = stats;
335
0
    }
336
  }
337
0
}
Unexecuted instantiation: rav1e::me::save_me_stats::<u16>
Unexecuted instantiation: rav1e::me::save_me_stats::<u8>
338
339
0
fn get_mv_range(
340
0
  w_in_b: usize, h_in_b: usize, bo: PlaneBlockOffset, blk_w: usize,
341
0
  blk_h: usize,
342
0
) -> (isize, isize, isize, isize) {
343
0
  let border_w = 128 + blk_w as isize * 8;
344
0
  let border_h = 128 + blk_h as isize * 8;
345
0
  let mvx_min = -(bo.0.x as isize) * (8 * MI_SIZE) as isize - border_w;
346
0
  let mvx_max = ((w_in_b - bo.0.x) as isize - (blk_w / MI_SIZE) as isize)
347
0
    * (8 * MI_SIZE) as isize
348
0
    + border_w;
349
0
  let mvy_min = -(bo.0.y as isize) * (8 * MI_SIZE) as isize - border_h;
350
0
  let mvy_max = ((h_in_b - bo.0.y) as isize - (blk_h / MI_SIZE) as isize)
351
0
    * (8 * MI_SIZE) as isize
352
0
    + border_h;
353
354
  // <https://aomediacodec.github.io/av1-spec/#assign-mv-semantics>
355
  use crate::context::{MV_LOW, MV_UPP};
356
0
  (
357
0
    mvx_min.max(MV_LOW as isize + 1),
358
0
    mvx_max.min(MV_UPP as isize - 1),
359
0
    mvy_min.max(MV_LOW as isize + 1),
360
0
    mvy_max.min(MV_UPP as isize - 1),
361
0
  )
362
0
}
363
364
struct MotionEstimationSubsets {
365
  min_sad: u32,
366
  median: Option<MotionVector>,
367
  subset_b: ArrayVec<MotionVector, 5>,
368
  subset_c: ArrayVec<MotionVector, 5>,
369
}
370
371
impl MotionEstimationSubsets {
372
0
  fn all_mvs(&self) -> ArrayVec<MotionVector, 11> {
373
0
    let mut all = ArrayVec::new();
374
0
    if let Some(median) = self.median {
375
0
      all.push(median);
376
0
    }
377
378
0
    all.extend(self.subset_b.iter().copied());
379
0
    all.extend(self.subset_c.iter().copied());
380
381
0
    all
382
0
  }
383
}
384
385
#[profiling::function]
386
fn get_subset_predictors(
387
  tile_bo: TileBlockOffset, tile_me_stats: &TileMEStats<'_>,
388
  frame_ref_opt: Option<ReadGuardMEStats<'_>>, ref_frame_id: usize,
389
  pix_w: usize, pix_h: usize, mvx_min: isize, mvx_max: isize, mvy_min: isize,
390
  mvy_max: isize, corner: MVSamplingMode, ssdec: u8,
391
) -> MotionEstimationSubsets {
392
  let mut min_sad: u32 = u32::MAX;
393
  let mut subset_b = ArrayVec::<MotionVector, 5>::new();
394
  let mut subset_c = ArrayVec::<MotionVector, 5>::new();
395
396
  // rounded up width in blocks
397
  let w = ((pix_w << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;
398
  let h = ((pix_h << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;
399
400
  // Get predictors from the same frame.
401
402
  let clipped_half_w = (w >> 1).min(tile_me_stats.cols() - 1 - tile_bo.0.x);
403
  let clipped_half_h = (h >> 1).min(tile_me_stats.rows() - 1 - tile_bo.0.y);
404
405
0
  let mut process_cand = |stats: MEStats| -> MotionVector {
406
0
    min_sad = min_sad.min(stats.normalized_sad);
407
0
    let mv = stats.mv.quantize_to_fullpel();
408
0
    MotionVector {
409
0
      col: clamp(mv.col as isize, mvx_min, mvx_max) as i16,
410
0
      row: clamp(mv.row as isize, mvy_min, mvy_max) as i16,
411
0
    }
412
0
  };
413
414
  // Sample the middle of all block edges bordering this one.
415
  // Note: If motion vectors haven't been precomputed to a given blocksize, then
416
  // the right and bottom edges will be duplicates of the center predictor when
417
  // processing in raster order.
418
419
  // left
420
  if tile_bo.0.x > 0 {
421
    subset_b.push(process_cand(
422
      tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x - 1],
423
    ));
424
  }
425
  // top
426
  if tile_bo.0.y > 0 {
427
    subset_b.push(process_cand(
428
      tile_me_stats[tile_bo.0.y - 1][tile_bo.0.x + clipped_half_w],
429
    ));
430
  }
431
432
  // Sampling far right and far bottom edges was tested, but had worse results
433
  // without an extensive threshold test (with threshold being applied after
434
  // checking median and the best of each subset).
435
436
  // right
437
  if let MVSamplingMode::CORNER { right: true, bottom: _ } = corner {
438
    if tile_bo.0.x + w < tile_me_stats.cols() {
439
      subset_b.push(process_cand(
440
        tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x + w],
441
      ));
442
    }
443
  }
444
  // bottom
445
  if let MVSamplingMode::CORNER { right: _, bottom: true } = corner {
446
    if tile_bo.0.y + h < tile_me_stats.rows() {
447
      subset_b.push(process_cand(
448
        tile_me_stats[tile_bo.0.y + h][tile_bo.0.x + clipped_half_w],
449
      ));
450
    }
451
  }
452
453
  let median = if corner != MVSamplingMode::INIT {
454
    // Sample the center of the current block.
455
    Some(process_cand(
456
      tile_me_stats[tile_bo.0.y + clipped_half_h]
457
        [tile_bo.0.x + clipped_half_w],
458
    ))
459
  } else if subset_b.len() != 3 {
460
    None
461
  } else {
462
    let mut rows: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.row).collect();
463
    let mut cols: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.col).collect();
464
    rows.as_mut_slice().sort_unstable();
465
    cols.as_mut_slice().sort_unstable();
466
    Some(MotionVector { row: rows[1], col: cols[1] })
467
  };
468
469
  // Zero motion vector, don't use add_cand since it skips zero vectors.
470
  subset_b.push(MotionVector::default());
471
472
  // EPZS subset C predictors.
473
  // Sample the middle of bordering side of the left, right, top and bottom
474
  // blocks of the previous frame.
475
  // Sample the middle of this block in the previous frame.
476
477
  if let Some(frame_me_stats) = frame_ref_opt {
478
    let prev_frame = &frame_me_stats[ref_frame_id];
479
480
    let frame_bo = PlaneBlockOffset(BlockOffset {
481
      x: tile_me_stats.x() + tile_bo.0.x,
482
      y: tile_me_stats.y() + tile_bo.0.y,
483
    });
484
    let clipped_half_w = (w >> 1).min(prev_frame.cols - 1 - frame_bo.0.x);
485
    let clipped_half_h = (h >> 1).min(prev_frame.rows - 1 - frame_bo.0.y);
486
487
    // left
488
    if frame_bo.0.x > 0 {
489
      subset_c.push(process_cand(
490
        prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x - 1],
491
      ));
492
    }
493
    // top
494
    if frame_bo.0.y > 0 {
495
      subset_c.push(process_cand(
496
        prev_frame[frame_bo.0.y - 1][frame_bo.0.x + clipped_half_w],
497
      ));
498
    }
499
    // right
500
    if frame_bo.0.x + w < prev_frame.cols {
501
      subset_c.push(process_cand(
502
        prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + w],
503
      ));
504
    }
505
    // bottom
506
    if frame_bo.0.y + h < prev_frame.rows {
507
      subset_c.push(process_cand(
508
        prev_frame[frame_bo.0.y + h][frame_bo.0.x + clipped_half_w],
509
      ));
510
    }
511
512
    subset_c.push(process_cand(
513
      prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + clipped_half_w],
514
    ));
515
  }
516
517
  // Undo normalization to 128x128 block size
518
  let min_sad = ((min_sad as u64 * (pix_w * pix_h) as u64)
519
    >> (MAX_SB_SIZE_LOG2 * 2)) as u32;
520
521
  let dec_mv = |mv: MotionVector| MotionVector {
522
0
    col: mv.col >> ssdec,
523
0
    row: mv.row >> ssdec,
524
0
  };
525
  let median = median.map(dec_mv);
526
  for mv in subset_b.iter_mut() {
527
    *mv = dec_mv(*mv);
528
  }
529
  for mv in subset_c.iter_mut() {
530
    *mv = dec_mv(*mv);
531
  }
532
533
  MotionEstimationSubsets { min_sad, median, subset_b, subset_c }
534
}
535
536
0
pub fn estimate_motion<T: Pixel>(
537
0
  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w: usize, h: usize,
538
0
  tile_bo: TileBlockOffset, ref_frame: RefType,
539
0
  pmv: Option<[MotionVector; 2]>, corner: MVSamplingMode,
540
0
  extensive_search: bool, ssdec: u8, lambda: Option<u32>,
541
0
) -> Option<MotionSearchResult> {
542
0
  if let Some(ref rec) =
543
0
    fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
544
  {
545
0
    let frame_bo = ts.to_frame_block_offset(tile_bo);
546
0
    let (mvx_min, mvx_max, mvy_min, mvy_max) =
547
0
      get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
548
549
0
    let lambda = lambda.unwrap_or({
550
      // 0.5 is a fudge factor
551
0
      (fi.me_lambda * 256.0 * 0.5) as u32
552
    });
553
554
0
    let global_mv = [MotionVector { row: 0, col: 0 }; 2];
555
556
0
    let po = frame_bo.to_luma_plane_offset();
557
0
    let (mvx_min, mvx_max, mvy_min, mvy_max) =
558
0
      (mvx_min >> ssdec, mvx_max >> ssdec, mvy_min >> ssdec, mvy_max >> ssdec);
559
0
    let po = PlaneOffset { x: po.x >> ssdec, y: po.y >> ssdec };
560
0
    let p_ref = match ssdec {
561
0
      0 => &rec.frame.planes[0],
562
0
      1 => &rec.input_hres,
563
0
      2 => &rec.input_qres,
564
0
      _ => unimplemented!(),
565
    };
566
567
0
    let org_region = &match ssdec {
568
0
      0 => ts.input_tile.planes[0]
569
0
        .subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
570
0
      1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
571
0
      2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
572
0
      _ => unimplemented!(),
573
    };
574
575
0
    let mut best: MotionSearchResult = full_pixel_me(
576
0
      fi,
577
0
      ts,
578
0
      org_region,
579
0
      p_ref,
580
0
      tile_bo,
581
0
      po,
582
0
      lambda,
583
0
      pmv.unwrap_or(global_mv),
584
0
      w,
585
0
      h,
586
0
      mvx_min,
587
0
      mvx_max,
588
0
      mvy_min,
589
0
      mvy_max,
590
0
      ref_frame,
591
0
      corner,
592
0
      extensive_search,
593
0
      ssdec,
594
    );
595
596
0
    if let Some(pmv) = pmv {
597
0
      let use_satd: bool = fi.config.speed_settings.motion.use_satd_subpel;
598
0
      if use_satd {
599
0
        best.rd = get_fullpel_mv_rd(
600
0
          fi,
601
0
          po,
602
0
          org_region,
603
0
          p_ref,
604
0
          fi.sequence.bit_depth,
605
0
          pmv,
606
0
          lambda,
607
0
          use_satd,
608
0
          mvx_min,
609
0
          mvx_max,
610
0
          mvy_min,
611
0
          mvy_max,
612
0
          w,
613
0
          h,
614
0
          best.mv,
615
0
        );
616
0
      }
617
618
0
      sub_pixel_me(
619
0
        fi, po, org_region, p_ref, lambda, pmv, mvx_min, mvx_max, mvy_min,
620
0
        mvy_max, w, h, use_satd, &mut best, ref_frame,
621
      );
622
0
    }
623
624
    // Scale motion vectors to full res size
625
0
    best.mv = best.mv << ssdec;
626
627
0
    Some(best)
628
  } else {
629
0
    None
630
  }
631
0
}
Unexecuted instantiation: rav1e::me::estimate_motion::<u16>
Unexecuted instantiation: rav1e::me::estimate_motion::<u8>
632
633
/// Refine motion estimation that was computed one level of subsampling up.
634
0
fn refine_subsampled_motion_estimate<T: Pixel>(
635
0
  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w: usize, h: usize,
636
0
  tile_bo: TileBlockOffset, ref_frame: RefType, ssdec: u8, lambda: u32,
637
0
) -> Option<MotionSearchResult> {
638
0
  if let Some(ref rec) =
639
0
    fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
640
  {
641
0
    let frame_bo = ts.to_frame_block_offset(tile_bo);
642
0
    let (mvx_min, mvx_max, mvy_min, mvy_max) =
643
0
      get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
644
645
0
    let pmv = [MotionVector { row: 0, col: 0 }; 2];
646
647
0
    let po = frame_bo.to_luma_plane_offset();
648
0
    let (mvx_min, mvx_max, mvy_min, mvy_max) =
649
0
      (mvx_min >> ssdec, mvx_max >> ssdec, mvy_min >> ssdec, mvy_max >> ssdec);
650
0
    let po = PlaneOffset { x: po.x >> ssdec, y: po.y >> ssdec };
651
0
    let p_ref = match ssdec {
652
0
      0 => &rec.frame.planes[0],
653
0
      1 => &rec.input_hres,
654
0
      2 => &rec.input_qres,
655
0
      _ => unimplemented!(),
656
    };
657
658
0
    let org_region = &match ssdec {
659
0
      0 => ts.input_tile.planes[0]
660
0
        .subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
661
0
      1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
662
0
      2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
663
0
      _ => unimplemented!(),
664
    };
665
666
0
    let mv =
667
0
      ts.me_stats[ref_frame.to_index()][tile_bo.0.y][tile_bo.0.x].mv >> ssdec;
668
669
    // Given a motion vector at 0 at higher subsampling:
670
    // |  -1   |   0   |   1   |
671
    // then the vectors at -1 to 2 should be tested at the current subsampling.
672
    //      |-------------|
673
    // | -2 -1 |  0  1 |  2  3 |
674
    // This corresponds to a 4x4 full search.
675
0
    let x_lo = po.x + (mv.col as isize / 8 - 1).max(mvx_min / 8);
676
0
    let x_hi = po.x + (mv.col as isize / 8 + 2).min(mvx_max / 8);
677
0
    let y_lo = po.y + (mv.row as isize / 8 - 1).max(mvy_min / 8);
678
0
    let y_hi = po.y + (mv.row as isize / 8 + 2).min(mvy_max / 8);
679
0
    let mut results = full_search(
680
0
      fi, x_lo, x_hi, y_lo, y_hi, w, h, org_region, p_ref, po, 1, lambda, pmv,
681
    );
682
683
    // Scale motion vectors to full res size
684
0
    results.mv = results.mv << ssdec;
685
686
0
    Some(results)
687
  } else {
688
0
    None
689
  }
690
0
}
Unexecuted instantiation: rav1e::me::refine_subsampled_motion_estimate::<u16>
Unexecuted instantiation: rav1e::me::refine_subsampled_motion_estimate::<u8>
691
692
#[profiling::function]
693
fn full_pixel_me<T: Pixel>(
694
  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
695
  org_region: &PlaneRegion<T>, p_ref: &Plane<T>, tile_bo: TileBlockOffset,
696
  po: PlaneOffset, lambda: u32, pmv: [MotionVector; 2], w: usize, h: usize,
697
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
698
  ref_frame: RefType, corner: MVSamplingMode, extensive_search: bool,
699
  ssdec: u8,
700
) -> MotionSearchResult {
701
  let ref_frame_id = ref_frame.to_index();
702
  let tile_me_stats = &ts.me_stats[ref_frame_id].as_const();
703
  let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize]
704
    .as_ref()
705
0
    .map(|frame_ref| frame_ref.frame_me_stats.read().expect("poisoned lock"));
Unexecuted instantiation: rav1e::me::full_pixel_me::<u16>::{closure#0}
Unexecuted instantiation: rav1e::me::full_pixel_me::<u8>::{closure#0}
706
  let subsets = get_subset_predictors(
707
    tile_bo,
708
    tile_me_stats,
709
    frame_ref,
710
    ref_frame_id,
711
    w,
712
    h,
713
    mvx_min,
714
    mvx_max,
715
    mvy_min,
716
    mvy_max,
717
    corner,
718
    ssdec,
719
  );
720
721
  let try_cands = |predictors: &[MotionVector],
722
0
                   best: &mut MotionSearchResult| {
723
0
    let mut results = get_best_predictor(
724
0
      fi,
725
0
      po,
726
0
      org_region,
727
0
      p_ref,
728
0
      predictors,
729
0
      fi.sequence.bit_depth,
730
0
      pmv,
731
0
      lambda,
732
0
      mvx_min,
733
0
      mvx_max,
734
0
      mvy_min,
735
0
      mvy_max,
736
0
      w,
737
0
      h,
738
    );
739
0
    fullpel_diamond_search(
740
0
      fi,
741
0
      po,
742
0
      org_region,
743
0
      p_ref,
744
0
      &mut results,
745
0
      fi.sequence.bit_depth,
746
0
      pmv,
747
0
      lambda,
748
0
      mvx_min,
749
0
      mvx_max,
750
0
      mvy_min,
751
0
      mvy_max,
752
0
      w,
753
0
      h,
754
    );
755
756
0
    if results.rd.cost < best.rd.cost {
757
0
      *best = results;
758
0
    }
759
0
  };
Unexecuted instantiation: rav1e::me::full_pixel_me::<u16>::{closure#1}
Unexecuted instantiation: rav1e::me::full_pixel_me::<u8>::{closure#1}
760
761
  let mut best: MotionSearchResult = MotionSearchResult::empty();
762
  if !extensive_search {
763
    try_cands(&subsets.all_mvs(), &mut best);
764
    best
765
  } else {
766
    // Perform a more thorough search before resorting to full search.
767
    // Search the median, the best mvs of neighboring blocks, and motion vectors
768
    // from the previous frame. Stop once a candidate with a sad less than a
769
    // threshold is found.
770
771
    let thresh = (subsets.min_sad as f32 * 1.2) as u32
772
      + (((w * h) as u32) << (fi.sequence.bit_depth - 8));
773
774
    if let Some(median) = subsets.median {
775
      try_cands(&[median], &mut best);
776
777
      if best.rd.sad < thresh {
778
        return best;
779
      }
780
    }
781
782
    try_cands(&subsets.subset_b, &mut best);
783
784
    if best.rd.sad < thresh {
785
      return best;
786
    }
787
788
    try_cands(&subsets.subset_c, &mut best);
789
790
    if best.rd.sad < thresh {
791
      return best;
792
    }
793
794
    // Preform UMH search, either as the last possible search when full search
795
    // is disabled, or as the last search before resorting to full search.
796
    uneven_multi_hex_search(
797
      fi,
798
      po,
799
      org_region,
800
      p_ref,
801
      &mut best,
802
      fi.sequence.bit_depth,
803
      pmv,
804
      lambda,
805
      mvx_min,
806
      mvx_max,
807
      mvy_min,
808
      mvy_max,
809
      w,
810
      h,
811
      // Use 24, since it is the largest range that x264 uses.
812
      24,
813
    );
814
815
    if !fi.config.speed_settings.motion.me_allow_full_search
816
      || best.rd.sad < thresh
817
    {
818
      return best;
819
    }
820
821
    {
822
      let range_x = (192 * fi.me_range_scale as isize) >> ssdec;
823
      let range_y = (64 * fi.me_range_scale as isize) >> ssdec;
824
      let x_lo = po.x + (-range_x).max(mvx_min / 8);
825
      let x_hi = po.x + (range_x).min(mvx_max / 8);
826
      let y_lo = po.y + (-range_y).max(mvy_min / 8);
827
      let y_hi = po.y + (range_y).min(mvy_max / 8);
828
829
      let results = full_search(
830
        fi,
831
        x_lo,
832
        x_hi,
833
        y_lo,
834
        y_hi,
835
        w,
836
        h,
837
        org_region,
838
        p_ref,
839
        po,
840
        // Full search is run at quarter resolution, except for short edges.
841
        // When subsampling is lower than usual, the step size is raised so that
842
        // the number of search locations stays the same.
843
        4 >> ssdec,
844
        lambda,
845
        [MotionVector::default(); 2],
846
      );
847
848
      if results.rd.cost < best.rd.cost {
849
        results
850
      } else {
851
        best
852
      }
853
    }
854
  }
855
}
856
857
0
fn sub_pixel_me<T: Pixel>(
858
0
  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
859
0
  p_ref: &Plane<T>, lambda: u32, pmv: [MotionVector; 2], mvx_min: isize,
860
0
  mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize, h: usize,
861
0
  use_satd: bool, best: &mut MotionSearchResult, ref_frame: RefType,
862
0
) {
863
0
  subpel_diamond_search(
864
0
    fi,
865
0
    po,
866
0
    org_region,
867
0
    p_ref,
868
0
    fi.sequence.bit_depth,
869
0
    pmv,
870
0
    lambda,
871
0
    mvx_min,
872
0
    mvx_max,
873
0
    mvy_min,
874
0
    mvy_max,
875
0
    w,
876
0
    h,
877
0
    use_satd,
878
0
    best,
879
0
    ref_frame,
880
  );
881
0
}
Unexecuted instantiation: rav1e::me::sub_pixel_me::<u16>
Unexecuted instantiation: rav1e::me::sub_pixel_me::<u8>
882
883
#[profiling::function]
884
fn get_best_predictor<T: Pixel>(
885
  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
886
  p_ref: &Plane<T>, predictors: &[MotionVector], bit_depth: usize,
887
  pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
888
  mvy_min: isize, mvy_max: isize, w: usize, h: usize,
889
) -> MotionSearchResult {
890
  let mut best: MotionSearchResult = MotionSearchResult::empty();
891
892
  for &init_mv in predictors.iter() {
893
    let rd = get_fullpel_mv_rd(
894
      fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
895
      mvx_max, mvy_min, mvy_max, w, h, init_mv,
896
    );
897
898
    if rd.cost < best.rd.cost {
899
      best.mv = init_mv;
900
      best.rd = rd;
901
    }
902
  }
903
904
  best
905
}
906
907
/// Declares an array of motion vectors in structure of arrays syntax.
908
/// Compared to [`search_pattern_subpel`], this version creates motion vectors
909
/// in fullpel resolution (x8).
910
macro_rules! search_pattern {
911
    ($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => {
912
      [ $(MotionVector { $field_a: $ll_a << 3, $field_b: $ll_b << 3 } ),*]
913
    };
914
}
915
916
/// Declares an array of motion vectors in structure of arrays syntax.
917
macro_rules! search_pattern_subpel {
918
    ($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => {
919
      [ $(MotionVector { $field_a: $ll_a, $field_b: $ll_b } ),*]
920
    };
921
}
922
923
/// Diamond pattern of radius 1 as shown below. For fullpel search, use
924
/// `DIAMOND_R1_PATTERN_FULLPEL` since it has been scaled for fullpel search.
925
/// ```text
926
///  X
927
/// XoX
928
///  X
929
/// ```
930
/// 'X's are motion candidates and the 'o' is the center.
931
///
932
const DIAMOND_R1_PATTERN_SUBPEL: [MotionVector; 4] = search_pattern_subpel!(
933
  col: [  0,  1,  0, -1],
934
  row: [  1,  0, -1,  0]
935
);
936
/// Diamond pattern of radius 1 as shown below. Unlike `DIAMOND_R1_PATTERN`, the
937
/// vectors have been shifted fullpel scale.
938
/// ```text
939
///  X
940
/// XoX
941
///  X
942
/// ```
943
/// 'X's are motion candidates and the 'o' is the center.
944
const DIAMOND_R1_PATTERN: [MotionVector; 4] = search_pattern!(
945
  col: [  0,  1,  0, -1],
946
  row: [  1,  0, -1,  0]
947
);
948
949
/// Run a full pixel diamond search. The search is run on multiple step sizes.
950
///
951
/// For each step size, candidate motion vectors are examined for improvement
952
/// to the current search location. The search location is moved to the best
953
/// candidate (if any). This is repeated until the search location stops moving.
954
#[profiling::function]
955
fn fullpel_diamond_search<T: Pixel>(
956
  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
957
  p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
958
  pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
959
  mvy_min: isize, mvy_max: isize, w: usize, h: usize,
960
) {
961
  // Define the initial and the final scale (log2) of the diamond.
962
  let (mut diamond_radius_log2, diamond_radius_end_log2) = (1u8, 0u8);
963
964
  loop {
965
    // Find the best candidate from the diamond pattern.
966
    let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
967
    for &offset in &DIAMOND_R1_PATTERN {
968
      let cand_mv = current.mv + (offset << diamond_radius_log2);
969
      let rd = get_fullpel_mv_rd(
970
        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
971
        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
972
      );
973
974
      if rd.cost < best_cand.rd.cost {
975
        best_cand.mv = cand_mv;
976
        best_cand.rd = rd;
977
      }
978
    }
979
980
    // Continue the search at this scale until the can't find a better candidate
981
    // to use.
982
    if current.rd.cost <= best_cand.rd.cost {
983
      if diamond_radius_log2 == diamond_radius_end_log2 {
984
        break;
985
      } else {
986
        diamond_radius_log2 -= 1;
987
      }
988
    } else {
989
      *current = best_cand;
990
    }
991
  }
992
993
  assert!(!current.is_empty());
994
}
995
996
/// A hexagon pattern around a center point. The pattern is ordered so that the
997
/// offsets circle around the center. This is done to allow pruning locations
998
/// covered by the last iteration.
999
/// ```text
1000
///   21012
1001
/// 2  X X
1002
/// 1
1003
/// 0 X o X
1004
/// 1
1005
/// 2  X X
1006
/// ```
1007
/// 'X's are motion candidates and the 'o' is the center.
1008
///
1009
/// The illustration below shows the process of a hexagon search.
1010
/// ```text
1011
/// Step 1    Step 2
1012
///  1 1       1 1 2
1013
///
1014
/// 1(0)1  => 1 0(1)2
1015
///
1016
///  1 1       1 1 2
1017
/// ```
1018
/// The search above has gone through the following steps.
1019
/// 1. Search '1' elements for better candidates than the center '0'.
1020
/// 2. Recenter around the best candidate ('(1)') and hexagon candidates that
1021
///    don't overlap with the previous search step (labeled '2').
1022
const HEXAGON_PATTERN: [MotionVector; 6] = search_pattern!(
1023
  col: [  0,  2,  2,  0, -2, -2],
1024
  row: [ -2, -1,  1,  2,  1, -1]
1025
);
1026
1027
/// A small square pattern around a center point.
1028
/// ```text
1029
///   101
1030
/// 1 XXX
1031
/// 0 XoX
1032
/// 1 XXX
1033
/// ```
1034
/// 'X's are motion candidates and the 'o' is the center.
1035
const SQUARE_REFINE_PATTERN: [MotionVector; 8] = search_pattern!(
1036
  col: [ -1,  0,  1, -1,  1, -1,  0,  1],
1037
  row: [  1,  1,  1,  0,  0, -1, -1, -1]
1038
);
1039
1040
/// Perform hexagon search and refine afterwards.
1041
///
1042
/// In the hexagon search stage, candidate motion vectors are examined for
1043
/// improvement to the current search location. The search location is moved to
1044
/// the best candidate (if any). This is repeated until the search location
1045
/// stops moving.
1046
///
1047
/// Refinement uses a square pattern that fits between the hexagon candidates.
1048
///
1049
/// The hexagon pattern is defined by [`HEXAGON_PATTERN`] and the refinement
1050
/// is defined by [`SQUARE_REFINE_PATTERN`].
1051
///
1052
/// `current` provides the initial search location and serves as
1053
/// the output for the final search results.
1054
#[profiling::function]
1055
fn hexagon_search<T: Pixel>(
1056
  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1057
  p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
1058
  pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
1059
  mvy_min: isize, mvy_max: isize, w: usize, h: usize,
1060
) {
1061
  // The first iteration of hexagon search is implemented separate from
1062
  // subsequent iterations, which overlap with previous iterations.
1063
1064
  // Holds what candidate is used (if any). This is used to determine which
1065
  // candidates have already been tested in a previous iteration and can be
1066
  // skipped.
1067
  let mut best_cand_idx: usize = 0;
1068
  let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1069
1070
  // First iteration of hexagon search. There are six candidates to consider.
1071
  for i in 0..6 {
1072
    let cand_mv = current.mv + HEXAGON_PATTERN[i];
1073
    let rd = get_fullpel_mv_rd(
1074
      fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1075
      mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1076
    );
1077
1078
    if rd.cost < best_cand.rd.cost {
1079
      best_cand_idx = i;
1080
      best_cand.mv = cand_mv;
1081
      best_cand.rd = rd;
1082
    }
1083
  }
1084
1085
  // Run additional iterations of hexagon search until the search location
1086
  // doesn't update.
1087
  while best_cand.rd.cost < current.rd.cost {
1088
    // Update the search location.
1089
    *current = best_cand;
1090
    best_cand = MotionSearchResult::empty();
1091
1092
    // Save the index/direction taken in the previous iteration to the current
1093
    // search location.
1094
    let center_cand_idx = best_cand_idx;
1095
1096
    // Look only at candidates that don't overlap with previous iterations. This
1097
    // corresponds with the three offsets (2D) with the closest direction to
1098
    // that traveled by the previous iteration. HEXAGON_PATTERN has clockwise
1099
    // order, so the last direction -1, +0, and +1 (mod 6) give the indices for
1100
    // these offsets.
1101
    for idx_offset_mod6 in 5..=7 {
1102
      let i = (center_cand_idx + idx_offset_mod6) % 6;
1103
      let cand_mv = current.mv + HEXAGON_PATTERN[i];
1104
1105
      let rd = get_fullpel_mv_rd(
1106
        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1107
        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1108
      );
1109
1110
      if rd.cost < best_cand.rd.cost {
1111
        best_cand_idx = i;
1112
        best_cand.mv = cand_mv;
1113
        best_cand.rd = rd;
1114
      }
1115
    }
1116
  }
1117
1118
  // Refine the motion after completing hexagon search.
1119
  let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1120
  for &offset in &SQUARE_REFINE_PATTERN {
1121
    let cand_mv = current.mv + offset;
1122
    let rd = get_fullpel_mv_rd(
1123
      fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1124
      mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1125
    );
1126
1127
    if rd.cost < best_cand.rd.cost {
1128
      best_cand.mv = cand_mv;
1129
      best_cand.rd = rd;
1130
    }
1131
  }
1132
  if best_cand.rd.cost < current.rd.cost {
1133
    *current = best_cand;
1134
  }
1135
1136
  assert!(!current.is_empty());
1137
}
1138
1139
/// Uneven multi-hexagon search pattern around a center point. Used for locating
1140
/// irregular movement.
1141
/// ```text
1142
///      X
1143
///    X   X
1144
///  X       X
1145
///  X       X
1146
///  X   o   X
1147
///  X       X
1148
///  X       X
1149
///    X   X
1150
///      X
1151
/// ```
1152
/// 'X's are motion candidates and the 'o' is the center.
1153
const UMH_PATTERN: [MotionVector; 16] = search_pattern!(
1154
  col: [ -2, -1,  0,  1,  2,  3,  4,  3,  2,  1,  0, -1, -2,  3, -4, -3],
1155
  row: [  4,  4,  4,  4,  4,  2,  0, -2, -4, -4, -4, -4, -4, -2,  0,  2]
1156
);
1157
1158
/// Perform an uneven multi-hexagon search. There are 4 stages:
1159
/// 1. Unsymmetrical-cross search: Search the horizontal and vertical directions
1160
///   for the general direction of the motion.
1161
/// 2. A 5x5 full search is done to refine the current candidate.
1162
/// 3. Uneven multi-hexagon search. See [`UMH_PATTERN`].
1163
/// 4. Refinement using standard hexagon search.
1164
///
1165
/// `current` provides the initial search location and serves as
1166
/// the output for the final search results.
1167
///
1168
/// `me_range` parameter determines how far these stages can search.
1169
#[profiling::function]
1170
fn uneven_multi_hex_search<T: Pixel>(
1171
  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1172
  p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
1173
  pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
1174
  mvy_min: isize, mvy_max: isize, w: usize, h: usize, me_range: i16,
1175
) {
1176
  assert!(!current.is_empty());
1177
1178
  // Search in a cross pattern to obtain a rough approximate of motion.
1179
  // The cross is split into a horizontal and vertical component. Video content
1180
  // tends to have more horizontal motion, so the horizontal part of the cross
1181
  // is twice as large as the vertical half.
1182
  //        X        -
1183
  //                 | <- me_range/2
1184
  //        X        |
1185
  // X X X XoX X X X -
1186
  //        X
1187
  //
1188
  //        X
1189
  // |------|
1190
  //     \
1191
  //    me_range
1192
  let center = current.mv;
1193
1194
  // The larger, horizontal, part of the cross search.
1195
  for i in (1..=me_range).step_by(2) {
1196
    const HORIZONTAL_LINE: [MotionVector; 2] = search_pattern!(
1197
      col: [ 0, 0],
1198
      row: [-1, 1]
1199
    );
1200
1201
    for &offset in &HORIZONTAL_LINE {
1202
      let cand_mv = center + offset * i;
1203
      let rd = get_fullpel_mv_rd(
1204
        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1205
        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1206
      );
1207
1208
      if rd.cost < current.rd.cost {
1209
        current.mv = cand_mv;
1210
        current.rd = rd;
1211
      }
1212
    }
1213
  }
1214
1215
  // The smaller, vertical, part of the cross search
1216
  for i in (1..=me_range >> 1).step_by(2) {
1217
    const VERTICAL_LINE: [MotionVector; 2] = search_pattern!(
1218
      col: [-1, 1],
1219
      row: [ 0, 0]
1220
    );
1221
1222
    for &offset in &VERTICAL_LINE {
1223
      let cand_mv = center + offset * i;
1224
      let rd = get_fullpel_mv_rd(
1225
        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1226
        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1227
      );
1228
1229
      if rd.cost < current.rd.cost {
1230
        current.mv = cand_mv;
1231
        current.rd = rd;
1232
      }
1233
    }
1234
  }
1235
1236
  // 5x5 full search. Search a 5x5 square region around the current best mv.
1237
  let center = current.mv;
1238
  for row in -2..=2 {
1239
    for col in -2..=2 {
1240
      if row == 0 && col == 0 {
1241
        continue;
1242
      }
1243
      let cand_mv = center + MotionVector { row, col };
1244
      let rd = get_fullpel_mv_rd(
1245
        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1246
        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1247
      );
1248
1249
      if rd.cost < current.rd.cost {
1250
        current.mv = cand_mv;
1251
        current.rd = rd;
1252
      }
1253
    }
1254
  }
1255
1256
  // Run the hexagons in uneven multi-hexagon. The hexagonal pattern is tested
1257
  // around the best vector at multiple scales.
1258
  // Example of the UMH pattern run on a scale of 1 and 2:
1259
  //         2         -
1260
  //                   | <- me_range
1261
  //     2       2     |
1262
  //                   |
1263
  // 2       1       2 |
1264
  //       1   1       |
1265
  // 2   1       1   2 |
1266
  //     1       1     |
1267
  // 2   1   o   1   2 |
1268
  //     1       1     |
1269
  // 2   1       1   2 |
1270
  //       1   1       |
1271
  // 2       1       2 |
1272
  //                   |
1273
  //     2       2     |
1274
  //                   |
1275
  //         2         -
1276
  // |---------------|
1277
  //        \
1278
  //       me_range
1279
  let center = current.mv;
1280
1281
  // Divide by 4, the radius of the UMH's hexagon.
1282
  let iterations = me_range >> 2;
1283
  for i in 1..=iterations {
1284
    for &offset in &UMH_PATTERN {
1285
      let cand_mv = center + offset * i;
1286
      let rd = get_fullpel_mv_rd(
1287
        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1288
        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1289
      );
1290
1291
      if rd.cost < current.rd.cost {
1292
        current.mv = cand_mv;
1293
        current.rd = rd;
1294
      }
1295
    }
1296
  }
1297
1298
  // Refine the search results using a 'normal' hexagon search.
1299
  hexagon_search(
1300
    fi, po, org_region, p_ref, current, bit_depth, pmv, lambda, mvx_min,
1301
    mvx_max, mvy_min, mvy_max, w, h,
1302
  );
1303
}
1304
1305
/// Run a subpixel diamond search. The search is run on multiple step sizes.
1306
///
1307
/// For each step size, candidate motion vectors are examined for improvement
1308
/// to the current search location. The search location is moved to the best
1309
/// candidate (if any). This is repeated until the search location stops moving.
1310
#[profiling::function]
1311
fn subpel_diamond_search<T: Pixel>(
1312
  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1313
  _p_ref: &Plane<T>, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
1314
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize,
1315
  h: usize, use_satd: bool, current: &mut MotionSearchResult,
1316
  ref_frame: RefType,
1317
) {
1318
  use crate::util::Aligned;
1319
1320
  // Motion compensation assembly has special requirements for edges
1321
  let mc_w = w.next_power_of_two();
1322
  let mc_h = (h + 1) & !1;
1323
1324
  // Metadata for subpel scratch pad.
1325
  let cfg = PlaneConfig::new(mc_w, mc_h, 0, 0, 0, 0, std::mem::size_of::<T>());
1326
  // Stack allocation for subpel scratch pad.
1327
  // SAFETY: We write to the array below before reading from it.
1328
  let mut buf: Aligned<[T; 128 * 128]> = unsafe { Aligned::uninitialized() };
1329
  let mut tmp_region = PlaneRegionMut::from_slice(
1330
    &mut buf.data,
1331
    &cfg,
1332
    Rect { x: 0, y: 0, width: cfg.width, height: cfg.height },
1333
  );
1334
1335
  // start at 1/2 pel and end at 1/4 or 1/8 pel
1336
  let (mut diamond_radius_log2, diamond_radius_end_log2) =
1337
    (2u8, u8::from(!fi.allow_high_precision_mv));
1338
1339
  loop {
1340
    // Find the best candidate from the diamond pattern.
1341
    let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1342
    for &offset in &DIAMOND_R1_PATTERN_SUBPEL {
1343
      let cand_mv = current.mv + (offset << diamond_radius_log2);
1344
1345
      let rd = get_subpel_mv_rd(
1346
        fi,
1347
        po,
1348
        org_region,
1349
        bit_depth,
1350
        pmv,
1351
        lambda,
1352
        use_satd,
1353
        mvx_min,
1354
        mvx_max,
1355
        mvy_min,
1356
        mvy_max,
1357
        w,
1358
        h,
1359
        cand_mv,
1360
        &mut tmp_region,
1361
        ref_frame,
1362
      );
1363
1364
      if rd.cost < best_cand.rd.cost {
1365
        best_cand.mv = cand_mv;
1366
        best_cand.rd = rd;
1367
      }
1368
    }
1369
1370
    // Continue the search at this scale until a better candidate isn't found.
1371
    if current.rd.cost <= best_cand.rd.cost {
1372
      if diamond_radius_log2 == diamond_radius_end_log2 {
1373
        break;
1374
      } else {
1375
        diamond_radius_log2 -= 1;
1376
      }
1377
    } else {
1378
      *current = best_cand;
1379
    }
1380
  }
1381
1382
  assert!(!current.is_empty());
1383
}
1384
1385
#[inline]
1386
0
fn get_fullpel_mv_rd<T: Pixel>(
1387
0
  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1388
0
  p_ref: &Plane<T>, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
1389
0
  use_satd: bool, mvx_min: isize, mvx_max: isize, mvy_min: isize,
1390
0
  mvy_max: isize, w: usize, h: usize, cand_mv: MotionVector,
1391
0
) -> MVCandidateRD {
1392
0
  if (cand_mv.col as isize) < mvx_min
1393
0
    || (cand_mv.col as isize) > mvx_max
1394
0
    || (cand_mv.row as isize) < mvy_min
1395
0
    || (cand_mv.row as isize) > mvy_max
1396
  {
1397
0
    return MVCandidateRD::empty();
1398
0
  }
1399
1400
  // Convert the motion vector into an full pixel offset.
1401
0
  let plane_ref = p_ref.region(Area::StartingAt {
1402
0
    x: po.x + (cand_mv.col / 8) as isize,
1403
0
    y: po.y + (cand_mv.row / 8) as isize,
1404
0
  });
1405
0
  compute_mv_rd(
1406
0
    fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, org_region,
1407
0
    &plane_ref,
1408
  )
1409
0
}
Unexecuted instantiation: rav1e::me::get_fullpel_mv_rd::<u16>
Unexecuted instantiation: rav1e::me::get_fullpel_mv_rd::<u8>
1410
1411
0
fn get_subpel_mv_rd<T: Pixel>(
1412
0
  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1413
0
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32, use_satd: bool,
1414
0
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize,
1415
0
  h: usize, cand_mv: MotionVector, tmp_region: &mut PlaneRegionMut<T>,
1416
0
  ref_frame: RefType,
1417
0
) -> MVCandidateRD {
1418
0
  if (cand_mv.col as isize) < mvx_min
1419
0
    || (cand_mv.col as isize) > mvx_max
1420
0
    || (cand_mv.row as isize) < mvy_min
1421
0
    || (cand_mv.row as isize) > mvy_max
1422
  {
1423
0
    return MVCandidateRD::empty();
1424
0
  }
1425
1426
0
  let tmp_width = tmp_region.rect().width;
1427
0
  let tmp_height = tmp_region.rect().height;
1428
0
  let tile_rect =
1429
0
    TileRect { x: 0, y: 0, width: tmp_width, height: tmp_height };
1430
1431
0
  PredictionMode::NEWMV.predict_inter_single(
1432
0
    fi, tile_rect, 0, po, tmp_region,
1433
    // motion comp's w & h on edges can be different than distortion's
1434
0
    tmp_width, tmp_height, ref_frame, cand_mv,
1435
  );
1436
0
  let plane_ref = tmp_region.as_const();
1437
0
  compute_mv_rd(
1438
0
    fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, org_region,
1439
0
    &plane_ref,
1440
  )
1441
0
}
Unexecuted instantiation: rav1e::me::get_subpel_mv_rd::<u16>
Unexecuted instantiation: rav1e::me::get_subpel_mv_rd::<u8>
1442
1443
/// Compute the rate distortion stats for a motion vector.
1444
#[inline(always)]
1445
0
fn compute_mv_rd<T: Pixel>(
1446
0
  fi: &FrameInvariants<T>, pmv: [MotionVector; 2], lambda: u32,
1447
0
  use_satd: bool, bit_depth: usize, w: usize, h: usize, cand_mv: MotionVector,
1448
0
  plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>,
1449
0
) -> MVCandidateRD {
1450
0
  let sad = if use_satd {
1451
0
    get_satd(plane_org, plane_ref, w, h, bit_depth, fi.cpu_feature_level)
1452
  } else {
1453
0
    get_sad(plane_org, plane_ref, w, h, bit_depth, fi.cpu_feature_level)
1454
  };
1455
1456
0
  let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
1457
0
  let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
1458
0
  let rate = rate1.min(rate2 + 1);
1459
1460
0
  MVCandidateRD { cost: 256 * sad as u64 + rate as u64 * lambda as u64, sad }
1461
0
}
Unexecuted instantiation: rav1e::me::compute_mv_rd::<u16>
Unexecuted instantiation: rav1e::me::compute_mv_rd::<u8>
1462
1463
#[profiling::function]
1464
fn full_search<T: Pixel>(
1465
  fi: &FrameInvariants<T>, x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize,
1466
  w: usize, h: usize, org_region: &PlaneRegion<T>, p_ref: &Plane<T>,
1467
  po: PlaneOffset, step: usize, lambda: u32, pmv: [MotionVector; 2],
1468
) -> MotionSearchResult {
1469
  let search_region = p_ref.region(Area::Rect {
1470
    x: x_lo,
1471
    y: y_lo,
1472
    width: (x_hi - x_lo) as usize + w,
1473
    height: (y_hi - y_lo) as usize + h,
1474
  });
1475
1476
  let mut best: MotionSearchResult = MotionSearchResult::empty();
1477
1478
  // Select rectangular regions within search region with vert+horz windows
1479
  for vert_window in search_region.vert_windows(h).step_by(step) {
1480
    for ref_window in vert_window.horz_windows(w).step_by(step) {
1481
      let &Rect { x, y, .. } = ref_window.rect();
1482
1483
      let mv = MotionVector {
1484
        row: 8 * (y as i16 - po.y as i16),
1485
        col: 8 * (x as i16 - po.x as i16),
1486
      };
1487
1488
      let rd = compute_mv_rd(
1489
        fi,
1490
        pmv,
1491
        lambda,
1492
        false,
1493
        fi.sequence.bit_depth,
1494
        w,
1495
        h,
1496
        mv,
1497
        org_region,
1498
        &ref_window,
1499
      );
1500
1501
      if rd.cost < best.rd.cost {
1502
        best.rd = rd;
1503
        best.mv = mv;
1504
      }
1505
    }
1506
  }
1507
1508
  best
1509
}
1510
1511
#[inline(always)]
1512
0
fn get_mv_rate(
1513
0
  a: MotionVector, b: MotionVector, allow_high_precision_mv: bool,
1514
0
) -> u32 {
1515
  #[inline(always)]
1516
0
  fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
1517
0
    let d = if allow_high_precision_mv { diff } else { diff >> 1 };
1518
0
    2 * ILog::ilog(d.abs()) as u32
1519
0
  }
1520
1521
0
  diff_to_rate(a.row - b.row, allow_high_precision_mv)
1522
0
    + diff_to_rate(a.col - b.col, allow_high_precision_mv)
1523
0
}