Coverage Report

Created: 2025-12-05 07:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/rav1e-0.8.1/src/mc.rs
Line
Count
Source
1
// Copyright (c) 2019-2022, The rav1e contributors. All rights reserved
2
//
3
// This source code is subject to the terms of the BSD 2 Clause License and
4
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5
// was not distributed with this source code in the LICENSE file, you can
6
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7
// Media Patent License 1.0 was not distributed with this source code in the
8
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10
cfg_if::cfg_if! {
11
  if #[cfg(nasm_x86_64)] {
12
    pub use crate::asm::x86::mc::*;
13
  } else if #[cfg(asm_neon)] {
14
    pub use crate::asm::aarch64::mc::*;
15
  } else {
16
    pub use self::rust::*;
17
  }
18
}
19
20
use crate::cpu_features::CpuFeatureLevel;
21
use crate::frame::*;
22
use crate::tiling::*;
23
use crate::util::*;
24
25
use simd_helpers::cold_for_target_arch;
26
use std::ops;
27
28
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
29
pub struct MotionVector {
30
  pub row: i16,
31
  pub col: i16,
32
}
33
34
impl MotionVector {
35
  #[inline]
36
0
  pub const fn quantize_to_fullpel(self) -> Self {
37
0
    Self { row: (self.row / 8) * 8, col: (self.col / 8) * 8 }
38
0
  }
39
40
  #[inline]
41
0
  pub const fn is_zero(self) -> bool {
42
0
    self.row == 0 && self.col == 0
43
0
  }
44
45
  #[inline]
46
0
  pub const fn is_valid(self) -> bool {
47
    use crate::context::{MV_LOW, MV_UPP};
48
0
    ((MV_LOW as i16) < self.row && self.row < (MV_UPP as i16))
49
0
      && ((MV_LOW as i16) < self.col && self.col < (MV_UPP as i16))
50
0
  }
51
}
52
53
impl ops::Mul<i16> for MotionVector {
54
  type Output = MotionVector;
55
56
  #[inline]
57
0
  fn mul(self, rhs: i16) -> MotionVector {
58
0
    MotionVector { row: self.row * rhs, col: self.col * rhs }
59
0
  }
Unexecuted instantiation: <rav1e::mc::MotionVector as core::ops::arith::Mul<i16>>::mul
Unexecuted instantiation: <rav1e::mc::MotionVector as core::ops::arith::Mul<i16>>::mul
60
}
61
62
impl ops::Mul<u16> for MotionVector {
63
  type Output = MotionVector;
64
65
  #[inline]
66
0
  fn mul(self, rhs: u16) -> MotionVector {
67
0
    MotionVector { row: self.row * rhs as i16, col: self.col * rhs as i16 }
68
0
  }
69
}
70
71
impl ops::Shr<u8> for MotionVector {
72
  type Output = MotionVector;
73
74
  #[inline]
75
0
  fn shr(self, rhs: u8) -> MotionVector {
76
0
    MotionVector { row: self.row >> rhs, col: self.col >> rhs }
77
0
  }
Unexecuted instantiation: <rav1e::mc::MotionVector as core::ops::bit::Shr<u8>>::shr
Unexecuted instantiation: <rav1e::mc::MotionVector as core::ops::bit::Shr<u8>>::shr
78
}
79
80
impl ops::Shl<u8> for MotionVector {
81
  type Output = MotionVector;
82
83
  #[inline]
84
0
  fn shl(self, rhs: u8) -> MotionVector {
85
0
    MotionVector { row: self.row << rhs, col: self.col << rhs }
86
0
  }
Unexecuted instantiation: <rav1e::mc::MotionVector as core::ops::bit::Shl<u8>>::shl
Unexecuted instantiation: <rav1e::mc::MotionVector as core::ops::bit::Shl<u8>>::shl
87
}
88
89
impl ops::Add<MotionVector> for MotionVector {
90
  type Output = MotionVector;
91
92
  #[inline]
93
0
  fn add(self, rhs: MotionVector) -> MotionVector {
94
0
    MotionVector { row: self.row + rhs.row, col: self.col + rhs.col }
95
0
  }
Unexecuted instantiation: <rav1e::mc::MotionVector as core::ops::arith::Add>::add
Unexecuted instantiation: <rav1e::mc::MotionVector as core::ops::arith::Add>::add
96
}
97
98
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd)]
99
#[allow(unused)]
100
pub enum FilterMode {
101
  REGULAR = 0,
102
  SMOOTH = 1,
103
  SHARP = 2,
104
  BILINEAR = 3,
105
  SWITCHABLE = 4,
106
}
107
108
pub const SUBPEL_FILTER_SIZE: usize = 8;
109
110
const SUBPEL_FILTERS: [[[i32; SUBPEL_FILTER_SIZE]; 16]; 6] = [
111
  [
112
    [0, 0, 0, 128, 0, 0, 0, 0],
113
    [0, 2, -6, 126, 8, -2, 0, 0],
114
    [0, 2, -10, 122, 18, -4, 0, 0],
115
    [0, 2, -12, 116, 28, -8, 2, 0],
116
    [0, 2, -14, 110, 38, -10, 2, 0],
117
    [0, 2, -14, 102, 48, -12, 2, 0],
118
    [0, 2, -16, 94, 58, -12, 2, 0],
119
    [0, 2, -14, 84, 66, -12, 2, 0],
120
    [0, 2, -14, 76, 76, -14, 2, 0],
121
    [0, 2, -12, 66, 84, -14, 2, 0],
122
    [0, 2, -12, 58, 94, -16, 2, 0],
123
    [0, 2, -12, 48, 102, -14, 2, 0],
124
    [0, 2, -10, 38, 110, -14, 2, 0],
125
    [0, 2, -8, 28, 116, -12, 2, 0],
126
    [0, 0, -4, 18, 122, -10, 2, 0],
127
    [0, 0, -2, 8, 126, -6, 2, 0],
128
  ],
129
  [
130
    [0, 0, 0, 128, 0, 0, 0, 0],
131
    [0, 2, 28, 62, 34, 2, 0, 0],
132
    [0, 0, 26, 62, 36, 4, 0, 0],
133
    [0, 0, 22, 62, 40, 4, 0, 0],
134
    [0, 0, 20, 60, 42, 6, 0, 0],
135
    [0, 0, 18, 58, 44, 8, 0, 0],
136
    [0, 0, 16, 56, 46, 10, 0, 0],
137
    [0, -2, 16, 54, 48, 12, 0, 0],
138
    [0, -2, 14, 52, 52, 14, -2, 0],
139
    [0, 0, 12, 48, 54, 16, -2, 0],
140
    [0, 0, 10, 46, 56, 16, 0, 0],
141
    [0, 0, 8, 44, 58, 18, 0, 0],
142
    [0, 0, 6, 42, 60, 20, 0, 0],
143
    [0, 0, 4, 40, 62, 22, 0, 0],
144
    [0, 0, 4, 36, 62, 26, 0, 0],
145
    [0, 0, 2, 34, 62, 28, 2, 0],
146
  ],
147
  [
148
    [0, 0, 0, 128, 0, 0, 0, 0],
149
    [-2, 2, -6, 126, 8, -2, 2, 0],
150
    [-2, 6, -12, 124, 16, -6, 4, -2],
151
    [-2, 8, -18, 120, 26, -10, 6, -2],
152
    [-4, 10, -22, 116, 38, -14, 6, -2],
153
    [-4, 10, -22, 108, 48, -18, 8, -2],
154
    [-4, 10, -24, 100, 60, -20, 8, -2],
155
    [-4, 10, -24, 90, 70, -22, 10, -2],
156
    [-4, 12, -24, 80, 80, -24, 12, -4],
157
    [-2, 10, -22, 70, 90, -24, 10, -4],
158
    [-2, 8, -20, 60, 100, -24, 10, -4],
159
    [-2, 8, -18, 48, 108, -22, 10, -4],
160
    [-2, 6, -14, 38, 116, -22, 10, -4],
161
    [-2, 6, -10, 26, 120, -18, 8, -2],
162
    [-2, 4, -6, 16, 124, -12, 6, -2],
163
    [0, 2, -2, 8, 126, -6, 2, -2],
164
  ],
165
  [
166
    [0, 0, 0, 128, 0, 0, 0, 0],
167
    [0, 0, 0, 120, 8, 0, 0, 0],
168
    [0, 0, 0, 112, 16, 0, 0, 0],
169
    [0, 0, 0, 104, 24, 0, 0, 0],
170
    [0, 0, 0, 96, 32, 0, 0, 0],
171
    [0, 0, 0, 88, 40, 0, 0, 0],
172
    [0, 0, 0, 80, 48, 0, 0, 0],
173
    [0, 0, 0, 72, 56, 0, 0, 0],
174
    [0, 0, 0, 64, 64, 0, 0, 0],
175
    [0, 0, 0, 56, 72, 0, 0, 0],
176
    [0, 0, 0, 48, 80, 0, 0, 0],
177
    [0, 0, 0, 40, 88, 0, 0, 0],
178
    [0, 0, 0, 32, 96, 0, 0, 0],
179
    [0, 0, 0, 24, 104, 0, 0, 0],
180
    [0, 0, 0, 16, 112, 0, 0, 0],
181
    [0, 0, 0, 8, 120, 0, 0, 0],
182
  ],
183
  [
184
    [0, 0, 0, 128, 0, 0, 0, 0],
185
    [0, 0, -4, 126, 8, -2, 0, 0],
186
    [0, 0, -8, 122, 18, -4, 0, 0],
187
    [0, 0, -10, 116, 28, -6, 0, 0],
188
    [0, 0, -12, 110, 38, -8, 0, 0],
189
    [0, 0, -12, 102, 48, -10, 0, 0],
190
    [0, 0, -14, 94, 58, -10, 0, 0],
191
    [0, 0, -12, 84, 66, -10, 0, 0],
192
    [0, 0, -12, 76, 76, -12, 0, 0],
193
    [0, 0, -10, 66, 84, -12, 0, 0],
194
    [0, 0, -10, 58, 94, -14, 0, 0],
195
    [0, 0, -10, 48, 102, -12, 0, 0],
196
    [0, 0, -8, 38, 110, -12, 0, 0],
197
    [0, 0, -6, 28, 116, -10, 0, 0],
198
    [0, 0, -4, 18, 122, -8, 0, 0],
199
    [0, 0, -2, 8, 126, -4, 0, 0],
200
  ],
201
  [
202
    [0, 0, 0, 128, 0, 0, 0, 0],
203
    [0, 0, 30, 62, 34, 2, 0, 0],
204
    [0, 0, 26, 62, 36, 4, 0, 0],
205
    [0, 0, 22, 62, 40, 4, 0, 0],
206
    [0, 0, 20, 60, 42, 6, 0, 0],
207
    [0, 0, 18, 58, 44, 8, 0, 0],
208
    [0, 0, 16, 56, 46, 10, 0, 0],
209
    [0, 0, 14, 54, 48, 12, 0, 0],
210
    [0, 0, 12, 52, 52, 12, 0, 0],
211
    [0, 0, 12, 48, 54, 14, 0, 0],
212
    [0, 0, 10, 46, 56, 16, 0, 0],
213
    [0, 0, 8, 44, 58, 18, 0, 0],
214
    [0, 0, 6, 42, 60, 20, 0, 0],
215
    [0, 0, 4, 40, 62, 22, 0, 0],
216
    [0, 0, 4, 36, 62, 26, 0, 0],
217
    [0, 0, 2, 34, 62, 30, 0, 0],
218
  ],
219
];
220
221
pub(crate) mod rust {
222
  use super::*;
223
  use num_traits::*;
224
225
0
  unsafe fn run_filter<T: AsPrimitive<i32>>(
226
0
    src: *const T, stride: usize, filter: [i32; 8],
227
0
  ) -> i32 {
228
0
    filter
229
0
      .iter()
230
0
      .enumerate()
231
0
      .map(|(i, f)| {
232
0
        let p = src.add(i * stride);
233
0
        f * (*p).as_()
234
0
      })
Unexecuted instantiation: rav1e::mc::rust::run_filter::<u16>::{closure#0}
Unexecuted instantiation: rav1e::mc::rust::run_filter::<u8>::{closure#0}
Unexecuted instantiation: rav1e::mc::rust::run_filter::<i16>::{closure#0}
235
0
      .sum::<i32>()
236
0
  }
Unexecuted instantiation: rav1e::mc::rust::run_filter::<u16>
Unexecuted instantiation: rav1e::mc::rust::run_filter::<u8>
Unexecuted instantiation: rav1e::mc::rust::run_filter::<i16>
237
238
0
  fn get_filter(
239
0
    mode: FilterMode, frac: i32, length: usize,
240
0
  ) -> [i32; SUBPEL_FILTER_SIZE] {
241
0
    let filter_idx = if mode == FilterMode::BILINEAR || length > 4 {
242
0
      mode as usize
243
    } else {
244
0
      (mode as usize).min(1) + 4
245
    };
246
0
    SUBPEL_FILTERS[filter_idx][frac as usize]
247
0
  }
248
249
  #[cold_for_target_arch("x86_64")]
250
0
  pub fn put_8tap<T: Pixel>(
251
0
    dst: &mut PlaneRegionMut<'_, T>, src: PlaneSlice<'_, T>, width: usize,
252
0
    height: usize, col_frac: i32, row_frac: i32, mode_x: FilterMode,
253
0
    mode_y: FilterMode, bit_depth: usize, _cpu: CpuFeatureLevel,
254
0
  ) {
255
    // The assembly only supports even heights and valid uncropped widths
256
0
    assert_eq!(height & 1, 0);
257
0
    assert!(width.is_power_of_two() && (2..=128).contains(&width));
258
259
0
    let ref_stride = src.plane.cfg.stride;
260
0
    let y_filter = get_filter(mode_y, row_frac, height);
261
0
    let x_filter = get_filter(mode_x, col_frac, width);
262
0
    let max_sample_val = (1 << bit_depth) - 1;
263
0
    let intermediate_bits = 4 - if bit_depth == 12 { 2 } else { 0 };
264
0
    match (col_frac, row_frac) {
265
      (0, 0) => {
266
0
        for r in 0..height {
267
0
          let src_slice = &src[r];
268
0
          let dst_slice = &mut dst[r];
269
0
          dst_slice[..width].copy_from_slice(&src_slice[..width]);
270
0
        }
271
      }
272
      (0, _) => {
273
0
        let offset_slice = src.go_up(3);
274
0
        for r in 0..height {
275
0
          let src_slice = &offset_slice[r];
276
0
          let dst_slice = &mut dst[r];
277
0
          for c in 0..width {
278
0
            dst_slice[c] = T::cast_from(
279
0
              round_shift(
280
0
                // SAFETY: We pass this a raw pointer, but it's created from a
281
0
                // checked slice, so we are safe.
282
0
                unsafe {
283
0
                  run_filter(src_slice[c..].as_ptr(), ref_stride, y_filter)
284
0
                },
285
0
                7,
286
0
              )
287
0
              .clamp(0, max_sample_val),
288
0
            );
289
0
          }
290
        }
291
      }
292
      (_, 0) => {
293
0
        let offset_slice = src.go_left(3);
294
0
        for r in 0..height {
295
0
          let src_slice = &offset_slice[r];
296
0
          let dst_slice = &mut dst[r];
297
0
          for c in 0..width {
298
0
            dst_slice[c] = T::cast_from(
299
0
              round_shift(
300
0
                round_shift(
301
0
                  // SAFETY: We pass this a raw pointer, but it's created from a
302
0
                  // checked slice, so we are safe.
303
0
                  unsafe { run_filter(src_slice[c..].as_ptr(), 1, x_filter) },
304
0
                  7 - intermediate_bits,
305
0
                ),
306
0
                intermediate_bits,
307
0
              )
308
0
              .clamp(0, max_sample_val),
309
0
            );
310
0
          }
311
        }
312
      }
313
      (_, _) => {
314
0
        let mut intermediate: [i16; 8 * (128 + 7)] = [0; 8 * (128 + 7)];
315
316
0
        let offset_slice = src.go_left(3).go_up(3);
317
0
        for cg in (0..width).step_by(8) {
318
0
          for r in 0..height + 7 {
319
0
            let src_slice = &offset_slice[r];
320
0
            for c in cg..(cg + 8).min(width) {
321
0
              intermediate[8 * r + (c - cg)] = round_shift(
322
0
                // SAFETY: We pass this a raw pointer, but it's created from a
323
0
                // checked slice, so we are safe.
324
0
                unsafe { run_filter(src_slice[c..].as_ptr(), 1, x_filter) },
325
0
                7 - intermediate_bits,
326
0
              ) as i16;
327
0
            }
328
          }
329
330
0
          for r in 0..height {
331
0
            let dst_slice = &mut dst[r];
332
0
            for c in cg..(cg + 8).min(width) {
333
0
              dst_slice[c] = T::cast_from(
334
0
                round_shift(
335
0
                  // SAFETY: We pass this a raw pointer, but it's created from a
336
0
                  // checked slice, so we are safe.
337
0
                  unsafe {
338
0
                    run_filter(
339
0
                      intermediate[8 * r + c - cg..].as_ptr(),
340
0
                      8,
341
0
                      y_filter,
342
0
                    )
343
0
                  },
344
0
                  7 + intermediate_bits,
345
0
                )
346
0
                .clamp(0, max_sample_val),
347
0
              );
348
0
            }
349
          }
350
        }
351
      }
352
    }
353
0
  }
Unexecuted instantiation: rav1e::mc::rust::put_8tap::<u16>
Unexecuted instantiation: rav1e::mc::rust::put_8tap::<u8>
354
355
  // HBD output interval is [-20588, 36956] (10-bit), [-20602, 36983] (12-bit)
356
  // Subtract PREP_BIAS to ensure result fits in i16 and matches dav1d assembly
357
  const PREP_BIAS: i32 = 8192;
358
359
  #[cold_for_target_arch("x86_64")]
360
0
  pub fn prep_8tap<T: Pixel>(
361
0
    tmp: &mut [i16], src: PlaneSlice<'_, T>, width: usize, height: usize,
362
0
    col_frac: i32, row_frac: i32, mode_x: FilterMode, mode_y: FilterMode,
363
0
    bit_depth: usize, _cpu: CpuFeatureLevel,
364
0
  ) {
365
    // The assembly only supports even heights and valid uncropped widths
366
0
    assert_eq!(height & 1, 0);
367
0
    assert!(width.is_power_of_two() && (2..=128).contains(&width));
368
369
0
    let ref_stride = src.plane.cfg.stride;
370
0
    let y_filter = get_filter(mode_y, row_frac, height);
371
0
    let x_filter = get_filter(mode_x, col_frac, width);
372
0
    let intermediate_bits = 4 - if bit_depth == 12 { 2 } else { 0 };
373
0
    let prep_bias = if bit_depth == 8 { 0 } else { PREP_BIAS };
374
0
    match (col_frac, row_frac) {
375
      (0, 0) => {
376
0
        for r in 0..height {
377
0
          let src_slice = &src[r];
378
0
          for c in 0..width {
379
0
            tmp[r * width + c] = (i16::cast_from(src_slice[c])
380
0
              << intermediate_bits)
381
0
              - prep_bias as i16;
382
0
          }
383
        }
384
      }
385
      (0, _) => {
386
0
        let offset_slice = src.go_up(3);
387
0
        for r in 0..height {
388
0
          let src_slice = &offset_slice[r];
389
0
          for c in 0..width {
390
0
            tmp[r * width + c] = (round_shift(
391
0
              // SAFETY: We pass this a raw pointer, but it's created from a
392
0
              // checked slice, so we are safe.
393
0
              unsafe {
394
0
                run_filter(src_slice[c..].as_ptr(), ref_stride, y_filter)
395
0
              },
396
0
              7 - intermediate_bits,
397
0
            ) - prep_bias) as i16;
398
0
          }
399
        }
400
      }
401
      (_, 0) => {
402
0
        let offset_slice = src.go_left(3);
403
0
        for r in 0..height {
404
0
          let src_slice = &offset_slice[r];
405
0
          for c in 0..width {
406
0
            tmp[r * width + c] = (round_shift(
407
0
              // SAFETY: We pass this a raw pointer, but it's created from a
408
0
              // checked slice, so we are safe.
409
0
              unsafe { run_filter(src_slice[c..].as_ptr(), 1, x_filter) },
410
0
              7 - intermediate_bits,
411
0
            ) - prep_bias) as i16;
412
0
          }
413
        }
414
      }
415
      (_, _) => {
416
0
        let mut intermediate: [i16; 8 * (128 + 7)] = [0; 8 * (128 + 7)];
417
418
0
        let offset_slice = src.go_left(3).go_up(3);
419
0
        for cg in (0..width).step_by(8) {
420
0
          for r in 0..height + 7 {
421
0
            let src_slice = &offset_slice[r];
422
0
            for c in cg..(cg + 8).min(width) {
423
0
              intermediate[8 * r + (c - cg)] = round_shift(
424
0
                // SAFETY: We pass this a raw pointer, but it's created from a
425
0
                // checked slice, so we are safe.
426
0
                unsafe { run_filter(src_slice[c..].as_ptr(), 1, x_filter) },
427
0
                7 - intermediate_bits,
428
0
              ) as i16;
429
0
            }
430
          }
431
432
0
          for r in 0..height {
433
0
            for c in cg..(cg + 8).min(width) {
434
0
              tmp[r * width + c] = (round_shift(
435
0
                // SAFETY: We pass this a raw pointer, but it's created from a
436
0
                // checked slice, so we are safe.
437
0
                unsafe {
438
0
                  run_filter(
439
0
                    intermediate[8 * r + c - cg..].as_ptr(),
440
0
                    8,
441
0
                    y_filter,
442
0
                  )
443
0
                },
444
0
                7,
445
0
              ) - prep_bias) as i16;
446
0
            }
447
          }
448
        }
449
      }
450
    }
451
0
  }
Unexecuted instantiation: rav1e::mc::rust::prep_8tap::<u16>
Unexecuted instantiation: rav1e::mc::rust::prep_8tap::<u8>
452
453
  #[cold_for_target_arch("x86_64")]
454
0
  pub fn mc_avg<T: Pixel>(
455
0
    dst: &mut PlaneRegionMut<'_, T>, tmp1: &[i16], tmp2: &[i16], width: usize,
456
0
    height: usize, bit_depth: usize, _cpu: CpuFeatureLevel,
457
0
  ) {
458
    // The assembly only supports even heights and valid uncropped widths
459
0
    assert_eq!(height & 1, 0);
460
0
    assert!(width.is_power_of_two() && (2..=128).contains(&width));
461
462
0
    let max_sample_val = (1 << bit_depth) - 1;
463
0
    let intermediate_bits = 4 - if bit_depth == 12 { 2 } else { 0 };
464
0
    let prep_bias = if bit_depth == 8 { 0 } else { PREP_BIAS * 2 };
465
0
    for r in 0..height {
466
0
      let dst_slice = &mut dst[r];
467
0
      for c in 0..width {
468
0
        dst_slice[c] = T::cast_from(
469
0
          round_shift(
470
0
            tmp1[r * width + c] as i32
471
0
              + tmp2[r * width + c] as i32
472
0
              + prep_bias,
473
0
            intermediate_bits + 1,
474
0
          )
475
0
          .clamp(0, max_sample_val),
476
0
        );
477
0
      }
478
    }
479
0
  }
Unexecuted instantiation: rav1e::mc::rust::mc_avg::<u16>
Unexecuted instantiation: rav1e::mc::rust::mc_avg::<u8>
480
}