Coverage Report

Created: 2026-01-22 07:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/rav1e-0.8.1/src/lrf.rs
Line
Count
Source
1
// Copyright (c) 2017-2022, The rav1e contributors. All rights reserved
2
//
3
// This source code is subject to the terms of the BSD 2 Clause License and
4
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5
// was not distributed with this source code in the LICENSE file, you can
6
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7
// Media Patent License 1.0 was not distributed with this source code in the
8
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10
use std::cmp;
11
use std::iter::FusedIterator;
12
use std::ops::{Index, IndexMut};
13
14
use crate::api::SGRComplexityLevel;
15
use crate::color::ChromaSampling::Cs400;
16
use crate::context::{MAX_PLANES, SB_SIZE};
17
use crate::encoder::FrameInvariants;
18
use crate::frame::{
19
  AsRegion, Frame, Plane, PlaneConfig, PlaneOffset, PlaneSlice,
20
};
21
use crate::tiling::{Area, PlaneRegion, PlaneRegionMut, Rect};
22
use crate::util::{clamp, CastFromPrimitive, ILog, Pixel};
23
24
cfg_if::cfg_if! {
25
  if #[cfg(nasm_x86_64)] {
26
    use crate::asm::x86::lrf::*;
27
  } else {
28
    use self::rust::*;
29
  }
30
}
31
32
pub const RESTORATION_TILESIZE_MAX_LOG2: usize = 8;
33
34
pub const RESTORE_NONE: u8 = 0;
35
pub const RESTORE_SWITCHABLE: u8 = 1;
36
pub const RESTORE_WIENER: u8 = 2;
37
pub const RESTORE_SGRPROJ: u8 = 3;
38
39
pub const WIENER_TAPS_MIN: [i8; 3] = [-5, -23, -17];
40
pub const WIENER_TAPS_MID: [i8; 3] = [3, -7, 15];
41
pub const WIENER_TAPS_MAX: [i8; 3] = [10, 8, 46];
42
#[allow(unused)]
43
pub const WIENER_TAPS_K: [i8; 3] = [1, 2, 3];
44
pub const WIENER_BITS: usize = 7;
45
46
pub const SGRPROJ_XQD_MIN: [i8; 2] = [-96, -32];
47
pub const SGRPROJ_XQD_MID: [i8; 2] = [-32, 31];
48
pub const SGRPROJ_XQD_MAX: [i8; 2] = [31, 95];
49
pub const SGRPROJ_PRJ_SUBEXP_K: u8 = 4;
50
pub const SGRPROJ_PRJ_BITS: u8 = 7;
51
pub const SGRPROJ_PARAMS_BITS: u8 = 4;
52
pub const SGRPROJ_MTABLE_BITS: u8 = 20;
53
pub const SGRPROJ_SGR_BITS: u8 = 8;
54
pub const SGRPROJ_RECIP_BITS: u8 = 12;
55
pub const SGRPROJ_RST_BITS: u8 = 4;
56
pub const SGRPROJ_PARAMS_S: [[u32; 2]; 1 << SGRPROJ_PARAMS_BITS] = [
57
  [140, 3236],
58
  [112, 2158],
59
  [93, 1618],
60
  [80, 1438],
61
  [70, 1295],
62
  [58, 1177],
63
  [47, 1079],
64
  [37, 996],
65
  [30, 925],
66
  [25, 863],
67
  [0, 2589],
68
  [0, 1618],
69
  [0, 1177],
70
  [0, 925],
71
  [56, 0],
72
  [22, 0],
73
];
74
75
// List of indices to SGRPROJ_PARAMS_S values that at a given complexity level.
76
// SGRPROJ_ALL_SETS contains every possible index
77
const SGRPROJ_ALL_SETS: &[u8] =
78
  &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
79
// SGRPROJ_REDUCED_SETS has half of the values. Using only these values gives
80
// most of the gains from sgr. The decision of which values to use is somewhat
81
// arbitrary. The sgr parameters has 3 discontinuous groups. The first has both
82
// parameters as non-zero. The other two are distinguishable by which of the
83
// two parameters is zero. There are an even number of each of these groups and
84
// the non-zero parameters grow as the indices increase. This array uses the
85
// 1st, 3rd, ... smallest params of each group.
86
const SGRPROJ_REDUCED_SETS: &[u8] = &[1, 3, 5, 7, 9, 11, 13, 15];
87
88
0
pub const fn get_sgr_sets(complexity: SGRComplexityLevel) -> &'static [u8] {
89
0
  match complexity {
90
0
    SGRComplexityLevel::Full => SGRPROJ_ALL_SETS,
91
0
    SGRComplexityLevel::Reduced => SGRPROJ_REDUCED_SETS,
92
  }
93
0
}
94
95
pub const SOLVE_IMAGE_MAX: usize = 1 << RESTORATION_TILESIZE_MAX_LOG2;
96
pub const SOLVE_IMAGE_STRIDE: usize = SOLVE_IMAGE_MAX + 6 + 2;
97
pub const SOLVE_IMAGE_HEIGHT: usize = SOLVE_IMAGE_STRIDE;
98
pub const SOLVE_IMAGE_SIZE: usize = SOLVE_IMAGE_STRIDE * SOLVE_IMAGE_HEIGHT;
99
100
pub const STRIPE_IMAGE_MAX: usize = (1 << RESTORATION_TILESIZE_MAX_LOG2)
101
  + (1 << (RESTORATION_TILESIZE_MAX_LOG2 - 1));
102
pub const STRIPE_IMAGE_STRIDE: usize = STRIPE_IMAGE_MAX + 6 + 2;
103
pub const STRIPE_IMAGE_HEIGHT: usize = 64 + 6 + 2;
104
pub const STRIPE_IMAGE_SIZE: usize = STRIPE_IMAGE_STRIDE * STRIPE_IMAGE_HEIGHT;
105
106
pub const IMAGE_WIDTH_MAX: usize = [STRIPE_IMAGE_MAX, SOLVE_IMAGE_MAX]
107
  [(STRIPE_IMAGE_MAX < SOLVE_IMAGE_MAX) as usize];
108
109
/// The buffer used in `sgrproj_stripe_filter()` and `sgrproj_solve()`.
110
#[derive(Debug)]
111
pub struct IntegralImageBuffer {
112
  pub integral_image: Vec<u32>,
113
  pub sq_integral_image: Vec<u32>,
114
}
115
116
impl IntegralImageBuffer {
117
  /// Creates a new buffer with the given size, filled with zeros.
118
  #[inline]
119
0
  pub fn zeroed(size: usize) -> Self {
120
0
    Self { integral_image: vec![0; size], sq_integral_image: vec![0; size] }
121
0
  }
Unexecuted instantiation: <rav1e::lrf::IntegralImageBuffer>::zeroed
Unexecuted instantiation: <rav1e::lrf::IntegralImageBuffer>::zeroed
122
}
123
124
#[allow(unused)] // Wiener coming soon!
125
#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
126
pub enum RestorationFilter {
127
  #[default]
128
  None,
129
  Wiener {
130
    coeffs: [[i8; 3]; 2],
131
  },
132
  Sgrproj {
133
    set: u8,
134
    xqd: [i8; 2],
135
  },
136
}
137
138
impl RestorationFilter {
139
0
  pub const fn notequal(self, cmp: RestorationFilter) -> bool {
140
0
    match self {
141
0
      RestorationFilter::None => !matches!(cmp, RestorationFilter::None),
142
0
      RestorationFilter::Sgrproj { set, xqd } => {
143
0
        if let RestorationFilter::Sgrproj { set: set2, xqd: xqd2 } = cmp {
144
0
          !(set == set2 && xqd[0] == xqd2[0] && xqd[1] == xqd2[1])
145
        } else {
146
0
          true
147
        }
148
      }
149
0
      RestorationFilter::Wiener { coeffs } => {
150
0
        if let RestorationFilter::Wiener { coeffs: coeffs2 } = cmp {
151
0
          !(coeffs[0][0] == coeffs2[0][0]
152
0
            && coeffs[0][1] == coeffs2[0][1]
153
0
            && coeffs[0][2] == coeffs2[0][2]
154
0
            && coeffs[1][0] == coeffs2[1][0]
155
0
            && coeffs[1][1] == coeffs2[1][1]
156
0
            && coeffs[1][2] == coeffs2[1][2])
157
        } else {
158
0
          true
159
        }
160
      }
161
    }
162
0
  }
163
}
164
165
pub(crate) mod rust {
166
  use crate::cpu_features::CpuFeatureLevel;
167
  use crate::frame::PlaneSlice;
168
  use crate::lrf::{
169
    get_integral_square, sgrproj_sum_finish, SGRPROJ_RST_BITS,
170
    SGRPROJ_SGR_BITS,
171
  };
172
  use crate::util::CastFromPrimitive;
173
  use crate::Pixel;
174
175
  #[inline(always)]
176
0
  pub(crate) fn sgrproj_box_ab_internal<const BD: usize>(
177
0
    r: usize, af: &mut [u32], bf: &mut [u32], iimg: &[u32], iimg_sq: &[u32],
178
0
    iimg_stride: usize, start_x: usize, y: usize, stripe_w: usize, s: u32,
179
0
  ) {
180
0
    let d: usize = r * 2 + 1;
181
0
    let n: usize = d * d;
182
0
    let one_over_n = if r == 1 { 455 } else { 164 };
183
184
0
    assert!(iimg.len() > (y + d) * iimg_stride + stripe_w + 1 + d);
185
0
    assert!(iimg_sq.len() > (y + d) * iimg_stride + stripe_w + 1 + d);
186
0
    assert!(af.len() > stripe_w + 1);
187
0
    assert!(bf.len() > stripe_w + 1);
188
189
0
    for x in start_x..stripe_w + 2 {
190
      // SAFETY: We perform the bounds checks above, once for the whole loop
191
0
      unsafe {
192
0
        let sum = get_integral_square(iimg, iimg_stride, x, y, d);
193
0
        let ssq = get_integral_square(iimg_sq, iimg_stride, x, y, d);
194
0
        let (reta, retb) =
195
0
          sgrproj_sum_finish::<BD>(ssq, sum, n as u32, one_over_n, s);
196
0
        *af.get_unchecked_mut(x) = reta;
197
0
        *bf.get_unchecked_mut(x) = retb;
198
0
      }
199
    }
200
0
  }
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_ab_internal::<8>
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_ab_internal::<10>
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_ab_internal::<12>
201
202
  // computes an intermediate (ab) row for stripe_w + 2 columns at row y
203
0
  pub(crate) fn sgrproj_box_ab_r1<const BD: usize>(
204
0
    af: &mut [u32], bf: &mut [u32], iimg: &[u32], iimg_sq: &[u32],
205
0
    iimg_stride: usize, y: usize, stripe_w: usize, s: u32,
206
0
    _cpu: CpuFeatureLevel,
207
0
  ) {
208
0
    sgrproj_box_ab_internal::<BD>(
209
      1,
210
0
      af,
211
0
      bf,
212
0
      iimg,
213
0
      iimg_sq,
214
0
      iimg_stride,
215
      0,
216
0
      y,
217
0
      stripe_w,
218
0
      s,
219
    );
220
0
  }
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_ab_r1::<8>
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_ab_r1::<10>
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_ab_r1::<12>
221
222
  // computes an intermediate (ab) row for stripe_w + 2 columns at row y
223
0
  pub(crate) fn sgrproj_box_ab_r2<const BD: usize>(
224
0
    af: &mut [u32], bf: &mut [u32], iimg: &[u32], iimg_sq: &[u32],
225
0
    iimg_stride: usize, y: usize, stripe_w: usize, s: u32,
226
0
    _cpu: CpuFeatureLevel,
227
0
  ) {
228
0
    sgrproj_box_ab_internal::<BD>(
229
      2,
230
0
      af,
231
0
      bf,
232
0
      iimg,
233
0
      iimg_sq,
234
0
      iimg_stride,
235
      0,
236
0
      y,
237
0
      stripe_w,
238
0
      s,
239
    );
240
0
  }
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_ab_r2::<8>
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_ab_r2::<10>
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_ab_r2::<12>
241
242
0
  pub(crate) fn sgrproj_box_f_r0<T: Pixel>(
243
0
    f: &mut [u32], y: usize, w: usize, cdeffed: &PlaneSlice<T>,
244
0
    _cpu: CpuFeatureLevel,
245
0
  ) {
246
0
    sgrproj_box_f_r0_internal(f, 0, y, w, cdeffed);
247
0
  }
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_f_r0::<u16>
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_f_r0::<u8>
248
249
  #[inline(always)]
250
0
  pub(crate) fn sgrproj_box_f_r0_internal<T: Pixel>(
251
0
    f: &mut [u32], start_x: usize, y: usize, w: usize, cdeffed: &PlaneSlice<T>,
252
0
  ) {
253
0
    let line = cdeffed.row(y);
254
0
    for (fp, &v) in f[start_x..w].iter_mut().zip(line[start_x..w].iter()) {
255
0
      *fp = u32::cast_from(v) << SGRPROJ_RST_BITS;
256
0
    }
257
0
  }
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_f_r0_internal::<u16>
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_f_r0_internal::<u8>
258
259
0
  pub(crate) fn sgrproj_box_f_r1<T: Pixel>(
260
0
    af: &[&[u32]; 3], bf: &[&[u32]; 3], f: &mut [u32], y: usize, w: usize,
261
0
    cdeffed: &PlaneSlice<T>, _cpu: CpuFeatureLevel,
262
0
  ) {
263
0
    sgrproj_box_f_r1_internal(af, bf, f, 0, y, w, cdeffed);
264
0
  }
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_f_r1::<u16>
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_f_r1::<u8>
265
266
  #[inline(always)]
267
0
  pub(crate) fn sgrproj_box_f_r1_internal<T: Pixel>(
268
0
    af: &[&[u32]; 3], bf: &[&[u32]; 3], f: &mut [u32], start_x: usize,
269
0
    y: usize, w: usize, cdeffed: &PlaneSlice<T>,
270
0
  ) {
271
0
    let shift = 5 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
272
0
    let line = cdeffed.row(y);
273
0
    for x in start_x..w {
274
0
      let a = 3 * (af[0][x] + af[2][x] + af[0][x + 2] + af[2][x + 2])
275
0
        + 4
276
0
          * (af[1][x]
277
0
            + af[0][x + 1]
278
0
            + af[1][x + 1]
279
0
            + af[2][x + 1]
280
0
            + af[1][x + 2]);
281
0
      let b = 3 * (bf[0][x] + bf[2][x] + bf[0][x + 2] + bf[2][x + 2])
282
0
        + 4
283
0
          * (bf[1][x]
284
0
            + bf[0][x + 1]
285
0
            + bf[1][x + 1]
286
0
            + bf[2][x + 1]
287
0
            + bf[1][x + 2]);
288
0
      let v = a * u32::cast_from(line[x]) + b;
289
0
      f[x] = (v + (1 << shift >> 1)) >> shift;
290
0
    }
291
0
  }
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_f_r1_internal::<u16>
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_f_r1_internal::<u8>
292
293
0
  pub(crate) fn sgrproj_box_f_r2<T: Pixel>(
294
0
    af: &[&[u32]; 2], bf: &[&[u32]; 2], f0: &mut [u32], f1: &mut [u32],
295
0
    y: usize, w: usize, cdeffed: &PlaneSlice<T>, _cpu: CpuFeatureLevel,
296
0
  ) {
297
0
    sgrproj_box_f_r2_internal(af, bf, f0, f1, 0, y, w, cdeffed);
298
0
  }
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_f_r2::<u16>
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_f_r2::<u8>
299
300
  #[inline(always)]
301
0
  pub(crate) fn sgrproj_box_f_r2_internal<T: Pixel>(
302
0
    af: &[&[u32]; 2], bf: &[&[u32]; 2], f0: &mut [u32], f1: &mut [u32],
303
0
    start_x: usize, y: usize, w: usize, cdeffed: &PlaneSlice<T>,
304
0
  ) {
305
0
    let shift = 5 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
306
0
    let shifto = 4 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
307
0
    let line = cdeffed.row(y);
308
0
    let line1 = cdeffed.row(y + 1);
309
310
0
    let af0 = af[0][start_x..w + 3].windows(3);
311
0
    let af1 = af[1][start_x..w + 3].windows(3);
312
0
    let bf0 = bf[0][start_x..w + 3].windows(3);
313
0
    let bf1 = bf[1][start_x..w + 3].windows(3);
314
315
0
    let af_it = af0.zip(af1);
316
0
    let bf_it = bf0.zip(bf1);
317
318
0
    let in0 = line[start_x..w].iter();
319
0
    let in1 = line1[start_x..w].iter();
320
321
0
    let o0 = f0[start_x..w].iter_mut();
322
0
    let o1 = f1[start_x..w].iter_mut();
323
324
0
    let in_iter = in0.zip(in1);
325
0
    let out_iter = o0.zip(o1);
326
327
0
    let io_iter = out_iter.zip(in_iter);
328
329
0
    for (((o0, o1), (&p0, &p1)), ((af_0, af_1), (bf_0, bf_1))) in
330
0
      io_iter.zip(af_it.zip(bf_it))
331
0
    {
332
0
      let a = 5 * (af_0[0] + af_0[2]) + 6 * af_0[1];
333
0
      let b = 5 * (bf_0[0] + bf_0[2]) + 6 * bf_0[1];
334
0
      let ao = 5 * (af_1[0] + af_1[2]) + 6 * af_1[1];
335
0
      let bo = 5 * (bf_1[0] + bf_1[2]) + 6 * bf_1[1];
336
0
      let v = (a + ao) * u32::cast_from(p0) + b + bo;
337
0
      *o0 = (v + (1 << shift >> 1)) >> shift;
338
0
      let vo = ao * u32::cast_from(p1) + bo;
339
0
      *o1 = (vo + (1 << shifto >> 1)) >> shifto;
340
0
    }
341
0
  }
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_f_r2_internal::<u16>
Unexecuted instantiation: rav1e::lrf::rust::sgrproj_box_f_r2_internal::<u8>
342
}
343
344
#[inline(always)]
345
0
fn sgrproj_sum_finish<const BD: usize>(
346
0
  ssq: u32, sum: u32, n: u32, one_over_n: u32, s: u32,
347
0
) -> (u32, u32) {
348
0
  let bdm8 = BD - 8;
349
0
  let scaled_ssq = (ssq + (1 << (2 * bdm8) >> 1)) >> (2 * bdm8);
350
0
  let scaled_sum = (sum + (1 << bdm8 >> 1)) >> bdm8;
351
0
  let p = (scaled_ssq * n).saturating_sub(scaled_sum * scaled_sum);
352
0
  let z = (p * s + (1 << SGRPROJ_MTABLE_BITS >> 1)) >> SGRPROJ_MTABLE_BITS;
353
0
  let a = if z >= 255 {
354
0
    256
355
0
  } else if z == 0 {
356
0
    1
357
  } else {
358
0
    ((z << SGRPROJ_SGR_BITS) + z / 2) / (z + 1)
359
  };
360
0
  let b = ((1 << SGRPROJ_SGR_BITS) - a) * sum * one_over_n;
361
0
  (a, (b + (1 << SGRPROJ_RECIP_BITS >> 1)) >> SGRPROJ_RECIP_BITS)
362
0
}
Unexecuted instantiation: rav1e::lrf::sgrproj_sum_finish::<8>
Unexecuted instantiation: rav1e::lrf::sgrproj_sum_finish::<10>
Unexecuted instantiation: rav1e::lrf::sgrproj_sum_finish::<12>
363
364
// Using an integral image, compute the sum of a square region
365
// SAFETY: The size of `iimg` must be at least `(y + size) * stride + x + size`
366
#[inline(always)]
367
0
unsafe fn get_integral_square(
368
0
  iimg: &[u32], stride: usize, x: usize, y: usize, size: usize,
369
0
) -> u32 {
370
  // Cancel out overflow in iimg by using wrapping arithmetic
371
0
  let top_left = *iimg.get_unchecked(y * stride + x);
372
0
  let top_right = *iimg.get_unchecked(y * stride + x + size);
373
0
  let bottom_left = *iimg.get_unchecked((y + size) * stride + x);
374
0
  let bottom_right = *iimg.get_unchecked((y + size) * stride + x + size);
375
0
  top_left
376
0
    .wrapping_add(bottom_right)
377
0
    .wrapping_sub(bottom_left)
378
0
    .wrapping_sub(top_right)
379
0
}
380
381
struct VertPaddedIter<'a, T: Pixel> {
382
  // The two sources that can be selected when clipping
383
  deblocked: &'a Plane<T>,
384
  cdeffed: &'a Plane<T>,
385
  // x index to choice where on the row to start
386
  x: isize,
387
  // y index that will be mutated
388
  y: isize,
389
  // The index at which to terminate. Can be larger than the slice length.
390
  end: isize,
391
  // Used for source buffer choice/clipping. May (and regularly will)
392
  // be negative.
393
  stripe_begin: isize,
394
  // Also used for source buffer choice/clipping. May specify a stripe boundary
395
  // less than, equal to, or larger than the buffers we're accessing.
396
  stripe_end: isize,
397
  // Active area cropping is done by specifying a value smaller than the height
398
  // of the plane.
399
  crop: isize,
400
}
401
402
impl<'a, T: Pixel> VertPaddedIter<'a, T> {
403
0
  fn new(
404
0
    cdeffed: &PlaneSlice<'a, T>, deblocked: &PlaneSlice<'a, T>,
405
0
    stripe_h: usize, crop: usize,
406
0
  ) -> VertPaddedIter<'a, T> {
407
    // cdeffed and deblocked must start at the same coordinates from their
408
    // underlying planes. Since cropping is provided via a separate params, the
409
    // height of the underlying planes do not need to match.
410
0
    assert_eq!(cdeffed.x, deblocked.x);
411
0
    assert_eq!(cdeffed.y, deblocked.y);
412
413
    // To share integral images, always use the max box filter radius of 2
414
0
    let r = 2;
415
416
    // The number of rows outside the stripe are needed
417
0
    let rows_above = r + 2;
418
0
    let rows_below = 2;
419
420
    // Offset crop and stripe_h so they are relative to the underlying plane
421
    // and not the plane slice.
422
0
    let crop = crop as isize + deblocked.y;
423
0
    let stripe_end = stripe_h as isize + deblocked.y;
424
425
    // Move y up the number rows above.
426
    // If y is negative we repeat the first row
427
0
    let y = deblocked.y - rows_above as isize;
428
429
0
    VertPaddedIter {
430
0
      deblocked: deblocked.plane,
431
0
      cdeffed: cdeffed.plane,
432
0
      x: deblocked.x,
433
0
      y,
434
0
      end: (rows_above + stripe_h + rows_below) as isize + y,
435
0
      stripe_begin: deblocked.y,
436
0
      stripe_end,
437
0
      crop,
438
0
    }
439
0
  }
Unexecuted instantiation: <rav1e::lrf::VertPaddedIter<u16>>::new
Unexecuted instantiation: <rav1e::lrf::VertPaddedIter<u8>>::new
440
}
441
442
impl<'a, T: Pixel> Iterator for VertPaddedIter<'a, T> {
443
  type Item = &'a [T];
444
445
  #[inline(always)]
446
0
  fn next(&mut self) -> Option<Self::Item> {
447
0
    if self.end > self.y {
448
      // clamp before deciding the source
449
      // clamp vertically to storage at top and passed-in height at bottom
450
0
      let cropped_y = clamp(self.y, 0, self.crop - 1);
451
      // clamp vertically to stripe limits
452
0
      let ly = clamp(cropped_y, self.stripe_begin - 2, self.stripe_end + 1);
453
454
      // decide if we're vertically inside or outside the strip
455
0
      let src_plane = if ly >= self.stripe_begin && ly < self.stripe_end {
456
0
        self.cdeffed
457
      } else {
458
0
        self.deblocked
459
      };
460
      // cannot directly return self.ps.row(row) due to lifetime issue
461
0
      let range = src_plane.row_range(self.x, ly);
462
0
      self.y += 1;
463
0
      Some(&src_plane.data[range])
464
    } else {
465
0
      None
466
    }
467
0
  }
Unexecuted instantiation: <rav1e::lrf::VertPaddedIter<u16> as core::iter::traits::iterator::Iterator>::next
Unexecuted instantiation: <rav1e::lrf::VertPaddedIter<u8> as core::iter::traits::iterator::Iterator>::next
468
469
0
  fn size_hint(&self) -> (usize, Option<usize>) {
470
0
    let remaining = self.end - self.y;
471
0
    debug_assert!(remaining >= 0);
472
0
    let remaining = remaining as usize;
473
474
0
    (remaining, Some(remaining))
475
0
  }
476
}
477
478
impl<T: Pixel> ExactSizeIterator for VertPaddedIter<'_, T> {}
479
impl<T: Pixel> FusedIterator for VertPaddedIter<'_, T> {}
480
481
struct HorzPaddedIter<'a, T: Pixel> {
482
  // Active area cropping is done using the length of the slice
483
  slice: &'a [T],
484
  // x index of the iterator
485
  // When less than 0, repeat the first element. When greater than end, repeat
486
  // the last element
487
  index: isize,
488
  // The index at which to terminate. Can be larger than the slice length.
489
  end: usize,
490
}
491
492
impl<'a, T: Pixel> HorzPaddedIter<'a, T> {
493
0
  fn new(
494
0
    slice: &'a [T], start_index: isize, width: usize,
495
0
  ) -> HorzPaddedIter<'a, T> {
496
0
    HorzPaddedIter {
497
0
      slice,
498
0
      index: start_index,
499
0
      end: (width as isize + start_index) as usize,
500
0
    }
501
0
  }
Unexecuted instantiation: <rav1e::lrf::HorzPaddedIter<u16>>::new
Unexecuted instantiation: <rav1e::lrf::HorzPaddedIter<u8>>::new
502
}
503
504
impl<'a, T: Pixel> Iterator for HorzPaddedIter<'a, T> {
505
  type Item = &'a T;
506
507
  #[inline(always)]
508
0
  fn next(&mut self) -> Option<Self::Item> {
509
0
    if self.index < self.end as isize {
510
      // clamp to the edges of the frame
511
0
      let x = clamp(self.index, 0, self.slice.len() as isize - 1) as usize;
512
0
      self.index += 1;
513
0
      Some(&self.slice[x])
514
    } else {
515
0
      None
516
    }
517
0
  }
Unexecuted instantiation: <rav1e::lrf::HorzPaddedIter<u16> as core::iter::traits::iterator::Iterator>::next
Unexecuted instantiation: <rav1e::lrf::HorzPaddedIter<u8> as core::iter::traits::iterator::Iterator>::next
518
519
  #[inline(always)]
520
0
  fn size_hint(&self) -> (usize, Option<usize>) {
521
0
    let size: usize = (self.end as isize - self.index) as usize;
522
0
    (size, Some(size))
523
0
  }
524
}
525
526
impl<T: Pixel> ExactSizeIterator for HorzPaddedIter<'_, T> {}
527
impl<T: Pixel> FusedIterator for HorzPaddedIter<'_, T> {}
528
529
#[profiling::function]
530
pub fn setup_integral_image<T: Pixel>(
531
  integral_image_buffer: &mut IntegralImageBuffer,
532
  integral_image_stride: usize, crop_w: usize, crop_h: usize, stripe_w: usize,
533
  stripe_h: usize, cdeffed: &PlaneSlice<T>, deblocked: &PlaneSlice<T>,
534
) {
535
  let integral_image = &mut integral_image_buffer.integral_image;
536
  let sq_integral_image = &mut integral_image_buffer.sq_integral_image;
537
538
  // Number of elements outside the stripe
539
  let left_w = 4; // max radius of 2 + 2 padding
540
  let right_w = 3; // max radius of 2 + 1 padding
541
542
  assert_eq!(cdeffed.x, deblocked.x);
543
544
  // Find how many unique elements to use to the left and right
545
  let left_uniques = if cdeffed.x == 0 { 0 } else { left_w };
546
  let right_uniques = right_w.min(crop_w - stripe_w);
547
548
  // Find the total number of unique elements used
549
  let row_uniques = left_uniques + stripe_w + right_uniques;
550
551
  // Negative start indices result in repeating the first element of the row
552
  let start_index_x = if cdeffed.x == 0 { -(left_w as isize) } else { 0 };
553
554
  let mut rows_iter = VertPaddedIter::new(
555
    // Move left to encompass all the used data
556
    &cdeffed.go_left(left_uniques),
557
    &deblocked.go_left(left_uniques),
558
    // since r2 uses every other row, we need an extra row if stripe_h is odd
559
    stripe_h + (stripe_h & 1),
560
    crop_h,
561
  )
562
0
  .map(|row: &[T]| {
563
0
    HorzPaddedIter::new(
564
      // Limit how many unique elements we use
565
0
      &row[..row_uniques],
566
0
      start_index_x,
567
0
      left_w + stripe_w + right_w,
568
    )
569
0
  });
Unexecuted instantiation: rav1e::lrf::setup_integral_image::<u16>::{closure#0}
Unexecuted instantiation: rav1e::lrf::setup_integral_image::<u8>::{closure#0}
570
571
  // Setup the first row
572
  {
573
    let mut sum: u32 = 0;
574
    let mut sq_sum: u32 = 0;
575
    // Remove the first row and use it outside of the main loop
576
    let row = rows_iter.next().unwrap();
577
    for (src, (integral, sq_integral)) in
578
      row.zip(integral_image.iter_mut().zip(sq_integral_image.iter_mut()))
579
    {
580
      let current = u32::cast_from(*src);
581
582
      // Wrap adds to prevent undefined behaviour on overflow. Overflow is
583
      // cancelled out when calculating the sum of a region.
584
      sum = sum.wrapping_add(current);
585
      *integral = sum;
586
      sq_sum = sq_sum.wrapping_add(current * current);
587
      *sq_integral = sq_sum;
588
    }
589
  }
590
  // Calculate all other rows
591
  let mut integral_slice = &mut integral_image[..];
592
  let mut sq_integral_slice = &mut sq_integral_image[..];
593
  for row in rows_iter {
594
    let mut sum: u32 = 0;
595
    let mut sq_sum: u32 = 0;
596
597
    // Split the data between the previous row and future rows.
598
    // This allows us to mutate the current row while accessing the
599
    // previous row.
600
    let (integral_row_prev, integral_row) =
601
      integral_slice.split_at_mut(integral_image_stride);
602
    let (sq_integral_row_prev, sq_integral_row) =
603
      sq_integral_slice.split_at_mut(integral_image_stride);
604
    for (
605
      src,
606
      ((integral_above, sq_integral_above), (integral, sq_integral)),
607
    ) in row.zip(
608
      integral_row_prev
609
        .iter()
610
        .zip(sq_integral_row_prev.iter())
611
        .zip(integral_row.iter_mut().zip(sq_integral_row.iter_mut())),
612
    ) {
613
      let current = u32::cast_from(*src);
614
      // Wrap adds to prevent undefined behaviour on overflow. Overflow is
615
      // cancelled out when calculating the sum of a region.
616
      sum = sum.wrapping_add(current);
617
      *integral = sum.wrapping_add(*integral_above);
618
      sq_sum = sq_sum.wrapping_add(current * current);
619
      *sq_integral = sq_sum.wrapping_add(*sq_integral_above);
620
    }
621
622
    // The current row also contains all future rows. Replacing the slice with
623
    // it moves down a row.
624
    integral_slice = integral_row;
625
    sq_integral_slice = sq_integral_row;
626
  }
627
}
628
629
#[profiling::function]
630
pub fn sgrproj_stripe_filter<T: Pixel, U: Pixel>(
631
  set: u8, xqd: [i8; 2], fi: &FrameInvariants<T>,
632
  integral_image_buffer: &IntegralImageBuffer, integral_image_stride: usize,
633
  cdeffed: &PlaneSlice<U>, out: &mut PlaneRegionMut<U>,
634
) {
635
  let &Rect { width: stripe_w, height: stripe_h, .. } = out.rect();
636
  let mut a_r2: [[u32; IMAGE_WIDTH_MAX + 2]; 2] =
637
    [[0; IMAGE_WIDTH_MAX + 2]; 2];
638
  let mut b_r2: [[u32; IMAGE_WIDTH_MAX + 2]; 2] =
639
    [[0; IMAGE_WIDTH_MAX + 2]; 2];
640
  let mut f_r2_0: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
641
  let mut f_r2_1: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
642
  let mut a_r1: [[u32; IMAGE_WIDTH_MAX + 2]; 3] =
643
    [[0; IMAGE_WIDTH_MAX + 2]; 3];
644
  let mut b_r1: [[u32; IMAGE_WIDTH_MAX + 2]; 3] =
645
    [[0; IMAGE_WIDTH_MAX + 2]; 3];
646
  let mut f_r1: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
647
648
  let s_r2: u32 = SGRPROJ_PARAMS_S[set as usize][0];
649
  let s_r1: u32 = SGRPROJ_PARAMS_S[set as usize][1];
650
651
  let fn_ab_r1 = match fi.sequence.bit_depth {
652
    8 => sgrproj_box_ab_r1::<8>,
653
    10 => sgrproj_box_ab_r1::<10>,
654
    12 => sgrproj_box_ab_r1::<12>,
655
    _ => unimplemented!(),
656
  };
657
  let fn_ab_r2 = match fi.sequence.bit_depth {
658
    8 => sgrproj_box_ab_r2::<8>,
659
    10 => sgrproj_box_ab_r2::<10>,
660
    12 => sgrproj_box_ab_r2::<12>,
661
    _ => unimplemented!(),
662
  };
663
664
  /* prime the intermediate arrays */
665
  // One oddness about the radius=2 intermediate array computations that
666
  // the spec doesn't make clear: Although the spec defines computation
667
  // of every row (of a, b and f), only half of the rows (every-other
668
  // row) are actually used.
669
  let integral_image = &integral_image_buffer.integral_image;
670
  let sq_integral_image = &integral_image_buffer.sq_integral_image;
671
  if s_r2 > 0 {
672
    fn_ab_r2(
673
      &mut a_r2[0],
674
      &mut b_r2[0],
675
      integral_image,
676
      sq_integral_image,
677
      integral_image_stride,
678
      0,
679
      stripe_w,
680
      s_r2,
681
      fi.cpu_feature_level,
682
    );
683
  }
684
  if s_r1 > 0 {
685
    let integral_image_offset = integral_image_stride + 1;
686
    fn_ab_r1(
687
      &mut a_r1[0],
688
      &mut b_r1[0],
689
      &integral_image[integral_image_offset..],
690
      &sq_integral_image[integral_image_offset..],
691
      integral_image_stride,
692
      0,
693
      stripe_w,
694
      s_r1,
695
      fi.cpu_feature_level,
696
    );
697
    fn_ab_r1(
698
      &mut a_r1[1],
699
      &mut b_r1[1],
700
      &integral_image[integral_image_offset..],
701
      &sq_integral_image[integral_image_offset..],
702
      integral_image_stride,
703
      1,
704
      stripe_w,
705
      s_r1,
706
      fi.cpu_feature_level,
707
    );
708
  }
709
710
  /* iterate by row */
711
  // Increment by two to handle the use of even rows by r=2 and run a nested
712
  //  loop to handle increments of one.
713
  for y in (0..stripe_h).step_by(2) {
714
    // get results to use y and y+1
715
    let f_r2_ab: [&[u32]; 2] = if s_r2 > 0 {
716
      fn_ab_r2(
717
        &mut a_r2[(y / 2 + 1) % 2],
718
        &mut b_r2[(y / 2 + 1) % 2],
719
        integral_image,
720
        sq_integral_image,
721
        integral_image_stride,
722
        y + 2,
723
        stripe_w,
724
        s_r2,
725
        fi.cpu_feature_level,
726
      );
727
      let ap0: [&[u32]; 2] = [&a_r2[(y / 2) % 2], &a_r2[(y / 2 + 1) % 2]];
728
      let bp0: [&[u32]; 2] = [&b_r2[(y / 2) % 2], &b_r2[(y / 2 + 1) % 2]];
729
      sgrproj_box_f_r2(
730
        &ap0,
731
        &bp0,
732
        &mut f_r2_0,
733
        &mut f_r2_1,
734
        y,
735
        stripe_w,
736
        cdeffed,
737
        fi.cpu_feature_level,
738
      );
739
      [&f_r2_0, &f_r2_1]
740
    } else {
741
      sgrproj_box_f_r0(
742
        &mut f_r2_0,
743
        y,
744
        stripe_w,
745
        cdeffed,
746
        fi.cpu_feature_level,
747
      );
748
      // share results for both rows
749
      [&f_r2_0, &f_r2_0]
750
    };
751
    for dy in 0..(2.min(stripe_h - y)) {
752
      let y = y + dy;
753
      if s_r1 > 0 {
754
        let integral_image_offset = integral_image_stride + 1;
755
        fn_ab_r1(
756
          &mut a_r1[(y + 2) % 3],
757
          &mut b_r1[(y + 2) % 3],
758
          &integral_image[integral_image_offset..],
759
          &sq_integral_image[integral_image_offset..],
760
          integral_image_stride,
761
          y + 2,
762
          stripe_w,
763
          s_r1,
764
          fi.cpu_feature_level,
765
        );
766
        let ap1: [&[u32]; 3] =
767
          [&a_r1[y % 3], &a_r1[(y + 1) % 3], &a_r1[(y + 2) % 3]];
768
        let bp1: [&[u32]; 3] =
769
          [&b_r1[y % 3], &b_r1[(y + 1) % 3], &b_r1[(y + 2) % 3]];
770
        sgrproj_box_f_r1(
771
          &ap1,
772
          &bp1,
773
          &mut f_r1,
774
          y,
775
          stripe_w,
776
          cdeffed,
777
          fi.cpu_feature_level,
778
        );
779
      } else {
780
        sgrproj_box_f_r0(
781
          &mut f_r1,
782
          y,
783
          stripe_w,
784
          cdeffed,
785
          fi.cpu_feature_level,
786
        );
787
      }
788
789
      /* apply filter */
790
      let w0 = xqd[0] as i32;
791
      let w1 = xqd[1] as i32;
792
      let w2 = (1 << SGRPROJ_PRJ_BITS) - w0 - w1;
793
794
      let line = &cdeffed[y];
795
796
      #[inline(always)]
797
0
      fn apply_filter<U: Pixel>(
798
0
        out: &mut [U], line: &[U], f_r1: &[u32], f_r2_ab: &[u32],
799
0
        stripe_w: usize, bit_depth: usize, w0: i32, w1: i32, w2: i32,
800
0
      ) {
801
0
        let line_it = line[..stripe_w].iter();
802
0
        let f_r2_ab_it = f_r2_ab[..stripe_w].iter();
803
0
        let f_r1_it = f_r1[..stripe_w].iter();
804
0
        let out_it = out[..stripe_w].iter_mut();
805
806
0
        for ((o, &u), (&f_r2_ab, &f_r1)) in
807
0
          out_it.zip(line_it).zip(f_r2_ab_it.zip(f_r1_it))
808
0
        {
809
0
          let u = i32::cast_from(u) << SGRPROJ_RST_BITS;
810
0
          let v = w0 * f_r2_ab as i32 + w1 * u + w2 * f_r1 as i32;
811
0
          let s = (v + (1 << (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) >> 1))
812
0
            >> (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS);
813
0
          *o = U::cast_from(clamp(s, 0, (1 << bit_depth) - 1));
814
0
        }
815
0
      }
Unexecuted instantiation: rav1e::lrf::sgrproj_stripe_filter::apply_filter::<u16>
Unexecuted instantiation: rav1e::lrf::sgrproj_stripe_filter::apply_filter::<u8>
816
817
      apply_filter(
818
        &mut out[y],
819
        line,
820
        &f_r1,
821
        f_r2_ab[dy],
822
        stripe_w,
823
        fi.sequence.bit_depth,
824
        w0,
825
        w1,
826
        w2,
827
      );
828
    }
829
  }
830
}
831
832
// Frame inputs below aren't all equal, and will change as work
833
// continues.  There's no deblocked reconstruction available at this
834
// point of RDO, so we use the non-deblocked reconstruction, cdef and
835
// input.  The input can be a full-sized frame. Cdef input is a partial
836
// frame constructed specifically for RDO.
837
838
// For simplicity, this ignores stripe segmentation (it's possible the
839
// extra complexity isn't worth it and we'll ignore stripes
840
// permanently during RDO, but that's not been tested yet). Data
841
// access inside the cdef frame is monolithic and clipped to the cdef
842
// borders.
843
844
// Input params follow the same rules as sgrproj_stripe_filter.
845
// Inputs are relative to the colocated slice views.
846
#[profiling::function]
847
pub fn sgrproj_solve<T: Pixel>(
848
  set: u8, fi: &FrameInvariants<T>,
849
  integral_image_buffer: &IntegralImageBuffer, input: &PlaneRegion<'_, T>,
850
  cdeffed: &PlaneSlice<T>, cdef_w: usize, cdef_h: usize,
851
) -> (i8, i8) {
852
  let mut a_r2: [[u32; IMAGE_WIDTH_MAX + 2]; 2] =
853
    [[0; IMAGE_WIDTH_MAX + 2]; 2];
854
  let mut b_r2: [[u32; IMAGE_WIDTH_MAX + 2]; 2] =
855
    [[0; IMAGE_WIDTH_MAX + 2]; 2];
856
  let mut f_r2_0: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
857
  let mut f_r2_1: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
858
  let mut a_r1: [[u32; IMAGE_WIDTH_MAX + 2]; 3] =
859
    [[0; IMAGE_WIDTH_MAX + 2]; 3];
860
  let mut b_r1: [[u32; IMAGE_WIDTH_MAX + 2]; 3] =
861
    [[0; IMAGE_WIDTH_MAX + 2]; 3];
862
  let mut f_r1: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
863
864
  let s_r2: u32 = SGRPROJ_PARAMS_S[set as usize][0];
865
  let s_r1: u32 = SGRPROJ_PARAMS_S[set as usize][1];
866
867
  let mut h: [[f64; 2]; 2] = [[0., 0.], [0., 0.]];
868
  let mut c: [f64; 2] = [0., 0.];
869
870
  let fn_ab_r1 = match fi.sequence.bit_depth {
871
    8 => sgrproj_box_ab_r1::<8>,
872
    10 => sgrproj_box_ab_r1::<10>,
873
    12 => sgrproj_box_ab_r1::<12>,
874
    _ => unimplemented!(),
875
  };
876
  let fn_ab_r2 = match fi.sequence.bit_depth {
877
    8 => sgrproj_box_ab_r2::<8>,
878
    10 => sgrproj_box_ab_r2::<10>,
879
    12 => sgrproj_box_ab_r2::<12>,
880
    _ => unimplemented!(),
881
  };
882
883
  /* prime the intermediate arrays */
884
  // One oddness about the radius=2 intermediate array computations that
885
  // the spec doesn't make clear: Although the spec defines computation
886
  // of every row (of a, b and f), only half of the rows (every-other
887
  // row) are actually used.
888
  let integral_image = &integral_image_buffer.integral_image;
889
  let sq_integral_image = &integral_image_buffer.sq_integral_image;
890
  if s_r2 > 0 {
891
    fn_ab_r2(
892
      &mut a_r2[0],
893
      &mut b_r2[0],
894
      integral_image,
895
      sq_integral_image,
896
      SOLVE_IMAGE_STRIDE,
897
      0,
898
      cdef_w,
899
      s_r2,
900
      fi.cpu_feature_level,
901
    );
902
  }
903
  if s_r1 > 0 {
904
    let integral_image_offset = SOLVE_IMAGE_STRIDE + 1;
905
    fn_ab_r1(
906
      &mut a_r1[0],
907
      &mut b_r1[0],
908
      &integral_image[integral_image_offset..],
909
      &sq_integral_image[integral_image_offset..],
910
      SOLVE_IMAGE_STRIDE,
911
      0,
912
      cdef_w,
913
      s_r1,
914
      fi.cpu_feature_level,
915
    );
916
    fn_ab_r1(
917
      &mut a_r1[1],
918
      &mut b_r1[1],
919
      &integral_image[integral_image_offset..],
920
      &sq_integral_image[integral_image_offset..],
921
      SOLVE_IMAGE_STRIDE,
922
      1,
923
      cdef_w,
924
      s_r1,
925
      fi.cpu_feature_level,
926
    );
927
  }
928
929
  /* iterate by row */
930
  // Increment by two to handle the use of even rows by r=2 and run a nested
931
  //  loop to handle increments of one.
932
  for y in (0..cdef_h).step_by(2) {
933
    // get results to use y and y+1
934
    let f_r2_01: [&[u32]; 2] = if s_r2 > 0 {
935
      fn_ab_r2(
936
        &mut a_r2[(y / 2 + 1) % 2],
937
        &mut b_r2[(y / 2 + 1) % 2],
938
        integral_image,
939
        sq_integral_image,
940
        SOLVE_IMAGE_STRIDE,
941
        y + 2,
942
        cdef_w,
943
        s_r2,
944
        fi.cpu_feature_level,
945
      );
946
      let ap0: [&[u32]; 2] = [&a_r2[(y / 2) % 2], &a_r2[(y / 2 + 1) % 2]];
947
      let bp0: [&[u32]; 2] = [&b_r2[(y / 2) % 2], &b_r2[(y / 2 + 1) % 2]];
948
      sgrproj_box_f_r2(
949
        &ap0,
950
        &bp0,
951
        &mut f_r2_0,
952
        &mut f_r2_1,
953
        y,
954
        cdef_w,
955
        cdeffed,
956
        fi.cpu_feature_level,
957
      );
958
      [&f_r2_0, &f_r2_1]
959
    } else {
960
      sgrproj_box_f_r0(&mut f_r2_0, y, cdef_w, cdeffed, fi.cpu_feature_level);
961
      // share results for both rows
962
      [&f_r2_0, &f_r2_0]
963
    };
964
    for dy in 0..(2.min(cdef_h - y)) {
965
      let y = y + dy;
966
      if s_r1 > 0 {
967
        let integral_image_offset = SOLVE_IMAGE_STRIDE + 1;
968
        fn_ab_r1(
969
          &mut a_r1[(y + 2) % 3],
970
          &mut b_r1[(y + 2) % 3],
971
          &integral_image[integral_image_offset..],
972
          &sq_integral_image[integral_image_offset..],
973
          SOLVE_IMAGE_STRIDE,
974
          y + 2,
975
          cdef_w,
976
          s_r1,
977
          fi.cpu_feature_level,
978
        );
979
        let ap1: [&[u32]; 3] =
980
          [&a_r1[y % 3], &a_r1[(y + 1) % 3], &a_r1[(y + 2) % 3]];
981
        let bp1: [&[u32]; 3] =
982
          [&b_r1[y % 3], &b_r1[(y + 1) % 3], &b_r1[(y + 2) % 3]];
983
        sgrproj_box_f_r1(
984
          &ap1,
985
          &bp1,
986
          &mut f_r1,
987
          y,
988
          cdef_w,
989
          cdeffed,
990
          fi.cpu_feature_level,
991
        );
992
      } else {
993
        sgrproj_box_f_r0(&mut f_r1, y, cdef_w, cdeffed, fi.cpu_feature_level);
994
      }
995
996
      #[inline(always)]
997
0
      fn process_line<T: Pixel>(
998
0
        h: &mut [[f64; 2]; 2], c: &mut [f64; 2], cdeffed: &[T], input: &[T],
999
0
        f_r1: &[u32], f_r2_ab: &[u32], cdef_w: usize,
1000
0
      ) {
1001
0
        let cdeffed_it = cdeffed[..cdef_w].iter();
1002
0
        let input_it = input[..cdef_w].iter();
1003
0
        let f_r2_ab_it = f_r2_ab[..cdef_w].iter();
1004
0
        let f_r1_it = f_r1[..cdef_w].iter();
1005
1006
        #[derive(Debug, Copy, Clone)]
1007
        struct Sums {
1008
          h: [[i64; 2]; 2],
1009
          c: [i64; 2],
1010
        }
1011
1012
0
        let sums: Sums = cdeffed_it
1013
0
          .zip(input_it)
1014
0
          .zip(f_r2_ab_it.zip(f_r1_it))
1015
0
          .map(|((&u, &i), (&f2, &f1))| {
1016
0
            let u = i32::cast_from(u) << SGRPROJ_RST_BITS;
1017
0
            let s = (i32::cast_from(i) << SGRPROJ_RST_BITS) - u;
1018
0
            let f2 = f2 as i32 - u;
1019
0
            let f1 = f1 as i32 - u;
1020
0
            (s as i64, f1 as i64, f2 as i64)
1021
0
          })
Unexecuted instantiation: rav1e::lrf::sgrproj_solve::process_line::<u16>::{closure#0}
Unexecuted instantiation: rav1e::lrf::sgrproj_solve::process_line::<u8>::{closure#0}
1022
0
          .fold(Sums { h: [[0; 2]; 2], c: [0; 2] }, |sums, (s, f1, f2)| {
1023
0
            let mut ret: Sums = sums;
1024
0
            ret.h[0][0] += f2 * f2;
1025
0
            ret.h[1][1] += f1 * f1;
1026
0
            ret.h[0][1] += f1 * f2;
1027
0
            ret.c[0] += f2 * s;
1028
0
            ret.c[1] += f1 * s;
1029
0
            ret
1030
0
          });
Unexecuted instantiation: rav1e::lrf::sgrproj_solve::process_line::<u16>::{closure#1}
Unexecuted instantiation: rav1e::lrf::sgrproj_solve::process_line::<u8>::{closure#1}
1031
1032
0
        h[0][0] += sums.h[0][0] as f64;
1033
0
        h[1][1] += sums.h[1][1] as f64;
1034
0
        h[0][1] += sums.h[0][1] as f64;
1035
0
        c[0] += sums.c[0] as f64;
1036
0
        c[1] += sums.c[1] as f64;
1037
0
      }
Unexecuted instantiation: rav1e::lrf::sgrproj_solve::process_line::<u16>
Unexecuted instantiation: rav1e::lrf::sgrproj_solve::process_line::<u8>
1038
1039
      process_line(
1040
        &mut h,
1041
        &mut c,
1042
        &cdeffed[y],
1043
        &input[y],
1044
        &f_r1,
1045
        f_r2_01[dy],
1046
        cdef_w,
1047
      );
1048
    }
1049
  }
1050
1051
  // this is lifted almost in-tact from libaom
1052
  let n = cdef_w as f64 * cdef_h as f64;
1053
  h[0][0] /= n;
1054
  h[0][1] /= n;
1055
  h[1][1] /= n;
1056
  h[1][0] = h[0][1];
1057
  c[0] *= (1 << SGRPROJ_PRJ_BITS) as f64 / n;
1058
  c[1] *= (1 << SGRPROJ_PRJ_BITS) as f64 / n;
1059
  let (xq0, xq1) = if s_r2 == 0 {
1060
    // H matrix is now only the scalar h[1][1]
1061
    // C vector is now only the scalar c[1]
1062
    if h[1][1] == 0. {
1063
      (0, 0)
1064
    } else {
1065
      (0, (c[1] / h[1][1]).round() as i32)
1066
    }
1067
  } else if s_r1 == 0 {
1068
    // H matrix is now only the scalar h[0][0]
1069
    // C vector is now only the scalar c[0]
1070
    if h[0][0] == 0. {
1071
      (0, 0)
1072
    } else {
1073
      ((c[0] / h[0][0]).round() as i32, 0)
1074
    }
1075
  } else {
1076
    let det = h[0][0].mul_add(h[1][1], -h[0][1] * h[1][0]);
1077
    if det == 0. {
1078
      (0, 0)
1079
    } else {
1080
      // If scaling up dividend would overflow, instead scale down the divisor
1081
      let div1 = h[1][1].mul_add(c[0], -h[0][1] * c[1]);
1082
      let div2 = h[0][0].mul_add(c[1], -h[1][0] * c[0]);
1083
      ((div1 / det).round() as i32, (div2 / det).round() as i32)
1084
    }
1085
  };
1086
  {
1087
    let xqd0 =
1088
      clamp(xq0, SGRPROJ_XQD_MIN[0] as i32, SGRPROJ_XQD_MAX[0] as i32);
1089
    let xqd1 = clamp(
1090
      (1 << SGRPROJ_PRJ_BITS) - xqd0 - xq1,
1091
      SGRPROJ_XQD_MIN[1] as i32,
1092
      SGRPROJ_XQD_MAX[1] as i32,
1093
    );
1094
    (xqd0 as i8, xqd1 as i8)
1095
  }
1096
}
1097
1098
#[profiling::function]
1099
fn wiener_stripe_filter<T: Pixel>(
1100
  coeffs: [[i8; 3]; 2], fi: &FrameInvariants<T>, crop_w: usize, crop_h: usize,
1101
  stripe_w: usize, stripe_h: usize, stripe_x: usize, stripe_y: isize,
1102
  cdeffed: &Plane<T>, deblocked: &Plane<T>, out: &mut Plane<T>,
1103
) {
1104
  let bit_depth = fi.sequence.bit_depth;
1105
  let round_h = if bit_depth == 12 { 5 } else { 3 };
1106
  let round_v = if bit_depth == 12 { 9 } else { 11 };
1107
  let offset = 1 << (bit_depth + WIENER_BITS - round_h - 1);
1108
  let limit = (1 << (bit_depth + 1 + WIENER_BITS - round_h)) - 1;
1109
1110
  let mut coeffs_ = [[0; 3]; 2];
1111
  for i in 0..2 {
1112
    for j in 0..3 {
1113
      coeffs_[i][j] = i32::from(coeffs[i][j]);
1114
    }
1115
  }
1116
1117
  let mut work: [i32; SB_SIZE + 7] = [0; SB_SIZE + 7];
1118
  let vfilter: [i32; 7] = [
1119
    coeffs_[0][0],
1120
    coeffs_[0][1],
1121
    coeffs_[0][2],
1122
    128 - 2 * (coeffs_[0][0] + coeffs_[0][1] + coeffs_[0][2]),
1123
    coeffs_[0][2],
1124
    coeffs_[0][1],
1125
    coeffs_[0][0],
1126
  ];
1127
  let hfilter: [i32; 7] = [
1128
    coeffs_[1][0],
1129
    coeffs_[1][1],
1130
    coeffs_[1][2],
1131
    128 - 2 * (coeffs_[1][0] + coeffs_[1][1] + coeffs_[1][2]),
1132
    coeffs_[1][2],
1133
    coeffs_[1][1],
1134
    coeffs_[1][0],
1135
  ];
1136
1137
  // unlike x, our y can be negative to start as the first stripe
1138
  // starts off the top of the frame by 8 pixels, and can also run off the end of the frame
1139
  let start_wi = if stripe_y < 0 { -stripe_y } else { 0 } as usize;
1140
  let start_yi = if stripe_y < 0 { 0 } else { stripe_y } as usize;
1141
  let end_i = cmp::max(
1142
    0,
1143
    if stripe_h as isize + stripe_y > crop_h as isize {
1144
      crop_h as isize - stripe_y - start_wi as isize
1145
    } else {
1146
      stripe_h as isize - start_wi as isize
1147
    },
1148
  ) as usize;
1149
1150
  let mut out_slice =
1151
    out.mut_slice(PlaneOffset { x: 0, y: start_yi as isize });
1152
1153
  for xi in stripe_x..stripe_x + stripe_w {
1154
    let n = cmp::min(7, crop_w as isize + 3 - xi as isize);
1155
    for yi in stripe_y - 3..stripe_y + stripe_h as isize + 4 {
1156
      let mut acc = 0;
1157
      let src = if yi < stripe_y {
1158
        let ly = cmp::max(clamp(yi, 0, crop_h as isize - 1), stripe_y - 2);
1159
        deblocked.row(ly)
1160
      } else if yi < stripe_y + stripe_h as isize {
1161
        let ly = clamp(yi, 0, crop_h as isize - 1);
1162
        cdeffed.row(ly)
1163
      } else {
1164
        let ly = cmp::min(
1165
          clamp(yi, 0, crop_h as isize - 1),
1166
          stripe_y + stripe_h as isize + 1,
1167
        );
1168
        deblocked.row(ly)
1169
      };
1170
      let start = i32::cast_from(src[0]);
1171
      let end = i32::cast_from(src[crop_w - 1]);
1172
      for i in 0..3 - xi as isize {
1173
        acc += hfilter[i as usize] * start;
1174
      }
1175
1176
      let off = 3 - (xi as isize);
1177
      let s = cmp::max(0, off) as usize;
1178
      let s1 = (s as isize - off) as usize;
1179
      let n1 = (n - off) as usize;
1180
1181
      for (hf, &v) in hfilter[s..n as usize].iter().zip(src[s1..n1].iter()) {
1182
        acc += hf * i32::cast_from(v);
1183
      }
1184
1185
      for i in n..7 {
1186
        acc += hfilter[i as usize] * end;
1187
      }
1188
1189
      acc = (acc + (1 << round_h >> 1)) >> round_h;
1190
      work[(yi - stripe_y + 3) as usize] = clamp(acc, -offset, limit - offset);
1191
    }
1192
1193
    for (wi, dst) in (start_wi..start_wi + end_i)
1194
0
      .zip(out_slice.rows_iter_mut().map(|row| &mut row[xi]).take(end_i))
Unexecuted instantiation: rav1e::lrf::wiener_stripe_filter::<u16>::{closure#0}
Unexecuted instantiation: rav1e::lrf::wiener_stripe_filter::<u8>::{closure#0}
1195
    {
1196
      let mut acc = 0;
1197
      for (i, src) in (0..7).zip(work[wi..wi + 7].iter_mut()) {
1198
        acc += vfilter[i] * *src;
1199
      }
1200
      *dst = T::cast_from(clamp(
1201
        (acc + (1 << round_v >> 1)) >> round_v,
1202
        0,
1203
        (1 << bit_depth) - 1,
1204
      ));
1205
    }
1206
  }
1207
}
1208
1209
#[derive(Copy, Clone, Debug, Default)]
1210
pub struct RestorationUnit {
1211
  pub filter: RestorationFilter,
1212
}
1213
1214
#[derive(Clone, Debug)]
1215
pub struct FrameRestorationUnits {
1216
  units: Box<[RestorationUnit]>,
1217
  pub cols: usize,
1218
  pub rows: usize,
1219
}
1220
1221
impl FrameRestorationUnits {
1222
0
  pub fn new(cols: usize, rows: usize) -> Self {
1223
0
    Self {
1224
0
      units: vec![RestorationUnit::default(); cols * rows].into_boxed_slice(),
1225
0
      cols,
1226
0
      rows,
1227
0
    }
1228
0
  }
1229
}
1230
1231
impl Index<usize> for FrameRestorationUnits {
1232
  type Output = [RestorationUnit];
1233
  #[inline(always)]
1234
0
  fn index(&self, index: usize) -> &Self::Output {
1235
0
    &self.units[index * self.cols..(index + 1) * self.cols]
1236
0
  }
1237
}
1238
1239
impl IndexMut<usize> for FrameRestorationUnits {
1240
  #[inline(always)]
1241
0
  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
1242
0
    &mut self.units[index * self.cols..(index + 1) * self.cols]
1243
0
  }
1244
}
1245
1246
#[derive(Clone, Debug)]
1247
pub struct RestorationPlaneConfig {
1248
  pub lrf_type: u8,
1249
  pub unit_size: usize,
1250
  // (1 << sb_x_shift) gives the number of superblocks horizontally or
1251
  // vertically in a restoration unit, not accounting for RU stretching
1252
  pub sb_h_shift: usize,
1253
  pub sb_v_shift: usize,
1254
  pub sb_cols: usize, // actual number of SB cols in this LRU (accounting for stretch and crop)
1255
  pub sb_rows: usize, // actual number of SB rows in this LRU (accounting for stretch and crop)
1256
  // stripe height is 64 in all cases except 4:2:0 chroma planes where
1257
  // it is 32.  This is independent of all other setup parameters
1258
  pub stripe_height: usize,
1259
  pub cols: usize,
1260
  pub rows: usize,
1261
}
1262
1263
#[derive(Clone, Debug)]
1264
pub struct RestorationPlane {
1265
  pub cfg: RestorationPlaneConfig,
1266
  pub units: FrameRestorationUnits,
1267
}
1268
1269
impl RestorationPlane {
1270
0
  pub fn new(
1271
0
    lrf_type: u8, unit_size: usize, sb_h_shift: usize, sb_v_shift: usize,
1272
0
    sb_cols: usize, sb_rows: usize, stripe_decimate: usize, cols: usize,
1273
0
    rows: usize,
1274
0
  ) -> RestorationPlane {
1275
0
    let stripe_height = if stripe_decimate != 0 { 32 } else { 64 };
1276
0
    RestorationPlane {
1277
0
      cfg: RestorationPlaneConfig {
1278
0
        lrf_type,
1279
0
        unit_size,
1280
0
        sb_h_shift,
1281
0
        sb_v_shift,
1282
0
        sb_cols,
1283
0
        sb_rows,
1284
0
        stripe_height,
1285
0
        cols,
1286
0
        rows,
1287
0
      },
1288
0
      units: FrameRestorationUnits::new(cols, rows),
1289
0
    }
1290
0
  }
1291
1292
  // Stripes are always 64 pixels high in a non-subsampled
1293
  // frame, and decimated from 64 pixels in chroma.  When
1294
  // filtering, they are not co-located on Y with superblocks.
1295
0
  fn restoration_unit_index_by_stripe(
1296
0
    &self, stripenum: usize, rux: usize,
1297
0
  ) -> (usize, usize) {
1298
0
    (
1299
0
      cmp::min(rux, self.cfg.cols - 1),
1300
0
      cmp::min(
1301
0
        stripenum * self.cfg.stripe_height / self.cfg.unit_size,
1302
0
        self.cfg.rows - 1,
1303
0
      ),
1304
0
    )
1305
0
  }
1306
1307
0
  pub fn restoration_unit_by_stripe(
1308
0
    &self, stripenum: usize, rux: usize,
1309
0
  ) -> &RestorationUnit {
1310
0
    let (x, y) = self.restoration_unit_index_by_stripe(stripenum, rux);
1311
0
    &self.units[y][x]
1312
0
  }
1313
}
1314
1315
#[derive(Clone, Debug)]
1316
pub struct RestorationState {
1317
  pub planes: [RestorationPlane; MAX_PLANES],
1318
}
1319
1320
impl RestorationState {
1321
0
  pub fn new<T: Pixel>(fi: &FrameInvariants<T>, input: &Frame<T>) -> Self {
1322
0
    let PlaneConfig { xdec, ydec, .. } = input.planes[1].cfg;
1323
    // stripe size is decimated in 4:2:0 (and only 4:2:0)
1324
0
    let stripe_uv_decimate = usize::from(xdec > 0 && ydec > 0);
1325
0
    let y_sb_log2 = if fi.sequence.use_128x128_superblock { 7 } else { 6 };
1326
0
    let uv_sb_h_log2 = y_sb_log2 - xdec;
1327
0
    let uv_sb_v_log2 = y_sb_log2 - ydec;
1328
1329
0
    let (lrf_y_shift, lrf_uv_shift) = if fi.sequence.enable_large_lru
1330
0
      && fi.sequence.enable_restoration
1331
    {
1332
0
      assert!(
1333
0
        fi.width > 1 && fi.height > 1,
1334
0
        "Width and height must be higher than 1 for LRF setup"
1335
      );
1336
1337
      // Specific content does affect optimal LRU size choice, but the
1338
      // quantizer in use is a surprisingly strong selector.
1339
0
      let lrf_base_shift = if fi.base_q_idx > 200 {
1340
0
        0 // big
1341
0
      } else if fi.base_q_idx > 160 {
1342
0
        1
1343
      } else {
1344
0
        2 // small
1345
      };
1346
0
      let lrf_chroma_shift = if stripe_uv_decimate > 0 {
1347
        // 4:2:0 only
1348
0
        if lrf_base_shift == 2 {
1349
0
          1 // smallest chroma LRU is a win at low quant
1350
        } else {
1351
          // Will a down-shifted chroma LRU eliminate stretch in chroma?
1352
          // If so, that's generally a win.
1353
0
          let lrf_unit_size =
1354
0
            1 << (RESTORATION_TILESIZE_MAX_LOG2 - lrf_base_shift);
1355
0
          let unshifted_stretch = ((fi.width >> xdec) - 1) % lrf_unit_size
1356
0
            <= lrf_unit_size / 2
1357
0
            || ((fi.height >> ydec) - 1) % lrf_unit_size <= lrf_unit_size / 2;
1358
0
          let shifted_stretch = ((fi.width >> xdec) - 1)
1359
0
            % (lrf_unit_size >> 1)
1360
0
            <= lrf_unit_size / 4
1361
0
            || ((fi.height >> ydec) - 1) % (lrf_unit_size >> 1)
1362
0
              <= lrf_unit_size / 4;
1363
          // shift to eliminate stretch if needed,
1364
          // otherwise do not shift and save the signaling bits
1365
0
          usize::from(unshifted_stretch && !shifted_stretch)
1366
        }
1367
      } else {
1368
0
        0
1369
      };
1370
0
      (lrf_base_shift, lrf_base_shift + lrf_chroma_shift)
1371
    } else {
1372
      // Explicit request to tie LRU size to superblock size ==
1373
      // smallest possible LRU size
1374
0
      let lrf_y_shift = if fi.sequence.use_128x128_superblock { 1 } else { 2 };
1375
0
      (lrf_y_shift, lrf_y_shift + stripe_uv_decimate)
1376
    };
1377
1378
0
    let mut y_unit_size = 1 << (RESTORATION_TILESIZE_MAX_LOG2 - lrf_y_shift);
1379
0
    let mut uv_unit_size = 1 << (RESTORATION_TILESIZE_MAX_LOG2 - lrf_uv_shift);
1380
1381
0
    let tiling = fi.sequence.tiling;
1382
    // Right now we defer to tiling setup: don't choose an LRU size
1383
    // large enough that a tile is not an integer number of LRUs
1384
    // wide/high.
1385
0
    if tiling.cols > 1 || tiling.rows > 1 {
1386
0
      // despite suggestions to the contrary, tiles can be
1387
0
      // non-powers-of-2.
1388
0
      let trailing_h_zeros = tiling.tile_width_sb.trailing_zeros() as usize;
1389
0
      let trailing_v_zeros = tiling.tile_height_sb.trailing_zeros() as usize;
1390
0
      let tile_aligned_y_unit_size =
1391
0
        1 << (y_sb_log2 + trailing_h_zeros.min(trailing_v_zeros));
1392
0
      let tile_aligned_uv_h_unit_size = 1 << (uv_sb_h_log2 + trailing_h_zeros);
1393
0
      let tile_aligned_uv_v_unit_size = 1 << (uv_sb_v_log2 + trailing_v_zeros);
1394
0
      y_unit_size = y_unit_size.min(tile_aligned_y_unit_size);
1395
0
      uv_unit_size = uv_unit_size
1396
0
        .min(tile_aligned_uv_h_unit_size.min(tile_aligned_uv_v_unit_size));
1397
0
1398
0
      // But it's actually worse: LRUs can't span tiles (in our
1399
0
      // one-pass design that is, spec allows it).  However, the spec
1400
0
      // mandates the last LRU stretches forward into any
1401
0
      // less-than-half-LRU span of superblocks at the right and
1402
0
      // bottom of a frame.  These superblocks may well be in a
1403
0
      // different tile!  Even if LRUs are minimum size (one
1404
0
      // superblock), when the right or bottom edge of the frame is a
1405
0
      // superblock that's less than half the width/height of a normal
1406
0
      // superblock, the LRU is forced by the spec to span into it
1407
0
      // (and thus a different tile).  Tiling is under no such
1408
0
      // restriction; it could decide the right/left sliver will be in
1409
0
      // its own tile row/column.  We can't disallow the combination
1410
0
      // here.  The tiling code will have to either prevent it or
1411
0
      // tolerate it.  (prayer mechanic == Issue #1629).
1412
0
    }
1413
1414
    // When coding 4:2:2 and 4:4:4, spec requires Y and UV LRU sizes
1415
    // to be the same*. If they differ at this
1416
    // point, it's due to a tiling restriction enforcing a maximum
1417
    // size, so force both to the smaller value.
1418
    //
1419
    // *see sec 5.9.20, "Loop restoration params syntax".  The
1420
    // bitstream provides means of coding a different UV LRU size only
1421
    // when chroma is in use and both x and y are subsampled in the
1422
    // chroma planes.
1423
0
    if ydec == 0 && y_unit_size != uv_unit_size {
1424
0
      y_unit_size = uv_unit_size.min(y_unit_size);
1425
0
      uv_unit_size = y_unit_size;
1426
0
    }
1427
1428
    // derive the rest
1429
0
    let y_unit_log2 = y_unit_size.ilog() - 1;
1430
0
    let uv_unit_log2 = uv_unit_size.ilog() - 1;
1431
0
    let y_cols = ((fi.width + (y_unit_size >> 1)) / y_unit_size).max(1);
1432
0
    let y_rows = ((fi.height + (y_unit_size >> 1)) / y_unit_size).max(1);
1433
0
    let uv_cols = ((((fi.width + (1 << xdec >> 1)) >> xdec)
1434
0
      + (uv_unit_size >> 1))
1435
0
      / uv_unit_size)
1436
0
      .max(1);
1437
0
    let uv_rows = ((((fi.height + (1 << ydec >> 1)) >> ydec)
1438
0
      + (uv_unit_size >> 1))
1439
0
      / uv_unit_size)
1440
0
      .max(1);
1441
1442
0
    RestorationState {
1443
0
      planes: [
1444
0
        RestorationPlane::new(
1445
0
          RESTORE_SWITCHABLE,
1446
0
          y_unit_size,
1447
0
          y_unit_log2 - y_sb_log2,
1448
0
          y_unit_log2 - y_sb_log2,
1449
0
          fi.sb_width,
1450
0
          fi.sb_height,
1451
0
          0,
1452
0
          y_cols,
1453
0
          y_rows,
1454
0
        ),
1455
0
        RestorationPlane::new(
1456
0
          RESTORE_SWITCHABLE,
1457
0
          uv_unit_size,
1458
0
          uv_unit_log2 - uv_sb_h_log2,
1459
0
          uv_unit_log2 - uv_sb_v_log2,
1460
0
          fi.sb_width,
1461
0
          fi.sb_height,
1462
0
          stripe_uv_decimate,
1463
0
          uv_cols,
1464
0
          uv_rows,
1465
0
        ),
1466
0
        RestorationPlane::new(
1467
0
          RESTORE_SWITCHABLE,
1468
0
          uv_unit_size,
1469
0
          uv_unit_log2 - uv_sb_h_log2,
1470
0
          uv_unit_log2 - uv_sb_v_log2,
1471
0
          fi.sb_width,
1472
0
          fi.sb_height,
1473
0
          stripe_uv_decimate,
1474
0
          uv_cols,
1475
0
          uv_rows,
1476
0
        ),
1477
0
      ],
1478
0
    }
1479
0
  }
Unexecuted instantiation: <rav1e::lrf::RestorationState>::new::<u16>
Unexecuted instantiation: <rav1e::lrf::RestorationState>::new::<u8>
1480
1481
  #[profiling::function]
1482
  pub fn lrf_filter_frame<T: Pixel>(
1483
    &mut self, out: &mut Frame<T>, pre_cdef: &Frame<T>,
1484
    fi: &FrameInvariants<T>,
1485
  ) {
1486
    let cdeffed = out.clone();
1487
    let planes =
1488
      if fi.sequence.chroma_sampling == Cs400 { 1 } else { MAX_PLANES };
1489
1490
    // unlike the other loop filters that operate over the padded
1491
    // frame dimensions, restoration filtering and source pixel
1492
    // accesses are clipped to the original frame dimensions
1493
    // that's why we use fi.width and fi.height instead of PlaneConfig fields
1494
1495
    // number of stripes (counted according to colocated Y luma position)
1496
    let stripe_n = (fi.height + 7) / 64 + 1;
1497
1498
    // Buffers for the stripe filter.
1499
    let mut stripe_filter_buffer =
1500
      IntegralImageBuffer::zeroed(STRIPE_IMAGE_SIZE);
1501
1502
    for pli in 0..planes {
1503
      let rp = &self.planes[pli];
1504
      let xdec = out.planes[pli].cfg.xdec;
1505
      let ydec = out.planes[pli].cfg.ydec;
1506
      let crop_w = (fi.width + (1 << xdec >> 1)) >> xdec;
1507
      let crop_h = (fi.height + (1 << ydec >> 1)) >> ydec;
1508
1509
      for si in 0..stripe_n {
1510
        let (stripe_start_y, stripe_size) = if si == 0 {
1511
          (0, (64 - 8) >> ydec)
1512
        } else {
1513
          let start = (si * 64 - 8) >> ydec;
1514
          (
1515
            start as isize,
1516
            // one past, unlike spec
1517
            (64 >> ydec).min(crop_h - start),
1518
          )
1519
        };
1520
1521
        // horizontally, go rdu-by-rdu
1522
        for rux in 0..rp.cfg.cols {
1523
          // stripe x pixel locations must be clipped to frame, last may need to stretch
1524
          let x = rux * rp.cfg.unit_size;
1525
          let size =
1526
            if rux == rp.cfg.cols - 1 { crop_w - x } else { rp.cfg.unit_size };
1527
          let ru = rp.restoration_unit_by_stripe(si, rux);
1528
          match ru.filter {
1529
            RestorationFilter::Wiener { coeffs } => {
1530
              wiener_stripe_filter(
1531
                coeffs,
1532
                fi,
1533
                crop_w,
1534
                crop_h,
1535
                size,
1536
                stripe_size,
1537
                x,
1538
                stripe_start_y,
1539
                &cdeffed.planes[pli],
1540
                &pre_cdef.planes[pli],
1541
                &mut out.planes[pli],
1542
              );
1543
            }
1544
            RestorationFilter::Sgrproj { set, xqd } => {
1545
              if !fi.sequence.enable_cdef {
1546
                continue;
1547
              }
1548
1549
              setup_integral_image(
1550
                &mut stripe_filter_buffer,
1551
                STRIPE_IMAGE_STRIDE,
1552
                crop_w - x,
1553
                (crop_h as isize - stripe_start_y) as usize,
1554
                size,
1555
                stripe_size,
1556
                &cdeffed.planes[pli]
1557
                  .slice(PlaneOffset { x: x as isize, y: stripe_start_y }),
1558
                &pre_cdef.planes[pli]
1559
                  .slice(PlaneOffset { x: x as isize, y: stripe_start_y }),
1560
              );
1561
1562
              sgrproj_stripe_filter(
1563
                set,
1564
                xqd,
1565
                fi,
1566
                &stripe_filter_buffer,
1567
                STRIPE_IMAGE_STRIDE,
1568
                &cdeffed.planes[pli]
1569
                  .slice(PlaneOffset { x: x as isize, y: stripe_start_y }),
1570
                &mut out.planes[pli].region_mut(Area::Rect {
1571
                  x: x as isize,
1572
                  y: stripe_start_y,
1573
                  width: size,
1574
                  height: stripe_size,
1575
                }),
1576
              );
1577
            }
1578
            RestorationFilter::None => {
1579
              // do nothing
1580
            }
1581
          }
1582
        }
1583
      }
1584
    }
1585
  }
1586
}