Coverage Report

Created: 2026-01-10 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/rav1e-0.8.1/src/rdo.rs
Line
Count
Source
1
// Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
2
// Copyright (c) 2017-2022, The rav1e contributors. All rights reserved
3
//
4
// This source code is subject to the terms of the BSD 2 Clause License and
5
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
// was not distributed with this source code in the LICENSE file, you can
7
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
// Media Patent License 1.0 was not distributed with this source code in the
9
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
11
#![allow(non_camel_case_types)]
12
13
use std::fmt;
14
use std::mem::MaybeUninit;
15
16
use arrayvec::*;
17
use itertools::izip;
18
19
use crate::api::*;
20
use crate::cdef::*;
21
use crate::context::*;
22
use crate::cpu_features::CpuFeatureLevel;
23
use crate::deblock::*;
24
use crate::dist::*;
25
use crate::ec::{Writer, WriterCounter, OD_BITRES};
26
use crate::encode_block_with_modes;
27
use crate::encoder::{FrameInvariants, IMPORTANCE_BLOCK_SIZE};
28
use crate::frame::*;
29
use crate::header::ReferenceMode;
30
use crate::lrf::*;
31
use crate::mc::MotionVector;
32
use crate::me::estimate_motion;
33
use crate::me::MVSamplingMode;
34
use crate::me::MotionSearchResult;
35
use crate::motion_compensate;
36
use crate::partition::PartitionType::*;
37
use crate::partition::RefType::*;
38
use crate::partition::*;
39
use crate::predict::{
40
  luma_ac, AngleDelta, IntraEdgeFilterParameters, IntraParam, PredictionMode,
41
  RAV1E_INTER_COMPOUND_MODES, RAV1E_INTER_MODES_MINIMAL, RAV1E_INTRA_MODES,
42
};
43
use crate::rdo_tables::*;
44
use crate::tiling::*;
45
use crate::transform::{TxSet, TxSize, TxType, RAV1E_TX_TYPES};
46
use crate::util::{init_slice_repeat_mut, Aligned, Pixel};
47
use crate::write_tx_blocks;
48
use crate::write_tx_tree;
49
use crate::Tune;
50
use crate::{encode_block_post_cdef, encode_block_pre_cdef};
51
52
#[derive(Copy, Clone, PartialEq, Eq)]
53
pub enum RDOType {
54
  PixelDistRealRate,
55
  TxDistRealRate,
56
  TxDistEstRate,
57
}
58
59
impl RDOType {
60
  #[inline]
61
0
  pub const fn needs_tx_dist(self) -> bool {
62
0
    match self {
63
      // Pixel-domain distortion and exact ec rate
64
0
      RDOType::PixelDistRealRate => false,
65
      // Tx-domain distortion and exact ec rate
66
0
      RDOType::TxDistRealRate => true,
67
      // Tx-domain distortion and txdist-based rate
68
0
      RDOType::TxDistEstRate => true,
69
    }
70
0
  }
Unexecuted instantiation: <rav1e::rdo::RDOType>::needs_tx_dist
Unexecuted instantiation: <rav1e::rdo::RDOType>::needs_tx_dist
71
  #[inline]
72
0
  pub const fn needs_coeff_rate(self) -> bool {
73
0
    match self {
74
0
      RDOType::PixelDistRealRate => true,
75
0
      RDOType::TxDistRealRate => true,
76
0
      RDOType::TxDistEstRate => false,
77
    }
78
0
  }
Unexecuted instantiation: <rav1e::rdo::RDOType>::needs_coeff_rate
Unexecuted instantiation: <rav1e::rdo::RDOType>::needs_coeff_rate
79
}
80
81
#[derive(Clone)]
82
pub struct PartitionGroupParameters {
83
  pub rd_cost: f64,
84
  pub part_type: PartitionType,
85
  pub part_modes: ArrayVec<PartitionParameters, 4>,
86
}
87
88
#[derive(Clone, Debug)]
89
pub struct PartitionParameters {
90
  pub rd_cost: f64,
91
  pub bo: TileBlockOffset,
92
  pub bsize: BlockSize,
93
  pub pred_mode_luma: PredictionMode,
94
  pub pred_mode_chroma: PredictionMode,
95
  pub pred_cfl_params: CFLParams,
96
  pub angle_delta: AngleDelta,
97
  pub ref_frames: [RefType; 2],
98
  pub mvs: [MotionVector; 2],
99
  pub skip: bool,
100
  pub has_coeff: bool,
101
  pub tx_size: TxSize,
102
  pub tx_type: TxType,
103
  pub sidx: u8,
104
}
105
106
impl Default for PartitionParameters {
107
0
  fn default() -> Self {
108
0
    PartitionParameters {
109
0
      rd_cost: f64::MAX,
110
0
      bo: TileBlockOffset::default(),
111
0
      bsize: BlockSize::BLOCK_32X32,
112
0
      pred_mode_luma: PredictionMode::default(),
113
0
      pred_mode_chroma: PredictionMode::default(),
114
0
      pred_cfl_params: CFLParams::default(),
115
0
      angle_delta: AngleDelta::default(),
116
0
      ref_frames: [RefType::INTRA_FRAME, RefType::NONE_FRAME],
117
0
      mvs: [MotionVector::default(); 2],
118
0
      skip: false,
119
0
      has_coeff: true,
120
0
      tx_size: TxSize::TX_4X4,
121
0
      tx_type: TxType::DCT_DCT,
122
0
      sidx: 0,
123
0
    }
124
0
  }
125
}
126
127
0
pub fn estimate_rate(qindex: u8, ts: TxSize, fast_distortion: u64) -> u64 {
128
0
  let bs_index = ts as usize;
129
0
  let q_bin_idx = (qindex as usize) / RDO_QUANT_DIV;
130
0
  let bin_idx_down =
131
0
    ((fast_distortion) / RATE_EST_BIN_SIZE).min((RDO_NUM_BINS - 2) as u64);
132
0
  let bin_idx_up = (bin_idx_down + 1).min((RDO_NUM_BINS - 1) as u64);
133
0
  let x0 = (bin_idx_down * RATE_EST_BIN_SIZE) as i64;
134
0
  let x1 = (bin_idx_up * RATE_EST_BIN_SIZE) as i64;
135
0
  let y0 = RDO_RATE_TABLE[q_bin_idx][bs_index][bin_idx_down as usize] as i64;
136
0
  let y1 = RDO_RATE_TABLE[q_bin_idx][bs_index][bin_idx_up as usize] as i64;
137
0
  let slope = ((y1 - y0) << 8) / (x1 - x0);
138
0
  (y0 + (((fast_distortion as i64 - x0) * slope) >> 8)).max(0) as u64
139
0
}
140
141
#[allow(unused)]
142
0
pub fn cdef_dist_wxh<T: Pixel, F: Fn(Area, BlockSize) -> DistortionScale>(
143
0
  src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, w: usize, h: usize,
144
0
  bit_depth: usize, compute_bias: F, cpu: CpuFeatureLevel,
145
0
) -> Distortion {
146
0
  debug_assert!(src1.plane_cfg.xdec == 0);
147
0
  debug_assert!(src1.plane_cfg.ydec == 0);
148
0
  debug_assert!(src2.plane_cfg.xdec == 0);
149
0
  debug_assert!(src2.plane_cfg.ydec == 0);
150
151
0
  let mut sum = Distortion::zero();
152
0
  for y in (0..h).step_by(8) {
153
0
    for x in (0..w).step_by(8) {
154
0
      let kernel_h = (h - y).min(8);
155
0
      let kernel_w = (w - x).min(8);
156
0
      let area = Area::StartingAt { x: x as isize, y: y as isize };
157
0
158
0
      let value = RawDistortion(cdef_dist_kernel(
159
0
        &src1.subregion(area),
160
0
        &src2.subregion(area),
161
0
        kernel_w,
162
0
        kernel_h,
163
0
        bit_depth,
164
0
        cpu,
165
0
      ) as u64);
166
0
167
0
      // cdef is always called on non-subsampled planes, so BLOCK_8X8 is
168
0
      // correct here.
169
0
      sum += value * compute_bias(area, BlockSize::BLOCK_8X8);
170
0
    }
171
  }
172
0
  sum
173
0
}
Unexecuted instantiation: rav1e::rdo::cdef_dist_wxh::<u16, rav1e::rdo::compute_distortion<u16>::{closure#0}>
Unexecuted instantiation: rav1e::rdo::cdef_dist_wxh::<u8, rav1e::rdo::compute_distortion<u8>::{closure#0}>
174
175
/// Sum of Squared Error for a wxh block
176
/// Currently limited to w and h of valid blocks
177
0
pub fn sse_wxh<T: Pixel, F: Fn(Area, BlockSize) -> DistortionScale>(
178
0
  src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, w: usize, h: usize,
179
0
  compute_bias: F, bit_depth: usize, cpu: CpuFeatureLevel,
180
0
) -> Distortion {
181
  // See get_weighted_sse in src/dist.rs.
182
  // Provide a scale to get_weighted_sse for each square region of this size.
183
  const CHUNK_SIZE: usize = IMPORTANCE_BLOCK_SIZE >> 1;
184
185
  // To bias the distortion correctly, compute it in blocks up to the size
186
  // importance block size in a non-subsampled plane.
187
0
  let imp_block_w = CHUNK_SIZE << src1.plane_cfg.xdec;
188
0
  let imp_block_h = CHUNK_SIZE << src1.plane_cfg.ydec;
189
190
0
  let imp_bsize = BlockSize::from_width_and_height(imp_block_w, imp_block_h);
191
192
0
  let n_imp_blocks_w = w.div_ceil(CHUNK_SIZE);
193
0
  let n_imp_blocks_h = h.div_ceil(CHUNK_SIZE);
194
195
  // TODO: Copying biases into a buffer is slow. It would be best if biases were
196
  // passed directly. To do this, we would need different versions of the
197
  // weighted sse function for decimated/subsampled data. Also requires
198
  // eliminating use of unbiased sse.
199
  // It should also be noted that the current copy code does not auto-vectorize.
200
201
  // Copy biases into a buffer.
202
0
  let mut buf_storage = Aligned::new(
203
0
    [MaybeUninit::<u32>::uninit(); 128 / CHUNK_SIZE * 128 / CHUNK_SIZE],
204
  );
205
0
  let buf_stride = n_imp_blocks_w.next_power_of_two();
206
0
  let buf = init_slice_repeat_mut(
207
0
    &mut buf_storage.data[..buf_stride * n_imp_blocks_h],
208
    0,
209
  );
210
211
0
  for block_y in 0..n_imp_blocks_h {
212
0
    for block_x in 0..n_imp_blocks_w {
213
0
      let block = Area::StartingAt {
214
0
        x: (block_x * CHUNK_SIZE) as isize,
215
0
        y: (block_y * CHUNK_SIZE) as isize,
216
0
      };
217
0
      buf[block_y * buf_stride + block_x] = compute_bias(block, imp_bsize).0;
218
0
    }
219
  }
220
221
0
  Distortion(get_weighted_sse(
222
0
    src1, src2, buf, buf_stride, w, h, bit_depth, cpu,
223
0
  ))
224
0
}
Unexecuted instantiation: rav1e::rdo::sse_wxh::<u16, rav1e::rdo::compute_distortion<u16>::{closure#2}>
Unexecuted instantiation: rav1e::rdo::sse_wxh::<u16, rav1e::rdo::compute_distortion<u16>::{closure#1}>
Unexecuted instantiation: rav1e::rdo::sse_wxh::<u16, rav1e::rdo::rdo_loop_plane_error<u16>::{closure#0}>
Unexecuted instantiation: rav1e::rdo::sse_wxh::<u16, rav1e::rdo::compute_tx_distortion<u16>::{closure#0}>
Unexecuted instantiation: rav1e::rdo::sse_wxh::<u16, rav1e::rdo::compute_tx_distortion<u16>::{closure#1}>
Unexecuted instantiation: rav1e::rdo::sse_wxh::<u16, rav1e::rdo::rdo_cfl_alpha<u16>::{closure#0}::{closure#0}::{closure#0}>
Unexecuted instantiation: rav1e::rdo::sse_wxh::<u8, rav1e::rdo::compute_distortion<u8>::{closure#2}>
Unexecuted instantiation: rav1e::rdo::sse_wxh::<u8, rav1e::rdo::compute_distortion<u8>::{closure#1}>
Unexecuted instantiation: rav1e::rdo::sse_wxh::<u8, rav1e::rdo::rdo_loop_plane_error<u8>::{closure#0}>
Unexecuted instantiation: rav1e::rdo::sse_wxh::<u8, rav1e::rdo::compute_tx_distortion<u8>::{closure#0}>
Unexecuted instantiation: rav1e::rdo::sse_wxh::<u8, rav1e::rdo::compute_tx_distortion<u8>::{closure#1}>
Unexecuted instantiation: rav1e::rdo::sse_wxh::<u8, rav1e::rdo::rdo_cfl_alpha<u8>::{closure#0}::{closure#0}::{closure#0}>
225
226
// TODO consider saturating_sub later
227
#[allow(clippy::implicit_saturating_sub)]
228
0
pub const fn clip_visible_bsize(
229
0
  frame_w: usize, frame_h: usize, bsize: BlockSize, x: usize, y: usize,
230
0
) -> (usize, usize) {
231
0
  let blk_w = bsize.width();
232
0
  let blk_h = bsize.height();
233
234
0
  let visible_w: usize = if x + blk_w <= frame_w {
235
0
    blk_w
236
0
  } else if x >= frame_w {
237
0
    0
238
  } else {
239
0
    frame_w - x
240
  };
241
242
0
  let visible_h: usize = if y + blk_h <= frame_h {
243
0
    blk_h
244
0
  } else if y >= frame_h {
245
0
    0
246
  } else {
247
0
    frame_h - y
248
  };
249
250
0
  (visible_w, visible_h)
251
0
}
252
253
// Compute the pixel-domain distortion for an encode
254
0
fn compute_distortion<T: Pixel>(
255
0
  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize,
256
0
  is_chroma_block: bool, tile_bo: TileBlockOffset, luma_only: bool,
257
0
) -> ScaledDistortion {
258
0
  let area = Area::BlockStartingAt { bo: tile_bo.0 };
259
0
  let input_region = ts.input_tile.planes[0].subregion(area);
260
0
  let rec_region = ts.rec.planes[0].subregion(area);
261
262
  // clip a block to have visible pixles only
263
0
  let frame_bo = ts.to_frame_block_offset(tile_bo);
264
0
  let (visible_w, visible_h) = clip_visible_bsize(
265
0
    fi.width,
266
0
    fi.height,
267
0
    bsize,
268
0
    frame_bo.0.x << MI_SIZE_LOG2,
269
0
    frame_bo.0.y << MI_SIZE_LOG2,
270
0
  );
271
272
0
  if visible_w == 0 || visible_h == 0 {
273
0
    return ScaledDistortion::zero();
274
0
  }
275
276
0
  let mut distortion = match fi.config.tune {
277
0
    Tune::Psychovisual => cdef_dist_wxh(
278
0
      &input_region,
279
0
      &rec_region,
280
0
      visible_w,
281
0
      visible_h,
282
0
      fi.sequence.bit_depth,
283
0
      |bias_area, bsize| {
284
0
        distortion_scale(
285
0
          fi,
286
0
          input_region.subregion(bias_area).frame_block_offset(),
287
0
          bsize,
288
        )
289
0
      },
Unexecuted instantiation: rav1e::rdo::compute_distortion::<u16>::{closure#0}
Unexecuted instantiation: rav1e::rdo::compute_distortion::<u8>::{closure#0}
290
0
      fi.cpu_feature_level,
291
    ),
292
0
    Tune::Psnr => sse_wxh(
293
0
      &input_region,
294
0
      &rec_region,
295
0
      visible_w,
296
0
      visible_h,
297
0
      |bias_area, bsize| {
298
0
        distortion_scale(
299
0
          fi,
300
0
          input_region.subregion(bias_area).frame_block_offset(),
301
0
          bsize,
302
        )
303
0
      },
Unexecuted instantiation: rav1e::rdo::compute_distortion::<u16>::{closure#1}
Unexecuted instantiation: rav1e::rdo::compute_distortion::<u8>::{closure#1}
304
0
      fi.sequence.bit_depth,
305
0
      fi.cpu_feature_level,
306
    ),
307
0
  } * fi.dist_scale[0];
308
309
0
  if is_chroma_block
310
0
    && !luma_only
311
0
    && fi.sequence.chroma_sampling != ChromaSampling::Cs400
312
  {
313
0
    let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
314
0
    let chroma_w = if bsize.width() >= 8 || xdec == 0 {
315
0
      (visible_w + xdec) >> xdec
316
    } else {
317
0
      (4 + visible_w + xdec) >> xdec
318
    };
319
0
    let chroma_h = if bsize.height() >= 8 || ydec == 0 {
320
0
      (visible_h + ydec) >> ydec
321
    } else {
322
0
      (4 + visible_h + ydec) >> ydec
323
    };
324
325
0
    for p in 1..3 {
326
0
      let input_region = ts.input_tile.planes[p].subregion(area);
327
0
      let rec_region = ts.rec.planes[p].subregion(area);
328
0
      distortion += sse_wxh(
329
0
        &input_region,
330
0
        &rec_region,
331
0
        chroma_w,
332
0
        chroma_h,
333
0
        |bias_area, bsize| {
334
0
          distortion_scale(
335
0
            fi,
336
0
            input_region.subregion(bias_area).frame_block_offset(),
337
0
            bsize,
338
          )
339
0
        },
Unexecuted instantiation: rav1e::rdo::compute_distortion::<u16>::{closure#2}
Unexecuted instantiation: rav1e::rdo::compute_distortion::<u8>::{closure#2}
340
0
        fi.sequence.bit_depth,
341
0
        fi.cpu_feature_level,
342
0
      ) * fi.dist_scale[p];
343
    }
344
0
  }
345
0
  distortion
346
0
}
Unexecuted instantiation: rav1e::rdo::compute_distortion::<u16>
Unexecuted instantiation: rav1e::rdo::compute_distortion::<u8>
347
348
// Compute the transform-domain distortion for an encode
349
0
fn compute_tx_distortion<T: Pixel>(
350
0
  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize,
351
0
  is_chroma_block: bool, tile_bo: TileBlockOffset, tx_dist: ScaledDistortion,
352
0
  skip: bool, luma_only: bool,
353
0
) -> ScaledDistortion {
354
0
  assert!(fi.config.tune == Tune::Psnr);
355
0
  let area = Area::BlockStartingAt { bo: tile_bo.0 };
356
0
  let input_region = ts.input_tile.planes[0].subregion(area);
357
0
  let rec_region = ts.rec.planes[0].subregion(area);
358
359
0
  let (visible_w, visible_h) = if !skip {
360
0
    (bsize.width(), bsize.height())
361
  } else {
362
0
    let frame_bo = ts.to_frame_block_offset(tile_bo);
363
0
    clip_visible_bsize(
364
0
      fi.width,
365
0
      fi.height,
366
0
      bsize,
367
0
      frame_bo.0.x << MI_SIZE_LOG2,
368
0
      frame_bo.0.y << MI_SIZE_LOG2,
369
    )
370
  };
371
372
0
  if visible_w == 0 || visible_h == 0 {
373
0
    return ScaledDistortion::zero();
374
0
  }
375
376
0
  let mut distortion = if skip {
377
0
    sse_wxh(
378
0
      &input_region,
379
0
      &rec_region,
380
0
      visible_w,
381
0
      visible_h,
382
0
      |bias_area, bsize| {
383
0
        distortion_scale(
384
0
          fi,
385
0
          input_region.subregion(bias_area).frame_block_offset(),
386
0
          bsize,
387
        )
388
0
      },
Unexecuted instantiation: rav1e::rdo::compute_tx_distortion::<u16>::{closure#0}
Unexecuted instantiation: rav1e::rdo::compute_tx_distortion::<u8>::{closure#0}
389
0
      fi.sequence.bit_depth,
390
0
      fi.cpu_feature_level,
391
0
    ) * fi.dist_scale[0]
392
  } else {
393
0
    tx_dist
394
  };
395
396
0
  if is_chroma_block
397
0
    && !luma_only
398
0
    && skip
399
0
    && fi.sequence.chroma_sampling != ChromaSampling::Cs400
400
  {
401
0
    let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
402
0
    let chroma_w = if bsize.width() >= 8 || xdec == 0 {
403
0
      (visible_w + xdec) >> xdec
404
    } else {
405
0
      (4 + visible_w + xdec) >> xdec
406
    };
407
0
    let chroma_h = if bsize.height() >= 8 || ydec == 0 {
408
0
      (visible_h + ydec) >> ydec
409
    } else {
410
0
      (4 + visible_h + ydec) >> ydec
411
    };
412
413
0
    for p in 1..3 {
414
0
      let input_region = ts.input_tile.planes[p].subregion(area);
415
0
      let rec_region = ts.rec.planes[p].subregion(area);
416
0
      distortion += sse_wxh(
417
0
        &input_region,
418
0
        &rec_region,
419
0
        chroma_w,
420
0
        chroma_h,
421
0
        |bias_area, bsize| {
422
0
          distortion_scale(
423
0
            fi,
424
0
            input_region.subregion(bias_area).frame_block_offset(),
425
0
            bsize,
426
          )
427
0
        },
Unexecuted instantiation: rav1e::rdo::compute_tx_distortion::<u16>::{closure#1}
Unexecuted instantiation: rav1e::rdo::compute_tx_distortion::<u8>::{closure#1}
428
0
        fi.sequence.bit_depth,
429
0
        fi.cpu_feature_level,
430
0
      ) * fi.dist_scale[p];
431
    }
432
0
  }
433
0
  distortion
434
0
}
Unexecuted instantiation: rav1e::rdo::compute_tx_distortion::<u16>
Unexecuted instantiation: rav1e::rdo::compute_tx_distortion::<u8>
435
436
/// Compute a scaling factor to multiply the distortion of a block by,
437
/// this factor is determined using temporal RDO.
438
///
439
/// # Panics
440
///
441
/// - If called with `bsize` of 8x8 or smaller
442
/// - If the coded frame data doesn't exist on the `FrameInvariants`
443
0
pub fn distortion_scale<T: Pixel>(
444
0
  fi: &FrameInvariants<T>, frame_bo: PlaneBlockOffset, bsize: BlockSize,
445
0
) -> DistortionScale {
446
0
  if !fi.config.temporal_rdo() {
447
0
    return DistortionScale::default();
448
0
  }
449
  // EncoderConfig::temporal_rdo() should always return false in situations
450
  // where distortion is computed on > 8x8 blocks, so we should never hit this
451
  // assert.
452
0
  assert!(bsize <= BlockSize::BLOCK_8X8);
453
454
0
  let x = frame_bo.0.x >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT;
455
0
  let y = frame_bo.0.y >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT;
456
457
0
  let coded_data = fi.coded_frame_data.as_ref().unwrap();
458
0
  coded_data.distortion_scales[y * coded_data.w_in_imp_b + x]
459
0
}
Unexecuted instantiation: rav1e::rdo::distortion_scale::<u16>
Unexecuted instantiation: rav1e::rdo::distortion_scale::<u8>
460
461
/// # Panics
462
///
463
/// - If the coded frame data doesn't exist on the `FrameInvariants`
464
0
pub fn spatiotemporal_scale<T: Pixel>(
465
0
  fi: &FrameInvariants<T>, frame_bo: PlaneBlockOffset, bsize: BlockSize,
466
0
) -> DistortionScale {
467
0
  if !fi.config.temporal_rdo() && fi.config.tune != Tune::Psychovisual {
468
0
    return DistortionScale::default();
469
0
  }
470
471
0
  let coded_data = fi.coded_frame_data.as_ref().unwrap();
472
473
0
  let x0 = frame_bo.0.x >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT;
474
0
  let y0 = frame_bo.0.y >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT;
475
0
  let x1 = (x0 + bsize.width_imp_b()).min(coded_data.w_in_imp_b);
476
0
  let y1 = (y0 + bsize.height_imp_b()).min(coded_data.h_in_imp_b);
477
0
  let den = (((x1 - x0) * (y1 - y0)) as u64) << DistortionScale::SHIFT;
478
479
  // calling this on each slice individually improves autovectorization
480
  // compared to using `Iterator::take`
481
  #[inline(always)]
482
0
  fn take_slice<T>(slice: &[T], n: usize) -> &[T] {
483
0
    slice.get(..n).unwrap_or(slice)
484
0
  }
Unexecuted instantiation: rav1e::rdo::spatiotemporal_scale::take_slice::<rav1e::rdo::DistortionScale>
Unexecuted instantiation: rav1e::rdo::spatiotemporal_scale::take_slice::<rav1e::rdo::DistortionScale>
485
486
0
  let mut sum = 0;
487
0
  for y in y0..y1 {
488
0
    sum += take_slice(
489
0
      &coded_data.distortion_scales[y * coded_data.w_in_imp_b..][x0..x1],
490
0
      MAX_SB_IN_IMP_B,
491
0
    )
492
0
    .iter()
493
0
    .zip(
494
0
      take_slice(
495
0
        &coded_data.activity_scales[y * coded_data.w_in_imp_b..][x0..x1],
496
0
        MAX_SB_IN_IMP_B,
497
0
      )
498
0
      .iter(),
499
    )
500
0
    .map(|(d, a)| d.0 as u64 * a.0 as u64)
Unexecuted instantiation: rav1e::rdo::spatiotemporal_scale::<u16>::{closure#0}
Unexecuted instantiation: rav1e::rdo::spatiotemporal_scale::<u8>::{closure#0}
501
0
    .sum::<u64>();
502
  }
503
0
  DistortionScale(((sum + (den >> 1)) / den) as u32)
504
0
}
Unexecuted instantiation: rav1e::rdo::spatiotemporal_scale::<u16>
Unexecuted instantiation: rav1e::rdo::spatiotemporal_scale::<u8>
505
506
0
pub fn distortion_scale_for(
507
0
  propagate_cost: f64, intra_cost: f64,
508
0
) -> DistortionScale {
509
  // The mbtree paper \cite{mbtree} uses the following formula:
510
  //
511
  //     QP_delta = -strength * log2(1 + (propagate_cost / intra_cost))
512
  //
513
  // Since this is H.264, this corresponds to the following quantizer:
514
  //
515
  //     Q' = Q * 2^(QP_delta/6)
516
  //
517
  // Since lambda is proportial to Q^2, this means we want to minimize:
518
  //
519
  //     D + lambda' * R
520
  //   = D + 2^(QP_delta / 3) * lambda * R
521
  //
522
  // If we want to keep lambda fixed, we can instead scale distortion and
523
  // minimize:
524
  //
525
  //     D * scale + lambda * R
526
  //
527
  // where:
528
  //
529
  //     scale = 2^(QP_delta / -3)
530
  //           = (1 + (propagate_cost / intra_cost))^(strength / 3)
531
  //
532
  //  The original paper empirically chooses strength = 2.0, but strength = 1.0
533
  //  seems to work best in rav1e currently, this may have something to do with
534
  //  the fact that they use 16x16 blocks whereas our "importance blocks" are
535
  //  8x8, but everything should be scale invariant here so that's weird.
536
  //
537
  // @article{mbtree,
538
  //   title={A novel macroblock-tree algorithm for high-performance
539
  //    optimization of dependent video coding in H.264/AVC},
540
  //   author={Garrett-Glaser, Jason},
541
  //   journal={Tech. Rep.},
542
  //   year={2009},
543
  //   url={https://pdfs.semanticscholar.org/032f/1ab7d9db385780a02eb2d579af8303b266d2.pdf}
544
  // }
545
546
0
  if intra_cost == 0. {
547
0
    return DistortionScale::default(); // no scaling
548
0
  }
549
550
0
  let strength = 1.0; // empirical, see comment above
551
0
  let frac = (intra_cost + propagate_cost) / intra_cost;
552
0
  frac.powf(strength / 3.0).into()
553
0
}
554
555
/// Fixed point arithmetic version of distortion scale
556
#[repr(transparent)]
557
#[derive(Copy, Clone)]
558
pub struct DistortionScale(pub u32);
559
560
#[repr(transparent)]
561
pub struct RawDistortion(u64);
562
563
#[repr(transparent)]
564
pub struct Distortion(pub u64);
565
566
#[repr(transparent)]
567
pub struct ScaledDistortion(u64);
568
569
impl DistortionScale {
570
  /// Bits past the radix point
571
  const SHIFT: u32 = 14;
572
  /// Number of bits used. Determines the max value.
573
  /// 28 bits is quite excessive.
574
  const BITS: u32 = 28;
575
  /// Maximum internal value
576
  const MAX: u64 = (1 << Self::BITS) - 1;
577
578
  #[inline]
579
0
  pub const fn new(num: u64, den: u64) -> Self {
580
0
    let raw = (num << Self::SHIFT).saturating_add(den / 2) / den;
581
0
    let mask = (raw <= Self::MAX) as u64;
582
0
    Self((mask * raw + (1 - mask) * Self::MAX) as u32)
583
0
  }
Unexecuted instantiation: <rav1e::rdo::DistortionScale>::new
Unexecuted instantiation: <rav1e::rdo::DistortionScale>::new
584
585
0
  pub fn inv_mean(slice: &[Self]) -> Self {
586
    use crate::util::{bexp64, blog32_q11};
587
0
    let sum = slice.iter().map(|&s| blog32_q11(s.0) as i64).sum::<i64>();
588
0
    let log_inv_mean_q11 =
589
0
      (Self::SHIFT << 11) as i64 - sum / slice.len() as i64;
590
0
    Self(
591
0
      bexp64((log_inv_mean_q11 + (Self::SHIFT << 11) as i64) << (57 - 11))
592
0
        .clamp(1, (1 << Self::BITS) - 1) as u32,
593
0
    )
594
0
  }
595
596
  /// Binary logarithm in Q11
597
  #[inline]
598
0
  pub const fn blog16(self) -> i16 {
599
    use crate::util::blog32_q11;
600
0
    (blog32_q11(self.0) - ((Self::SHIFT as i32) << 11)) as i16
601
0
  }
Unexecuted instantiation: <rav1e::rdo::DistortionScale>::blog16
Unexecuted instantiation: <rav1e::rdo::DistortionScale>::blog16
602
603
  /// Binary logarithm in Q57
604
  #[inline]
605
0
  pub const fn blog64(self) -> i64 {
606
    use crate::util::{blog64, q57};
607
0
    blog64(self.0 as i64) - q57(Self::SHIFT as i32)
608
0
  }
Unexecuted instantiation: <rav1e::rdo::DistortionScale>::blog64
Unexecuted instantiation: <rav1e::rdo::DistortionScale>::blog64
609
610
  /// Multiply, round and shift
611
  /// Internal implementation, so don't use multiply trait.
612
  #[inline]
613
0
  pub const fn mul_u64(self, dist: u64) -> u64 {
614
0
    (self.0 as u64 * dist + (1 << Self::SHIFT >> 1)) >> Self::SHIFT
615
0
  }
Unexecuted instantiation: <rav1e::rdo::DistortionScale>::mul_u64
Unexecuted instantiation: <rav1e::rdo::DistortionScale>::mul_u64
616
}
617
618
impl std::ops::Mul for DistortionScale {
619
  type Output = Self;
620
621
  /// Multiply, round and shift
622
  #[inline]
623
0
  fn mul(self, rhs: Self) -> Self {
624
0
    Self(
625
0
      (((self.0 as u64 * rhs.0 as u64) + (1 << (Self::SHIFT - 1)))
626
0
        >> Self::SHIFT)
627
0
        .clamp(1, (1 << Self::BITS) - 1) as u32,
628
0
    )
629
0
  }
Unexecuted instantiation: <rav1e::rdo::DistortionScale as core::ops::arith::Mul>::mul
Unexecuted instantiation: <rav1e::rdo::DistortionScale as core::ops::arith::Mul>::mul
630
}
631
632
impl std::ops::MulAssign for DistortionScale {
633
0
  fn mul_assign(&mut self, rhs: Self) {
634
0
    *self = *self * rhs;
635
0
  }
636
}
637
638
// Default value for DistortionScale is a fixed point 1
639
impl Default for DistortionScale {
640
  #[inline]
641
0
  fn default() -> Self {
642
0
    Self(1 << Self::SHIFT)
643
0
  }
Unexecuted instantiation: <rav1e::rdo::DistortionScale as core::default::Default>::default
Unexecuted instantiation: <rav1e::rdo::DistortionScale as core::default::Default>::default
644
}
645
646
impl fmt::Debug for DistortionScale {
647
0
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
648
0
    write!(f, "{}", f64::from(*self))
649
0
  }
650
}
651
652
impl From<f64> for DistortionScale {
653
  #[inline]
654
0
  fn from(scale: f64) -> Self {
655
0
    let den = 1 << (Self::SHIFT + 1);
656
0
    Self::new((scale * den as f64) as u64, den)
657
0
  }
658
}
659
660
impl From<DistortionScale> for f64 {
661
  #[inline]
662
0
  fn from(scale: DistortionScale) -> Self {
663
0
    scale.0 as f64 / (1 << DistortionScale::SHIFT) as f64
664
0
  }
665
}
666
667
impl RawDistortion {
668
  #[inline]
669
0
  pub const fn new(dist: u64) -> Self {
670
0
    Self(dist)
671
0
  }
Unexecuted instantiation: <rav1e::rdo::RawDistortion>::new
Unexecuted instantiation: <rav1e::rdo::RawDistortion>::new
672
}
673
674
impl std::ops::Mul<DistortionScale> for RawDistortion {
675
  type Output = Distortion;
676
  #[inline]
677
0
  fn mul(self, rhs: DistortionScale) -> Distortion {
678
0
    Distortion(rhs.mul_u64(self.0))
679
0
  }
Unexecuted instantiation: <rav1e::rdo::RawDistortion as core::ops::arith::Mul<rav1e::rdo::DistortionScale>>::mul
Unexecuted instantiation: <rav1e::rdo::RawDistortion as core::ops::arith::Mul<rav1e::rdo::DistortionScale>>::mul
680
}
681
682
impl Distortion {
683
  #[inline]
684
0
  pub const fn zero() -> Self {
685
0
    Self(0)
686
0
  }
Unexecuted instantiation: <rav1e::rdo::Distortion>::zero
Unexecuted instantiation: <rav1e::rdo::Distortion>::zero
687
}
688
689
impl std::ops::Mul<DistortionScale> for Distortion {
690
  type Output = ScaledDistortion;
691
  #[inline]
692
0
  fn mul(self, rhs: DistortionScale) -> ScaledDistortion {
693
0
    ScaledDistortion(rhs.mul_u64(self.0))
694
0
  }
Unexecuted instantiation: <rav1e::rdo::Distortion as core::ops::arith::Mul<rav1e::rdo::DistortionScale>>::mul
Unexecuted instantiation: <rav1e::rdo::Distortion as core::ops::arith::Mul<rav1e::rdo::DistortionScale>>::mul
695
}
696
697
impl std::ops::AddAssign for Distortion {
698
  #[inline]
699
0
  fn add_assign(&mut self, other: Self) {
700
0
    self.0 += other.0;
701
0
  }
Unexecuted instantiation: <rav1e::rdo::Distortion as core::ops::arith::AddAssign>::add_assign
Unexecuted instantiation: <rav1e::rdo::Distortion as core::ops::arith::AddAssign>::add_assign
702
}
703
704
impl ScaledDistortion {
705
  #[inline]
706
0
  pub const fn zero() -> Self {
707
0
    Self(0)
708
0
  }
Unexecuted instantiation: <rav1e::rdo::ScaledDistortion>::zero
Unexecuted instantiation: <rav1e::rdo::ScaledDistortion>::zero
709
}
710
711
impl std::ops::AddAssign for ScaledDistortion {
712
  #[inline]
713
0
  fn add_assign(&mut self, other: Self) {
714
0
    self.0 += other.0;
715
0
  }
Unexecuted instantiation: <rav1e::rdo::ScaledDistortion as core::ops::arith::AddAssign>::add_assign
Unexecuted instantiation: <rav1e::rdo::ScaledDistortion as core::ops::arith::AddAssign>::add_assign
716
}
717
718
0
pub fn compute_rd_cost<T: Pixel>(
719
0
  fi: &FrameInvariants<T>, rate: u32, distortion: ScaledDistortion,
720
0
) -> f64 {
721
0
  let rate_in_bits = (rate as f64) / ((1 << OD_BITRES) as f64);
722
0
  fi.lambda.mul_add(rate_in_bits, distortion.0 as f64)
723
0
}
Unexecuted instantiation: rav1e::rdo::compute_rd_cost::<u16>
Unexecuted instantiation: rav1e::rdo::compute_rd_cost::<u8>
724
725
0
pub fn rdo_tx_size_type<T: Pixel>(
726
0
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
727
0
  cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
728
0
  luma_mode: PredictionMode, ref_frames: [RefType; 2], mvs: [MotionVector; 2],
729
0
  skip: bool,
730
0
) -> (TxSize, TxType) {
731
0
  let is_inter = !luma_mode.is_intra();
732
0
  let mut tx_size = max_txsize_rect_lookup[bsize as usize];
733
734
0
  if fi.enable_inter_txfm_split && is_inter && !skip {
735
0
    tx_size = sub_tx_size_map[tx_size as usize]; // Always choose one level split size
736
0
  }
737
738
0
  let mut best_tx_type = TxType::DCT_DCT;
739
0
  let mut best_tx_size = tx_size;
740
0
  let mut best_rd = f64::MAX;
741
742
0
  let do_rdo_tx_size = fi.tx_mode_select
743
0
    && fi.config.speed_settings.transform.rdo_tx_decision
744
0
    && !is_inter;
745
0
  let rdo_tx_depth = if do_rdo_tx_size { 2 } else { 0 };
746
0
  let mut cw_checkpoint: Option<ContextWriterCheckpoint> = None;
747
748
0
  for _ in 0..=rdo_tx_depth {
749
0
    let tx_set = get_tx_set(tx_size, is_inter, fi.use_reduced_tx_set);
750
751
0
    let do_rdo_tx_type = tx_set > TxSet::TX_SET_DCTONLY
752
0
      && fi.config.speed_settings.transform.rdo_tx_decision
753
0
      && !is_inter
754
0
      && !skip;
755
756
0
    if !do_rdo_tx_size && !do_rdo_tx_type {
757
0
      return (best_tx_size, best_tx_type);
758
0
    };
759
760
0
    let tx_types =
761
0
      if do_rdo_tx_type { RAV1E_TX_TYPES } else { &[TxType::DCT_DCT] };
762
763
    // Luma plane transform type decision
764
0
    let (tx_type, rd_cost) = rdo_tx_type_decision(
765
0
      fi,
766
0
      ts,
767
0
      cw,
768
0
      &mut cw_checkpoint,
769
0
      luma_mode,
770
0
      ref_frames,
771
0
      mvs,
772
0
      bsize,
773
0
      tile_bo,
774
0
      tx_size,
775
0
      tx_set,
776
0
      tx_types,
777
0
      best_rd,
778
0
    );
779
780
0
    if rd_cost < best_rd {
781
0
      best_tx_size = tx_size;
782
0
      best_tx_type = tx_type;
783
0
      best_rd = rd_cost;
784
0
    }
785
786
0
    debug_assert!(tx_size.width_log2() <= bsize.width_log2());
787
0
    debug_assert!(tx_size.height_log2() <= bsize.height_log2());
788
0
    debug_assert!(
789
0
      tx_size.sqr() <= TxSize::TX_32X32 || tx_type == TxType::DCT_DCT
790
    );
791
792
0
    let next_tx_size = sub_tx_size_map[tx_size as usize];
793
794
0
    if next_tx_size == tx_size {
795
0
      break;
796
0
    } else {
797
0
      tx_size = next_tx_size;
798
0
    };
799
  }
800
801
0
  (best_tx_size, best_tx_type)
802
0
}
Unexecuted instantiation: rav1e::rdo::rdo_tx_size_type::<u16>
Unexecuted instantiation: rav1e::rdo::rdo_tx_size_type::<u8>
803
804
#[inline]
805
0
const fn dmv_in_range(mv: MotionVector, ref_mv: MotionVector) -> bool {
806
0
  let diff_row = mv.row as i32 - ref_mv.row as i32;
807
0
  let diff_col = mv.col as i32 - ref_mv.col as i32;
808
0
  diff_row >= MV_LOW
809
0
    && diff_row <= MV_UPP
810
0
    && diff_col >= MV_LOW
811
0
    && diff_col <= MV_UPP
812
0
}
Unexecuted instantiation: rav1e::rdo::dmv_in_range
Unexecuted instantiation: rav1e::rdo::dmv_in_range
813
814
#[inline]
815
#[profiling::function]
816
fn luma_chroma_mode_rdo<T: Pixel>(
817
  luma_mode: PredictionMode, fi: &FrameInvariants<T>, bsize: BlockSize,
818
  tile_bo: TileBlockOffset, ts: &mut TileStateMut<'_, T>,
819
  cw: &mut ContextWriter, rdo_type: RDOType,
820
  cw_checkpoint: &ContextWriterCheckpoint, best: &mut PartitionParameters,
821
  mvs: [MotionVector; 2], ref_frames: [RefType; 2],
822
  mode_set_chroma: &[PredictionMode], luma_mode_is_intra: bool,
823
  mode_context: usize, mv_stack: &ArrayVec<CandidateMV, 9>,
824
  angle_delta: AngleDelta,
825
) {
826
  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
827
828
  let is_chroma_block =
829
    has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling);
830
831
  if !luma_mode_is_intra {
832
    let ref_mvs = if mv_stack.is_empty() {
833
      [MotionVector::default(); 2]
834
    } else {
835
      [mv_stack[0].this_mv, mv_stack[0].comp_mv]
836
    };
837
838
    if (luma_mode == PredictionMode::NEWMV
839
      || luma_mode == PredictionMode::NEW_NEWMV
840
      || luma_mode == PredictionMode::NEW_NEARESTMV)
841
      && !dmv_in_range(mvs[0], ref_mvs[0])
842
    {
843
      return;
844
    }
845
846
    if (luma_mode == PredictionMode::NEW_NEWMV
847
      || luma_mode == PredictionMode::NEAREST_NEWMV)
848
      && !dmv_in_range(mvs[1], ref_mvs[1])
849
    {
850
      return;
851
    }
852
  }
853
854
  // Find the best chroma prediction mode for the current luma prediction mode
855
0
  let mut chroma_rdo = |skip: bool| -> bool {
856
    use crate::segmentation::select_segment;
857
858
0
    let mut zero_distortion = false;
859
860
0
    for sidx in select_segment(fi, ts, tile_bo, bsize, skip) {
861
0
      cw.bc.blocks.set_segmentation_idx(tile_bo, bsize, sidx);
862
863
0
      let (tx_size, tx_type) = rdo_tx_size_type(
864
0
        fi, ts, cw, bsize, tile_bo, luma_mode, ref_frames, mvs, skip,
865
0
      );
866
0
      for &chroma_mode in mode_set_chroma.iter() {
867
0
        let wr = &mut WriterCounter::new();
868
0
        let tell = wr.tell_frac();
869
870
0
        if bsize >= BlockSize::BLOCK_8X8 && bsize.is_sqr() {
871
0
          cw.write_partition(
872
0
            wr,
873
0
            tile_bo,
874
0
            PartitionType::PARTITION_NONE,
875
0
            bsize,
876
0
          );
877
0
        }
878
879
        // TODO(yushin): luma and chroma would have different decision based on chroma format
880
0
        let need_recon_pixel =
881
0
          luma_mode_is_intra && tx_size.block_size() != bsize;
882
883
0
        encode_block_pre_cdef(&fi.sequence, ts, cw, wr, bsize, tile_bo, skip);
884
0
        let (has_coeff, tx_dist) = encode_block_post_cdef(
885
0
          fi,
886
0
          ts,
887
0
          cw,
888
0
          wr,
889
0
          luma_mode,
890
0
          chroma_mode,
891
0
          angle_delta,
892
0
          ref_frames,
893
0
          mvs,
894
0
          bsize,
895
0
          tile_bo,
896
0
          skip,
897
0
          CFLParams::default(),
898
0
          tx_size,
899
0
          tx_type,
900
0
          mode_context,
901
0
          mv_stack,
902
0
          rdo_type,
903
0
          need_recon_pixel,
904
0
          None,
905
0
        );
906
907
0
        let rate = wr.tell_frac() - tell;
908
0
        let distortion = if fi.use_tx_domain_distortion && !need_recon_pixel {
909
0
          compute_tx_distortion(
910
0
            fi,
911
0
            ts,
912
0
            bsize,
913
0
            is_chroma_block,
914
0
            tile_bo,
915
0
            tx_dist,
916
0
            skip,
917
            false,
918
          )
919
        } else {
920
0
          compute_distortion(fi, ts, bsize, is_chroma_block, tile_bo, false)
921
        };
922
0
        let is_zero_dist = distortion.0 == 0;
923
0
        let rd = compute_rd_cost(fi, rate, distortion);
924
0
        if rd < best.rd_cost {
925
0
          //if rd < best.rd_cost || luma_mode == PredictionMode::NEW_NEWMV {
926
0
          best.rd_cost = rd;
927
0
          best.pred_mode_luma = luma_mode;
928
0
          best.pred_mode_chroma = chroma_mode;
929
0
          best.angle_delta = angle_delta;
930
0
          best.ref_frames = ref_frames;
931
0
          best.mvs = mvs;
932
0
          best.skip = skip;
933
0
          best.has_coeff = has_coeff;
934
0
          best.tx_size = tx_size;
935
0
          best.tx_type = tx_type;
936
0
          best.sidx = sidx;
937
0
          zero_distortion = is_zero_dist;
938
0
        }
939
940
0
        cw.rollback(cw_checkpoint);
941
      }
942
    }
943
944
0
    zero_distortion
945
0
  };
Unexecuted instantiation: rav1e::rdo::luma_chroma_mode_rdo::<u16>::{closure#0}
Unexecuted instantiation: rav1e::rdo::luma_chroma_mode_rdo::<u8>::{closure#0}
946
947
  // Don't skip when using intra modes
948
  let zero_distortion =
949
    if !luma_mode_is_intra { chroma_rdo(true) } else { false };
950
  // early skip
951
  if !zero_distortion {
952
    chroma_rdo(false);
953
  }
954
}
955
956
/// RDO-based mode decision
957
///
958
/// # Panics
959
///
960
/// - If the best RD found is negative.
961
///   This should never happen and indicates a development error.
962
#[profiling::function]
963
pub fn rdo_mode_decision<T: Pixel>(
964
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
965
  cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
966
  inter_cfg: &InterConfig,
967
) -> PartitionParameters {
968
  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
969
  let cw_checkpoint = cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling);
970
971
  let rdo_type = if fi.use_tx_domain_rate {
972
    RDOType::TxDistEstRate
973
  } else if fi.use_tx_domain_distortion {
974
    RDOType::TxDistRealRate
975
  } else {
976
    RDOType::PixelDistRealRate
977
  };
978
979
  let mut best = if fi.frame_type.has_inter() {
980
    assert!(fi.frame_type != FrameType::KEY);
981
982
    inter_frame_rdo_mode_decision(
983
      fi,
984
      ts,
985
      cw,
986
      bsize,
987
      tile_bo,
988
      inter_cfg,
989
      &cw_checkpoint,
990
      rdo_type,
991
    )
992
  } else {
993
    PartitionParameters::default()
994
  };
995
996
  let is_chroma_block =
997
    has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling);
998
999
  if !best.skip {
1000
    best = intra_frame_rdo_mode_decision(
1001
      fi,
1002
      ts,
1003
      cw,
1004
      bsize,
1005
      tile_bo,
1006
      &cw_checkpoint,
1007
      rdo_type,
1008
      best,
1009
      is_chroma_block,
1010
    );
1011
  }
1012
1013
  if best.pred_mode_luma.is_intra() && is_chroma_block && bsize.cfl_allowed() {
1014
    cw.bc.blocks.set_segmentation_idx(tile_bo, bsize, best.sidx);
1015
1016
    let chroma_mode = PredictionMode::UV_CFL_PRED;
1017
    let cw_checkpoint = cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling);
1018
    let mut wr = WriterCounter::new();
1019
    let angle_delta = AngleDelta { y: best.angle_delta.y, uv: 0 };
1020
1021
    write_tx_blocks(
1022
      fi,
1023
      ts,
1024
      cw,
1025
      &mut wr,
1026
      best.pred_mode_luma,
1027
      best.pred_mode_luma,
1028
      angle_delta,
1029
      tile_bo,
1030
      bsize,
1031
      best.tx_size,
1032
      best.tx_type,
1033
      false,
1034
      CFLParams::default(),
1035
      true,
1036
      rdo_type,
1037
      true,
1038
    );
1039
    cw.rollback(&cw_checkpoint);
1040
    if fi.sequence.chroma_sampling != ChromaSampling::Cs400 {
1041
      if let Some(cfl) = rdo_cfl_alpha(ts, tile_bo, bsize, best.tx_size, fi) {
1042
        let mut wr = WriterCounter::new();
1043
        let tell = wr.tell_frac();
1044
1045
        encode_block_pre_cdef(
1046
          &fi.sequence,
1047
          ts,
1048
          cw,
1049
          &mut wr,
1050
          bsize,
1051
          tile_bo,
1052
          best.skip,
1053
        );
1054
        let (has_coeff, _) = encode_block_post_cdef(
1055
          fi,
1056
          ts,
1057
          cw,
1058
          &mut wr,
1059
          best.pred_mode_luma,
1060
          chroma_mode,
1061
          angle_delta,
1062
          best.ref_frames,
1063
          best.mvs,
1064
          bsize,
1065
          tile_bo,
1066
          best.skip,
1067
          cfl,
1068
          best.tx_size,
1069
          best.tx_type,
1070
          0,
1071
          &[],
1072
          rdo_type,
1073
          true, // For CFL, luma should be always reconstructed.
1074
          None,
1075
        );
1076
1077
        let rate = wr.tell_frac() - tell;
1078
1079
        // For CFL, tx-domain distortion is not an option.
1080
        let distortion =
1081
          compute_distortion(fi, ts, bsize, is_chroma_block, tile_bo, false);
1082
        let rd = compute_rd_cost(fi, rate, distortion);
1083
        if rd < best.rd_cost {
1084
          best.rd_cost = rd;
1085
          best.pred_mode_chroma = chroma_mode;
1086
          best.angle_delta = angle_delta;
1087
          best.has_coeff = has_coeff;
1088
          best.pred_cfl_params = cfl;
1089
        }
1090
1091
        cw.rollback(&cw_checkpoint);
1092
      }
1093
    }
1094
  }
1095
1096
  cw.bc.blocks.set_mode(tile_bo, bsize, best.pred_mode_luma);
1097
  cw.bc.blocks.set_ref_frames(tile_bo, bsize, best.ref_frames);
1098
  cw.bc.blocks.set_motion_vectors(tile_bo, bsize, best.mvs);
1099
1100
  assert!(best.rd_cost >= 0_f64);
1101
1102
  PartitionParameters {
1103
    bo: tile_bo,
1104
    bsize,
1105
    pred_mode_luma: best.pred_mode_luma,
1106
    pred_mode_chroma: best.pred_mode_chroma,
1107
    pred_cfl_params: best.pred_cfl_params,
1108
    angle_delta: best.angle_delta,
1109
    ref_frames: best.ref_frames,
1110
    mvs: best.mvs,
1111
    rd_cost: best.rd_cost,
1112
    skip: best.skip,
1113
    has_coeff: best.has_coeff,
1114
    tx_size: best.tx_size,
1115
    tx_type: best.tx_type,
1116
    sidx: best.sidx,
1117
  }
1118
}
1119
1120
#[profiling::function]
1121
fn inter_frame_rdo_mode_decision<T: Pixel>(
1122
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1123
  cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
1124
  inter_cfg: &InterConfig, cw_checkpoint: &ContextWriterCheckpoint,
1125
  rdo_type: RDOType,
1126
) -> PartitionParameters {
1127
  let mut best = PartitionParameters::default();
1128
1129
  // we can never have more than 7 reference frame sets
1130
  let mut ref_frames_set = ArrayVec::<_, 7>::new();
1131
  // again, max of 7 ref slots
1132
  let mut ref_slot_set = ArrayVec::<_, 7>::new();
1133
  // our implementation never returns more than 3 at the moment
1134
  let mut mvs_from_me = ArrayVec::<_, 3>::new();
1135
  let mut fwdref = None;
1136
  let mut bwdref = None;
1137
1138
  for i in inter_cfg.allowed_ref_frames().iter().copied() {
1139
    // Don't search LAST3 since it's used only for probs
1140
    if i == LAST3_FRAME {
1141
      continue;
1142
    }
1143
1144
    if !ref_slot_set.contains(&fi.ref_frames[i.to_index()]) {
1145
      if fwdref.is_none() && i.is_fwd_ref() {
1146
        fwdref = Some(ref_frames_set.len());
1147
      }
1148
      if bwdref.is_none() && i.is_bwd_ref() {
1149
        bwdref = Some(ref_frames_set.len());
1150
      }
1151
      ref_frames_set.push([i, NONE_FRAME]);
1152
      let slot_idx = fi.ref_frames[i.to_index()];
1153
      ref_slot_set.push(slot_idx);
1154
    }
1155
  }
1156
  assert!(!ref_frames_set.is_empty());
1157
1158
  let mut inter_mode_set = ArrayVec::<(PredictionMode, usize), 20>::new();
1159
  let mut mvs_set = ArrayVec::<[MotionVector; 2], 20>::new();
1160
  let mut satds = ArrayVec::<u32, 20>::new();
1161
  let mut mv_stacks = ArrayVec::<_, 20>::new();
1162
  let mut mode_contexts = ArrayVec::<_, 7>::new();
1163
1164
  for (i, &ref_frames) in ref_frames_set.iter().enumerate() {
1165
    let mut mv_stack = ArrayVec::<CandidateMV, 9>::new();
1166
    mode_contexts.push(cw.find_mvrefs(
1167
      tile_bo,
1168
      ref_frames,
1169
      &mut mv_stack,
1170
      bsize,
1171
      fi,
1172
      false,
1173
    ));
1174
1175
    let mut pmv = [MotionVector::default(); 2];
1176
    if !mv_stack.is_empty() {
1177
      pmv[0] = mv_stack[0].this_mv;
1178
    }
1179
    if mv_stack.len() > 1 {
1180
      pmv[1] = mv_stack[1].this_mv;
1181
    }
1182
1183
    let res = estimate_motion(
1184
      fi,
1185
      ts,
1186
      bsize.width(),
1187
      bsize.height(),
1188
      tile_bo,
1189
      ref_frames[0],
1190
      Some(pmv),
1191
      MVSamplingMode::CORNER { right: true, bottom: true },
1192
      false,
1193
      0,
1194
      None,
1195
    )
1196
    .unwrap_or_else(MotionSearchResult::empty);
1197
    let b_me = res.mv;
1198
1199
    mvs_from_me.push([b_me, MotionVector::default()]);
1200
1201
    for &x in RAV1E_INTER_MODES_MINIMAL {
1202
      inter_mode_set.push((x, i));
1203
    }
1204
    if !mv_stack.is_empty() {
1205
      inter_mode_set.push((PredictionMode::NEAR0MV, i));
1206
    }
1207
    if mv_stack.len() >= 2 {
1208
      inter_mode_set.push((PredictionMode::GLOBALMV, i));
1209
    }
1210
    let include_near_mvs = fi.config.speed_settings.motion.include_near_mvs;
1211
    if include_near_mvs {
1212
      if mv_stack.len() >= 3 {
1213
        inter_mode_set.push((PredictionMode::NEAR1MV, i));
1214
      }
1215
      if mv_stack.len() >= 4 {
1216
        inter_mode_set.push((PredictionMode::NEAR2MV, i));
1217
      }
1218
    }
1219
0
    let same_row_col = |x: &CandidateMV| {
1220
0
      x.this_mv.row == mvs_from_me[i][0].row
1221
0
        && x.this_mv.col == mvs_from_me[i][0].col
1222
0
    };
Unexecuted instantiation: rav1e::rdo::inter_frame_rdo_mode_decision::<u16>::{closure#0}
Unexecuted instantiation: rav1e::rdo::inter_frame_rdo_mode_decision::<u8>::{closure#0}
1223
    if !mv_stack
1224
      .iter()
1225
      .take(if include_near_mvs { 4 } else { 2 })
1226
      .any(same_row_col)
1227
      && (mvs_from_me[i][0].row != 0 || mvs_from_me[i][0].col != 0)
1228
    {
1229
      inter_mode_set.push((PredictionMode::NEWMV, i));
1230
    }
1231
1232
    mv_stacks.push(mv_stack);
1233
  }
1234
1235
  let sz = bsize.width_mi().min(bsize.height_mi());
1236
1237
  // To use non single reference modes, block width and height must be greater than 4.
1238
  if fi.reference_mode != ReferenceMode::SINGLE && sz >= 2 {
1239
    // Adding compound candidate
1240
    if let Some(r0) = fwdref {
1241
      if let Some(r1) = bwdref {
1242
        let ref_frames = [ref_frames_set[r0][0], ref_frames_set[r1][0]];
1243
        ref_frames_set.push(ref_frames);
1244
        let mv0 = mvs_from_me[r0][0];
1245
        let mv1 = mvs_from_me[r1][0];
1246
        mvs_from_me.push([mv0, mv1]);
1247
        let mut mv_stack = ArrayVec::<CandidateMV, 9>::new();
1248
        mode_contexts.push(cw.find_mvrefs(
1249
          tile_bo,
1250
          ref_frames,
1251
          &mut mv_stack,
1252
          bsize,
1253
          fi,
1254
          true,
1255
        ));
1256
        for &x in RAV1E_INTER_COMPOUND_MODES {
1257
          // exclude any NEAR mode based on speed setting
1258
          if fi.config.speed_settings.motion.include_near_mvs
1259
            || !x.has_nearmv()
1260
          {
1261
            let mv_stack_idx = ref_frames_set.len() - 1;
1262
            // exclude NEAR modes if the mv_stack is too short
1263
            if !(x.has_nearmv() && x.ref_mv_idx() >= mv_stack.len()) {
1264
              inter_mode_set.push((x, mv_stack_idx));
1265
            }
1266
          }
1267
        }
1268
        mv_stacks.push(mv_stack);
1269
      }
1270
    }
1271
  }
1272
1273
  let num_modes_rdo = if fi.config.speed_settings.prediction.prediction_modes
1274
    >= PredictionModesSetting::ComplexAll
1275
  {
1276
    inter_mode_set.len()
1277
  } else {
1278
    9 // This number is determined by AWCY test
1279
  };
1280
1281
0
  inter_mode_set.iter().for_each(|&(luma_mode, i)| {
1282
0
    let mvs = match luma_mode {
1283
0
      PredictionMode::NEWMV | PredictionMode::NEW_NEWMV => mvs_from_me[i],
1284
      PredictionMode::NEARESTMV | PredictionMode::NEAREST_NEARESTMV => {
1285
0
        if !mv_stacks[i].is_empty() {
1286
0
          [mv_stacks[i][0].this_mv, mv_stacks[i][0].comp_mv]
1287
        } else {
1288
0
          [MotionVector::default(); 2]
1289
        }
1290
      }
1291
      PredictionMode::NEAR0MV | PredictionMode::NEAR_NEAR0MV => {
1292
0
        if mv_stacks[i].len() > 1 {
1293
0
          [mv_stacks[i][1].this_mv, mv_stacks[i][1].comp_mv]
1294
        } else {
1295
0
          [MotionVector::default(); 2]
1296
        }
1297
      }
1298
      PredictionMode::NEAR1MV
1299
      | PredictionMode::NEAR2MV
1300
      | PredictionMode::NEAR_NEAR1MV
1301
0
      | PredictionMode::NEAR_NEAR2MV => [
1302
0
        mv_stacks[i][luma_mode.ref_mv_idx()].this_mv,
1303
0
        mv_stacks[i][luma_mode.ref_mv_idx()].comp_mv,
1304
0
      ],
1305
      PredictionMode::NEAREST_NEWMV => {
1306
0
        [mv_stacks[i][0].this_mv, mvs_from_me[i][1]]
1307
      }
1308
      PredictionMode::NEW_NEARESTMV => {
1309
0
        [mvs_from_me[i][0], mv_stacks[i][0].comp_mv]
1310
      }
1311
      PredictionMode::GLOBALMV | PredictionMode::GLOBAL_GLOBALMV => {
1312
0
        [MotionVector::default(); 2]
1313
      }
1314
      _ => {
1315
0
        unimplemented!();
1316
      }
1317
    };
1318
0
    mvs_set.push(mvs);
1319
1320
    // Calculate SATD for each mode
1321
0
    if num_modes_rdo != inter_mode_set.len() {
1322
0
      let tile_rect = ts.tile_rect();
1323
0
      let rec = &mut ts.rec.planes[0];
1324
0
      let po = tile_bo.plane_offset(rec.plane_cfg);
1325
0
      let mut rec_region =
1326
0
        rec.subregion_mut(Area::BlockStartingAt { bo: tile_bo.0 });
1327
0
1328
0
      luma_mode.predict_inter(
1329
0
        fi,
1330
0
        tile_rect,
1331
0
        0,
1332
0
        po,
1333
0
        &mut rec_region,
1334
0
        bsize.width(),
1335
0
        bsize.height(),
1336
0
        ref_frames_set[i],
1337
0
        mvs,
1338
0
        &mut ts.inter_compound_buffers,
1339
0
      );
1340
0
1341
0
      let plane_org = ts.input_tile.planes[0]
1342
0
        .subregion(Area::BlockStartingAt { bo: tile_bo.0 });
1343
0
      let plane_ref = rec_region.as_const();
1344
0
1345
0
      let satd = get_satd(
1346
0
        &plane_org,
1347
0
        &plane_ref,
1348
0
        bsize.width(),
1349
0
        bsize.height(),
1350
0
        fi.sequence.bit_depth,
1351
0
        fi.cpu_feature_level,
1352
0
      );
1353
0
      satds.push(satd);
1354
0
    } else {
1355
0
      satds.push(0);
1356
0
    }
1357
0
  });
Unexecuted instantiation: rav1e::rdo::inter_frame_rdo_mode_decision::<u16>::{closure#1}
Unexecuted instantiation: rav1e::rdo::inter_frame_rdo_mode_decision::<u8>::{closure#1}
1358
1359
  let mut sorted =
1360
    izip!(inter_mode_set, mvs_set, satds).collect::<ArrayVec<_, 20>>();
1361
  if num_modes_rdo != sorted.len() {
1362
    sorted.sort_by_key(|((_mode, _i), _mvs, satd)| *satd);
1363
  }
1364
1365
  sorted.iter().take(num_modes_rdo).for_each(
1366
0
    |&((luma_mode, i), mvs, _satd)| {
1367
0
      let mode_set_chroma = ArrayVec::from([luma_mode]);
1368
1369
0
      luma_chroma_mode_rdo(
1370
0
        luma_mode,
1371
0
        fi,
1372
0
        bsize,
1373
0
        tile_bo,
1374
0
        ts,
1375
0
        cw,
1376
0
        rdo_type,
1377
0
        cw_checkpoint,
1378
0
        &mut best,
1379
0
        mvs,
1380
0
        ref_frames_set[i],
1381
0
        &mode_set_chroma,
1382
        false,
1383
0
        mode_contexts[i],
1384
0
        &mv_stacks[i],
1385
0
        AngleDelta::default(),
1386
      );
1387
0
    },
Unexecuted instantiation: rav1e::rdo::inter_frame_rdo_mode_decision::<u16>::{closure#3}
Unexecuted instantiation: rav1e::rdo::inter_frame_rdo_mode_decision::<u8>::{closure#3}
1388
  );
1389
1390
  best
1391
}
1392
1393
#[profiling::function]
1394
fn intra_frame_rdo_mode_decision<T: Pixel>(
1395
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1396
  cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
1397
  cw_checkpoint: &ContextWriterCheckpoint, rdo_type: RDOType,
1398
  mut best: PartitionParameters, is_chroma_block: bool,
1399
) -> PartitionParameters {
1400
  let mut modes = ArrayVec::<_, INTRA_MODES>::new();
1401
1402
  // Reduce number of prediction modes at higher speed levels
1403
  let num_modes_rdo = if (fi.frame_type == FrameType::KEY
1404
    && fi.config.speed_settings.prediction.prediction_modes
1405
      >= PredictionModesSetting::ComplexKeyframes)
1406
    || (fi.frame_type.has_inter()
1407
      && fi.config.speed_settings.prediction.prediction_modes
1408
        >= PredictionModesSetting::ComplexAll)
1409
  {
1410
    7
1411
  } else {
1412
    3
1413
  };
1414
1415
  let intra_mode_set = RAV1E_INTRA_MODES;
1416
1417
  // Find mode with lowest rate cost
1418
  {
1419
    use crate::ec::cdf_to_pdf;
1420
1421
    let probs_all = cdf_to_pdf(if fi.frame_type.has_inter() {
1422
      cw.get_cdf_intra_mode(bsize)
1423
    } else {
1424
      cw.get_cdf_intra_mode_kf(tile_bo)
1425
    });
1426
1427
    modes.try_extend_from_slice(intra_mode_set).unwrap();
1428
0
    modes.sort_by_key(|&a| !probs_all[a as usize]);
Unexecuted instantiation: rav1e::rdo::intra_frame_rdo_mode_decision::<u16>::{closure#0}
Unexecuted instantiation: rav1e::rdo::intra_frame_rdo_mode_decision::<u8>::{closure#0}
1429
  }
1430
1431
  // If tx partition (i.e. fi.tx_mode_select) is enabled, the below intra prediction screening
1432
  // may be improved by emulating prediction for each tx block.
1433
  {
1434
    let satds = {
1435
      // FIXME: If tx partition is used, this whole sads block should be fixed
1436
      let tx_size = bsize.tx_size();
1437
      let mut edge_buf = Aligned::uninit_array();
1438
      let edge_buf = {
1439
        let rec = &ts.rec.planes[0].as_const();
1440
        let po = tile_bo.plane_offset(rec.plane_cfg);
1441
        // FIXME: If tx partition is used, get_intra_edges() should be called for each tx block
1442
        get_intra_edges(
1443
          &mut edge_buf,
1444
          rec,
1445
          tile_bo,
1446
          0,
1447
          0,
1448
          bsize,
1449
          po,
1450
          tx_size,
1451
          fi.sequence.bit_depth,
1452
          None,
1453
          fi.sequence.enable_intra_edge_filter,
1454
          IntraParam::None,
1455
        )
1456
      };
1457
1458
      let ief_params = if fi.sequence.enable_intra_edge_filter {
1459
        let above_block_info = ts.above_block_info(tile_bo, 0, 0);
1460
        let left_block_info = ts.left_block_info(tile_bo, 0, 0);
1461
        Some(IntraEdgeFilterParameters::new(
1462
          0,
1463
          above_block_info,
1464
          left_block_info,
1465
        ))
1466
      } else {
1467
        None
1468
      };
1469
1470
      let mut satds_all = [0; INTRA_MODES];
1471
      for &luma_mode in modes.iter().skip(num_modes_rdo / 2) {
1472
        let tile_rect = ts.tile_rect();
1473
        let rec = &mut ts.rec.planes[0];
1474
        let mut rec_region =
1475
          rec.subregion_mut(Area::BlockStartingAt { bo: tile_bo.0 });
1476
        // FIXME: If tx partition is used, luma_mode.predict_intra() should be called for each tx block
1477
        luma_mode.predict_intra(
1478
          tile_rect,
1479
          &mut rec_region,
1480
          tx_size,
1481
          fi.sequence.bit_depth,
1482
          &[0i16; 2],
1483
          IntraParam::None,
1484
          if luma_mode.is_directional() { ief_params } else { None },
1485
          &edge_buf,
1486
          fi.cpu_feature_level,
1487
        );
1488
1489
        let plane_org = ts.input_tile.planes[0]
1490
          .subregion(Area::BlockStartingAt { bo: tile_bo.0 });
1491
        let plane_ref = rec_region.as_const();
1492
1493
        satds_all[luma_mode as usize] = get_satd(
1494
          &plane_org,
1495
          &plane_ref,
1496
          tx_size.width(),
1497
          tx_size.height(),
1498
          fi.sequence.bit_depth,
1499
          fi.cpu_feature_level,
1500
        );
1501
      }
1502
      satds_all
1503
    };
1504
1505
0
    modes[num_modes_rdo / 2..].sort_by_key(|&a| satds[a as usize]);
Unexecuted instantiation: rav1e::rdo::intra_frame_rdo_mode_decision::<u16>::{closure#1}
Unexecuted instantiation: rav1e::rdo::intra_frame_rdo_mode_decision::<u8>::{closure#1}
1506
  }
1507
1508
  debug_assert!(num_modes_rdo >= 1);
1509
1510
0
  modes.iter().take(num_modes_rdo).for_each(|&luma_mode| {
1511
0
    let mvs = [MotionVector::default(); 2];
1512
0
    let ref_frames = [INTRA_FRAME, NONE_FRAME];
1513
0
    let mut mode_set_chroma = ArrayVec::<_, 2>::new();
1514
0
    mode_set_chroma.push(luma_mode);
1515
0
    if is_chroma_block && luma_mode != PredictionMode::DC_PRED {
1516
0
      mode_set_chroma.push(PredictionMode::DC_PRED);
1517
0
    }
1518
0
    luma_chroma_mode_rdo(
1519
0
      luma_mode,
1520
0
      fi,
1521
0
      bsize,
1522
0
      tile_bo,
1523
0
      ts,
1524
0
      cw,
1525
0
      rdo_type,
1526
0
      cw_checkpoint,
1527
0
      &mut best,
1528
0
      mvs,
1529
0
      ref_frames,
1530
0
      &mode_set_chroma,
1531
      true,
1532
      0,
1533
0
      &ArrayVec::<CandidateMV, 9>::new(),
1534
0
      AngleDelta::default(),
1535
    );
1536
0
  });
Unexecuted instantiation: rav1e::rdo::intra_frame_rdo_mode_decision::<u16>::{closure#2}
Unexecuted instantiation: rav1e::rdo::intra_frame_rdo_mode_decision::<u8>::{closure#2}
1537
1538
  if fi.config.speed_settings.prediction.fine_directional_intra
1539
    && bsize >= BlockSize::BLOCK_8X8
1540
  {
1541
    // Find the best angle delta for the current best prediction mode
1542
    let luma_deltas = best.pred_mode_luma.angle_delta_count();
1543
    let chroma_deltas = best.pred_mode_chroma.angle_delta_count();
1544
1545
    let mvs = [MotionVector::default(); 2];
1546
    let ref_frames = [INTRA_FRAME, NONE_FRAME];
1547
    let mode_set_chroma = [best.pred_mode_chroma];
1548
    let mv_stack = ArrayVec::<_, 9>::new();
1549
    let mut best_angle_delta = best.angle_delta;
1550
0
    let mut angle_delta_rdo = |y, uv| -> AngleDelta {
1551
0
      if best.angle_delta.y != y || best.angle_delta.uv != uv {
1552
0
        luma_chroma_mode_rdo(
1553
0
          best.pred_mode_luma,
1554
0
          fi,
1555
0
          bsize,
1556
0
          tile_bo,
1557
0
          ts,
1558
0
          cw,
1559
0
          rdo_type,
1560
0
          cw_checkpoint,
1561
0
          &mut best,
1562
0
          mvs,
1563
0
          ref_frames,
1564
0
          &mode_set_chroma,
1565
0
          true,
1566
0
          0,
1567
0
          &mv_stack,
1568
0
          AngleDelta { y, uv },
1569
0
        );
1570
0
      }
1571
0
      best.angle_delta
1572
0
    };
Unexecuted instantiation: rav1e::rdo::intra_frame_rdo_mode_decision::<u16>::{closure#3}
Unexecuted instantiation: rav1e::rdo::intra_frame_rdo_mode_decision::<u8>::{closure#3}
1573
1574
    for i in 0..luma_deltas {
1575
      let angle_delta_y =
1576
        if luma_deltas == 1 { 0 } else { i - MAX_ANGLE_DELTA as i8 };
1577
      best_angle_delta = angle_delta_rdo(angle_delta_y, best_angle_delta.uv);
1578
    }
1579
    for j in 0..chroma_deltas {
1580
      let angle_delta_uv =
1581
        if chroma_deltas == 1 { 0 } else { j - MAX_ANGLE_DELTA as i8 };
1582
      best_angle_delta = angle_delta_rdo(best_angle_delta.y, angle_delta_uv);
1583
    }
1584
  }
1585
1586
  best
1587
}
1588
1589
/// # Panics
1590
///
1591
/// - If the block size is invalid for subsampling.
1592
#[profiling::function]
1593
pub fn rdo_cfl_alpha<T: Pixel>(
1594
  ts: &mut TileStateMut<'_, T>, tile_bo: TileBlockOffset, bsize: BlockSize,
1595
  luma_tx_size: TxSize, fi: &FrameInvariants<T>,
1596
) -> Option<CFLParams> {
1597
  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
1598
  let uv_tx_size = bsize.largest_chroma_tx_size(xdec, ydec);
1599
  debug_assert!(
1600
    bsize.subsampled_size(xdec, ydec).unwrap() == uv_tx_size.block_size()
1601
  );
1602
1603
  let frame_bo = ts.to_frame_block_offset(tile_bo);
1604
  let (visible_tx_w, visible_tx_h) = clip_visible_bsize(
1605
    (fi.width + xdec) >> xdec,
1606
    (fi.height + ydec) >> ydec,
1607
    uv_tx_size.block_size(),
1608
    (frame_bo.0.x << MI_SIZE_LOG2) >> xdec,
1609
    (frame_bo.0.y << MI_SIZE_LOG2) >> ydec,
1610
  );
1611
1612
  if visible_tx_w == 0 || visible_tx_h == 0 {
1613
    return None;
1614
  };
1615
  let mut ac = Aligned::<[MaybeUninit<i16>; 32 * 32]>::uninit_array();
1616
  let ac = luma_ac(&mut ac.data, ts, tile_bo, bsize, luma_tx_size, fi);
1617
  let best_alpha: ArrayVec<i16, 2> = (1..3)
1618
0
    .map(|p| {
1619
0
      let &PlaneConfig { xdec, ydec, .. } = ts.rec.planes[p].plane_cfg;
1620
0
      let tile_rect = ts.tile_rect().decimated(xdec, ydec);
1621
0
      let rec = &mut ts.rec.planes[p];
1622
0
      let input = &ts.input_tile.planes[p];
1623
0
      let po = tile_bo.plane_offset(rec.plane_cfg);
1624
0
      let mut edge_buf = Aligned::uninit_array();
1625
0
      let edge_buf = get_intra_edges(
1626
0
        &mut edge_buf,
1627
0
        &rec.as_const(),
1628
0
        tile_bo,
1629
        0,
1630
        0,
1631
0
        bsize,
1632
0
        po,
1633
0
        uv_tx_size,
1634
0
        fi.sequence.bit_depth,
1635
0
        Some(PredictionMode::UV_CFL_PRED),
1636
0
        fi.sequence.enable_intra_edge_filter,
1637
0
        IntraParam::None,
1638
      );
1639
0
      let mut alpha_cost = |alpha: i16| -> u64 {
1640
0
        let mut rec_region =
1641
0
          rec.subregion_mut(Area::BlockStartingAt { bo: tile_bo.0 });
1642
0
        PredictionMode::UV_CFL_PRED.predict_intra(
1643
0
          tile_rect,
1644
0
          &mut rec_region,
1645
0
          uv_tx_size,
1646
0
          fi.sequence.bit_depth,
1647
0
          ac,
1648
0
          IntraParam::Alpha(alpha),
1649
0
          None,
1650
0
          &edge_buf,
1651
0
          fi.cpu_feature_level,
1652
        );
1653
0
        sse_wxh(
1654
0
          &input.subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
1655
0
          &rec_region.as_const(),
1656
0
          visible_tx_w,
1657
0
          visible_tx_h,
1658
0
          |_, _| DistortionScale::default(), // We're not doing RDO here.
Unexecuted instantiation: rav1e::rdo::rdo_cfl_alpha::<u16>::{closure#0}::{closure#0}::{closure#0}
Unexecuted instantiation: rav1e::rdo::rdo_cfl_alpha::<u8>::{closure#0}::{closure#0}::{closure#0}
1659
0
          fi.sequence.bit_depth,
1660
0
          fi.cpu_feature_level,
1661
        )
1662
        .0
1663
0
      };
Unexecuted instantiation: rav1e::rdo::rdo_cfl_alpha::<u16>::{closure#0}::{closure#0}
Unexecuted instantiation: rav1e::rdo::rdo_cfl_alpha::<u8>::{closure#0}::{closure#0}
1664
0
      let mut best = (alpha_cost(0), 0);
1665
0
      let mut count = 2;
1666
0
      for alpha in 1i16..=16i16 {
1667
0
        let cost = (alpha_cost(alpha), alpha_cost(-alpha));
1668
0
        if cost.0 < best.0 {
1669
0
          best = (cost.0, alpha);
1670
0
          count += 2;
1671
0
        }
1672
0
        if cost.1 < best.0 {
1673
0
          best = (cost.1, -alpha);
1674
0
          count += 2;
1675
0
        }
1676
0
        if count < alpha {
1677
0
          break;
1678
0
        }
1679
      }
1680
0
      best.1
1681
0
    })
Unexecuted instantiation: rav1e::rdo::rdo_cfl_alpha::<u16>::{closure#0}
Unexecuted instantiation: rav1e::rdo::rdo_cfl_alpha::<u8>::{closure#0}
1682
    .collect();
1683
1684
  if best_alpha[0] == 0 && best_alpha[1] == 0 {
1685
    None
1686
  } else {
1687
    Some(CFLParams::from_alpha(best_alpha[0], best_alpha[1]))
1688
  }
1689
}
1690
1691
/// RDO-based transform type decision
1692
/// If `cw_checkpoint` is `None`, a checkpoint for cw's (`ContextWriter`) current
1693
/// state is created and stored for later use.
1694
///
1695
/// # Panics
1696
///
1697
/// - If a writer checkpoint is never created before or within the function.
1698
///   This should never happen and indicates a development error.
1699
/// - If the best RD found is negative.
1700
///   This should never happen and indicates a development error.
1701
0
pub fn rdo_tx_type_decision<T: Pixel>(
1702
0
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1703
0
  cw: &mut ContextWriter, cw_checkpoint: &mut Option<ContextWriterCheckpoint>,
1704
0
  mode: PredictionMode, ref_frames: [RefType; 2], mvs: [MotionVector; 2],
1705
0
  bsize: BlockSize, tile_bo: TileBlockOffset, tx_size: TxSize, tx_set: TxSet,
1706
0
  tx_types: &[TxType], cur_best_rd: f64,
1707
0
) -> (TxType, f64) {
1708
0
  let mut best_type = TxType::DCT_DCT;
1709
0
  let mut best_rd = f64::MAX;
1710
1711
0
  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
1712
0
  let is_chroma_block =
1713
0
    has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling);
1714
1715
0
  let is_inter = !mode.is_intra();
1716
1717
0
  if cw_checkpoint.is_none() {
1718
0
    // Only run this on the first call.
1719
0
    // Prevents creating multiple checkpoints for own version of cw
1720
0
    *cw_checkpoint =
1721
0
      Some(cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling));
1722
0
  }
1723
1724
0
  let rdo_type = if fi.use_tx_domain_distortion {
1725
0
    RDOType::TxDistRealRate
1726
  } else {
1727
0
    RDOType::PixelDistRealRate
1728
  };
1729
0
  let need_recon_pixel = tx_size.block_size() != bsize && !is_inter;
1730
1731
0
  let mut first_iteration = true;
1732
0
  for &tx_type in tx_types {
1733
    // Skip unsupported transform types
1734
0
    if av1_tx_used[tx_set as usize][tx_type as usize] == 0 {
1735
0
      continue;
1736
0
    }
1737
1738
0
    if is_inter {
1739
0
      motion_compensate(
1740
0
        fi, ts, cw, mode, ref_frames, mvs, bsize, tile_bo, true,
1741
0
      );
1742
0
    }
1743
1744
0
    let mut wr = WriterCounter::new();
1745
0
    let tell = wr.tell_frac();
1746
0
    let (_, tx_dist) = if is_inter {
1747
0
      write_tx_tree(
1748
0
        fi,
1749
0
        ts,
1750
0
        cw,
1751
0
        &mut wr,
1752
0
        mode,
1753
        0,
1754
0
        tile_bo,
1755
0
        bsize,
1756
0
        tx_size,
1757
0
        tx_type,
1758
        false,
1759
        true,
1760
0
        rdo_type,
1761
0
        need_recon_pixel,
1762
      )
1763
    } else {
1764
0
      write_tx_blocks(
1765
0
        fi,
1766
0
        ts,
1767
0
        cw,
1768
0
        &mut wr,
1769
0
        mode,
1770
0
        mode,
1771
0
        AngleDelta::default(),
1772
0
        tile_bo,
1773
0
        bsize,
1774
0
        tx_size,
1775
0
        tx_type,
1776
        false,
1777
0
        CFLParams::default(), // Unused.
1778
        true,
1779
0
        rdo_type,
1780
0
        need_recon_pixel,
1781
      )
1782
    };
1783
1784
0
    let rate = wr.tell_frac() - tell;
1785
0
    let distortion = if fi.use_tx_domain_distortion {
1786
0
      compute_tx_distortion(
1787
0
        fi,
1788
0
        ts,
1789
0
        bsize,
1790
0
        is_chroma_block,
1791
0
        tile_bo,
1792
0
        tx_dist,
1793
        false,
1794
        true,
1795
      )
1796
    } else {
1797
0
      compute_distortion(fi, ts, bsize, is_chroma_block, tile_bo, true)
1798
    };
1799
0
    cw.rollback(cw_checkpoint.as_ref().unwrap());
1800
1801
0
    let rd = compute_rd_cost(fi, rate, distortion);
1802
1803
0
    if first_iteration {
1804
      // We use an optimization to early exit after testing the first
1805
      // transform type if the cost is higher than the existing best.
1806
      // The idea is that if this transform size is not better than the
1807
      // previous size, it is not worth testing remaining modes for this size.
1808
0
      if rd > cur_best_rd {
1809
0
        break;
1810
0
      }
1811
0
      first_iteration = false;
1812
0
    }
1813
1814
0
    if rd < best_rd {
1815
0
      best_rd = rd;
1816
0
      best_type = tx_type;
1817
0
    }
1818
  }
1819
1820
0
  assert!(best_rd >= 0_f64);
1821
1822
0
  (best_type, best_rd)
1823
0
}
Unexecuted instantiation: rav1e::rdo::rdo_tx_type_decision::<u16>
Unexecuted instantiation: rav1e::rdo::rdo_tx_type_decision::<u8>
1824
1825
0
pub fn get_sub_partitions(
1826
0
  four_partitions: &[TileBlockOffset; 4], partition: PartitionType,
1827
0
) -> ArrayVec<TileBlockOffset, 4> {
1828
0
  let mut partition_offsets = ArrayVec::<TileBlockOffset, 4>::new();
1829
1830
0
  partition_offsets.push(four_partitions[0]);
1831
1832
0
  if partition == PARTITION_NONE {
1833
0
    return partition_offsets;
1834
0
  }
1835
0
  if partition == PARTITION_VERT || partition == PARTITION_SPLIT {
1836
0
    partition_offsets.push(four_partitions[1]);
1837
0
  };
1838
0
  if partition == PARTITION_HORZ || partition == PARTITION_SPLIT {
1839
0
    partition_offsets.push(four_partitions[2]);
1840
0
  };
1841
0
  if partition == PARTITION_SPLIT {
1842
0
    partition_offsets.push(four_partitions[3]);
1843
0
  };
1844
1845
0
  partition_offsets
1846
0
}
1847
1848
#[inline(always)]
1849
0
fn rdo_partition_none<T: Pixel>(
1850
0
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1851
0
  cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
1852
0
  inter_cfg: &InterConfig, child_modes: &mut ArrayVec<PartitionParameters, 4>,
1853
0
) -> f64 {
1854
0
  debug_assert!(tile_bo.0.x < ts.mi_width && tile_bo.0.y < ts.mi_height);
1855
1856
0
  let mode = rdo_mode_decision(fi, ts, cw, bsize, tile_bo, inter_cfg);
1857
0
  let cost = mode.rd_cost;
1858
1859
0
  child_modes.push(mode);
1860
1861
0
  cost
1862
0
}
Unexecuted instantiation: rav1e::rdo::rdo_partition_none::<u16>
Unexecuted instantiation: rav1e::rdo::rdo_partition_none::<u8>
1863
1864
// VERTICAL, HORIZONTAL or simple SPLIT
1865
#[inline(always)]
1866
0
fn rdo_partition_simple<T: Pixel, W: Writer>(
1867
0
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1868
0
  cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W,
1869
0
  bsize: BlockSize, tile_bo: TileBlockOffset, inter_cfg: &InterConfig,
1870
0
  partition: PartitionType, rdo_type: RDOType, best_rd: f64,
1871
0
  child_modes: &mut ArrayVec<PartitionParameters, 4>,
1872
0
) -> Option<f64> {
1873
0
  debug_assert!(tile_bo.0.x < ts.mi_width && tile_bo.0.y < ts.mi_height);
1874
0
  let subsize = bsize.subsize(partition).unwrap();
1875
1876
0
  let cost = if bsize >= BlockSize::BLOCK_8X8 {
1877
0
    let w: &mut W = if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
1878
0
    let tell = w.tell_frac();
1879
0
    cw.write_partition(w, tile_bo, partition, bsize);
1880
0
    compute_rd_cost(fi, w.tell_frac() - tell, ScaledDistortion::zero())
1881
  } else {
1882
0
    0.0
1883
  };
1884
1885
0
  let hbsw = subsize.width_mi(); // Half the block size width in blocks
1886
0
  let hbsh = subsize.height_mi(); // Half the block size height in blocks
1887
0
  let four_partitions = [
1888
0
    tile_bo,
1889
0
    TileBlockOffset(BlockOffset { x: tile_bo.0.x + hbsw, y: tile_bo.0.y }),
1890
0
    TileBlockOffset(BlockOffset { x: tile_bo.0.x, y: tile_bo.0.y + hbsh }),
1891
0
    TileBlockOffset(BlockOffset {
1892
0
      x: tile_bo.0.x + hbsw,
1893
0
      y: tile_bo.0.y + hbsh,
1894
0
    }),
1895
0
  ];
1896
1897
0
  let partitions = get_sub_partitions(&four_partitions, partition);
1898
1899
0
  let mut rd_cost_sum = 0.0;
1900
1901
0
  for offset in partitions {
1902
0
    let hbs = subsize.width_mi() >> 1;
1903
0
    let has_cols = offset.0.x + hbs < ts.mi_width;
1904
0
    let has_rows = offset.0.y + hbs < ts.mi_height;
1905
1906
0
    if has_cols && has_rows {
1907
0
      let mode_decision =
1908
0
        rdo_mode_decision(fi, ts, cw, subsize, offset, inter_cfg);
1909
1910
0
      rd_cost_sum += mode_decision.rd_cost;
1911
1912
0
      if fi.enable_early_exit && rd_cost_sum > best_rd {
1913
0
        return None;
1914
0
      }
1915
0
      if subsize >= BlockSize::BLOCK_8X8 && subsize.is_sqr() {
1916
0
        let w: &mut W =
1917
0
          if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
1918
0
        cw.write_partition(w, offset, PartitionType::PARTITION_NONE, subsize);
1919
0
      }
1920
0
      encode_block_with_modes(
1921
0
        fi,
1922
0
        ts,
1923
0
        cw,
1924
0
        w_pre_cdef,
1925
0
        w_post_cdef,
1926
0
        subsize,
1927
0
        offset,
1928
0
        &mode_decision,
1929
0
        rdo_type,
1930
0
        None,
1931
      );
1932
0
      child_modes.push(mode_decision);
1933
    } else {
1934
      //rd_cost_sum += f64::MAX;
1935
0
      return None;
1936
    }
1937
  }
1938
1939
0
  Some(cost + rd_cost_sum)
1940
0
}
Unexecuted instantiation: rav1e::rdo::rdo_partition_simple::<u16, rav1e::ec::WriterBase<rav1e::ec::WriterRecorder>>
Unexecuted instantiation: rav1e::rdo::rdo_partition_simple::<u8, rav1e::ec::WriterBase<rav1e::ec::WriterRecorder>>
1941
1942
/// RDO-based single level partitioning decision
1943
///
1944
/// # Panics
1945
///
1946
/// - If the best RD found is negative.
1947
///   This should never happen, and indicates a development error.
1948
#[profiling::function]
1949
pub fn rdo_partition_decision<T: Pixel, W: Writer>(
1950
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1951
  cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W,
1952
  bsize: BlockSize, tile_bo: TileBlockOffset,
1953
  cached_block: &PartitionGroupParameters, partition_types: &[PartitionType],
1954
  rdo_type: RDOType, inter_cfg: &InterConfig,
1955
) -> PartitionGroupParameters {
1956
  let mut best_partition = cached_block.part_type;
1957
  let mut best_rd = cached_block.rd_cost;
1958
  let mut best_pred_modes = cached_block.part_modes.clone();
1959
1960
  let cw_checkpoint = cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling);
1961
  let w_pre_checkpoint = w_pre_cdef.checkpoint();
1962
  let w_post_checkpoint = w_post_cdef.checkpoint();
1963
1964
  for &partition in partition_types {
1965
    // Do not re-encode results we already have
1966
    if partition == cached_block.part_type {
1967
      continue;
1968
    }
1969
1970
    let mut child_modes = ArrayVec::<_, 4>::new();
1971
1972
    let cost = match partition {
1973
      PARTITION_NONE if bsize <= BlockSize::BLOCK_64X64 => {
1974
        Some(rdo_partition_none(
1975
          fi,
1976
          ts,
1977
          cw,
1978
          bsize,
1979
          tile_bo,
1980
          inter_cfg,
1981
          &mut child_modes,
1982
        ))
1983
      }
1984
      PARTITION_SPLIT | PARTITION_HORZ | PARTITION_VERT => {
1985
        rdo_partition_simple(
1986
          fi,
1987
          ts,
1988
          cw,
1989
          w_pre_cdef,
1990
          w_post_cdef,
1991
          bsize,
1992
          tile_bo,
1993
          inter_cfg,
1994
          partition,
1995
          rdo_type,
1996
          best_rd,
1997
          &mut child_modes,
1998
        )
1999
      }
2000
      _ => {
2001
        unreachable!();
2002
      }
2003
    };
2004
2005
    if let Some(rd) = cost {
2006
      if rd < best_rd {
2007
        best_rd = rd;
2008
        best_partition = partition;
2009
        best_pred_modes.clone_from(&child_modes);
2010
      }
2011
    }
2012
    cw.rollback(&cw_checkpoint);
2013
    w_pre_cdef.rollback(&w_pre_checkpoint);
2014
    w_post_cdef.rollback(&w_post_checkpoint);
2015
  }
2016
2017
  assert!(best_rd >= 0_f64);
2018
2019
  PartitionGroupParameters {
2020
    rd_cost: best_rd,
2021
    part_type: best_partition,
2022
    part_modes: best_pred_modes,
2023
  }
2024
}
2025
2026
#[profiling::function]
2027
fn rdo_loop_plane_error<T: Pixel>(
2028
  base_sbo: TileSuperBlockOffset, offset_sbo: TileSuperBlockOffset,
2029
  sb_w: usize, sb_h: usize, fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
2030
  blocks: &TileBlocks<'_>, test: &Frame<T>, src: &Tile<'_, T>, pli: usize,
2031
) -> ScaledDistortion {
2032
  let sb_w_blocks =
2033
    if fi.sequence.use_128x128_superblock { 16 } else { 8 } * sb_w;
2034
  let sb_h_blocks =
2035
    if fi.sequence.use_128x128_superblock { 16 } else { 8 } * sb_h;
2036
  // Each direction block is 8x8 in y, potentially smaller if subsampled in chroma
2037
  // accumulating in-frame and unpadded
2038
  let mut err = Distortion::zero();
2039
  for by in 0..sb_h_blocks {
2040
    for bx in 0..sb_w_blocks {
2041
      let loop_bo = offset_sbo.block_offset(bx << 1, by << 1);
2042
      if loop_bo.0.x < blocks.cols() && loop_bo.0.y < blocks.rows() {
2043
        let src_plane = &src.planes[pli];
2044
        let test_plane = &test.planes[pli];
2045
        let PlaneConfig { xdec, ydec, .. } = *src_plane.plane_cfg;
2046
        debug_assert_eq!(xdec, test_plane.cfg.xdec);
2047
        debug_assert_eq!(ydec, test_plane.cfg.ydec);
2048
2049
        // Unfortunately, our distortion biases are only available via
2050
        // Frame-absolute addressing, so we need a block offset
2051
        // relative to the full frame origin (not the tile or analysis
2052
        // area)
2053
        let frame_bo = (base_sbo + offset_sbo).block_offset(bx << 1, by << 1);
2054
        let bias = distortion_scale(
2055
          fi,
2056
          ts.to_frame_block_offset(frame_bo),
2057
          BlockSize::BLOCK_8X8,
2058
        );
2059
2060
        let src_region =
2061
          src_plane.subregion(Area::BlockStartingAt { bo: loop_bo.0 });
2062
        let test_region =
2063
          test_plane.region(Area::BlockStartingAt { bo: loop_bo.0 });
2064
2065
        err += if pli == 0 {
2066
          // For loop filters, we intentionally use cdef_dist even with
2067
          // `--tune Psnr`. Using SSE instead gives no PSNR gain but has a
2068
          // significant negative impact on other metrics and visual quality.
2069
          RawDistortion(cdef_dist_kernel(
2070
            &src_region,
2071
            &test_region,
2072
            8,
2073
            8,
2074
            fi.sequence.bit_depth,
2075
            fi.cpu_feature_level,
2076
          ) as u64)
2077
            * bias
2078
        } else {
2079
          sse_wxh(
2080
            &src_region,
2081
            &test_region,
2082
            8 >> xdec,
2083
            8 >> ydec,
2084
            |_, _| bias,
2085
            fi.sequence.bit_depth,
2086
            fi.cpu_feature_level,
2087
          )
2088
        };
2089
      }
2090
    }
2091
  }
2092
  err * fi.dist_scale[pli]
2093
}
2094
2095
/// Passed in a superblock offset representing the upper left corner of
2096
/// the LRU area we're optimizing.  This area covers the largest LRU in
2097
/// any of the present planes, but may consist of a number of
2098
/// superblocks and full, smaller LRUs in the other planes
2099
///
2100
/// # Panics
2101
///
2102
/// - If both CDEF and LRF are disabled.
2103
#[profiling::function]
2104
pub fn rdo_loop_decision<T: Pixel, W: Writer>(
2105
  base_sbo: TileSuperBlockOffset, fi: &FrameInvariants<T>,
2106
  ts: &mut TileStateMut<'_, T>, cw: &mut ContextWriter, w: &mut W,
2107
  deblock_p: bool,
2108
) {
2109
  let planes = if fi.sequence.chroma_sampling == ChromaSampling::Cs400 {
2110
    1
2111
  } else {
2112
    MAX_PLANES
2113
  };
2114
  assert!(fi.sequence.enable_cdef || fi.sequence.enable_restoration);
2115
  // Determine area of optimization: Which plane has the largest LRUs?
2116
  // How many LRUs for each?
2117
  let mut sb_w = 1; // how many superblocks wide the largest LRU
2118
                    // is/how many SBs we're processing (same thing)
2119
  let mut sb_h = 1; // how many superblocks tall the largest LRU
2120
                    // is/how many SBs we're processing (same thing)
2121
  let mut lru_w = [0; MAX_PLANES]; // how many LRUs we're processing
2122
  let mut lru_h = [0; MAX_PLANES]; // how many LRUs we're processing
2123
  for pli in 0..planes {
2124
    let sb_h_shift = ts.restoration.planes[pli].rp_cfg.sb_h_shift;
2125
    let sb_v_shift = ts.restoration.planes[pli].rp_cfg.sb_v_shift;
2126
    if sb_w < (1 << sb_h_shift) {
2127
      sb_w = 1 << sb_h_shift;
2128
    }
2129
    if sb_h < (1 << sb_v_shift) {
2130
      sb_h = 1 << sb_v_shift;
2131
    }
2132
  }
2133
  for pli in 0..planes {
2134
    let sb_h_shift = ts.restoration.planes[pli].rp_cfg.sb_h_shift;
2135
    let sb_v_shift = ts.restoration.planes[pli].rp_cfg.sb_v_shift;
2136
    lru_w[pli] = sb_w / (1 << sb_h_shift);
2137
    lru_h[pli] = sb_h / (1 << sb_v_shift);
2138
  }
2139
2140
  // The superblock width/height determinations may be calling for us
2141
  // to compute over superblocks that do not actually exist in the
2142
  // frame (off the right or lower edge).  Trim sb width/height down
2143
  // to actual superblocks.  Note that these last superblocks on the
2144
  // right/bottom may themselves still span the edge of the frame, but
2145
  // they do hold at least some visible pixels.
2146
  sb_w = sb_w.min(ts.sb_width - base_sbo.0.x);
2147
  sb_h = sb_h.min(ts.sb_height - base_sbo.0.y);
2148
2149
  // We need to know the Y visible pixel limits as well (the
2150
  // sb_w/sb_h figures above can be used to determine how many
2151
  // allocated pixels, possibly beyond the visible frame, exist).
2152
  let crop_w =
2153
    fi.width - ((ts.sbo.0.x + base_sbo.0.x) << SUPERBLOCK_TO_PLANE_SHIFT);
2154
  let crop_h =
2155
    fi.height - ((ts.sbo.0.y + base_sbo.0.y) << SUPERBLOCK_TO_PLANE_SHIFT);
2156
  let pixel_w = crop_w.min(sb_w << SUPERBLOCK_TO_PLANE_SHIFT);
2157
  let pixel_h = crop_h.min(sb_h << SUPERBLOCK_TO_PLANE_SHIFT);
2158
2159
  // Based on `RestorationState::new`
2160
  const MAX_SB_SHIFT: usize = 4;
2161
  const MAX_SB_SIZE: usize = 1 << MAX_SB_SHIFT;
2162
  const MAX_LRU_SIZE: usize = MAX_SB_SIZE;
2163
2164
  // Static allocation relies on the "minimal LRU area for all N planes" invariant.
2165
  let mut best_index = [-1; MAX_SB_SIZE * MAX_SB_SIZE];
2166
  let mut best_lrf =
2167
    [[RestorationFilter::None; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE];
2168
2169
  // due to imprecision in the reconstruction parameter solver, we
2170
  // need to make sure we don't fall into a limit cycle.  Track our
2171
  // best cost at LRF so that we can break if we get a solution that doesn't
2172
  // improve at the reconstruction stage.
2173
  let mut best_lrf_cost = [[-1.0; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE];
2174
2175
  // sub-setted region of the TileBlocks for our working frame area.
2176
  // Note that the size of this subset is what signals CDEF as to the
2177
  // actual coded size.
2178
  let mut tileblocks_subset = cw.bc.blocks.subregion_mut(
2179
    base_sbo.block_offset(0, 0).0.x,
2180
    base_sbo.block_offset(0, 0).0.y,
2181
    sb_w << SUPERBLOCK_TO_BLOCK_SHIFT,
2182
    sb_h << SUPERBLOCK_TO_BLOCK_SHIFT,
2183
  );
2184
2185
  // cdef doesn't run on superblocks that are completely skipped.
2186
  // Determine which super blocks are marked as skipped so we can avoid running
2187
  // them. If all blocks are skipped, we can avoid some of the overhead related
2188
  // to setting up for cdef.
2189
  let mut cdef_skip = [true; MAX_SB_SIZE * MAX_SB_SIZE];
2190
  let mut cdef_skip_all = true;
2191
  if fi.sequence.enable_cdef {
2192
    for sby in 0..sb_h {
2193
      for sbx in 0..sb_w {
2194
        let blocks = tileblocks_subset.subregion(16 * sbx, 16 * sby, 16, 16);
2195
        let mut skip = true;
2196
        for y in 0..blocks.rows() {
2197
          for block in blocks[y].iter() {
2198
            skip &= block.skip;
2199
          }
2200
        }
2201
        cdef_skip[sby * MAX_SB_SIZE + sbx] = skip;
2202
        cdef_skip_all &= skip;
2203
      }
2204
    }
2205
  }
2206
2207
  // Unlike cdef, loop restoration will run regardless of whether blocks are
2208
  // skipped or not. At the same time, the most significant improvement will
2209
  // generally be from un-skipped blocks, so lru is only performed if there are
2210
  // un-skipped blocks.
2211
  // This should be the same as `cdef_skip_all`, except when cdef is disabled.
2212
  let mut lru_skip_all = true;
2213
  let mut lru_skip = [[true; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE];
2214
  if fi.sequence.enable_restoration {
2215
    if fi.config.speed_settings.lru_on_skip {
2216
      lru_skip_all = false;
2217
      lru_skip = [[false; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE];
2218
    } else {
2219
      for pli in 0..planes {
2220
        // width, in sb, of an LRU in this plane
2221
        let lru_sb_w = 1 << ts.restoration.planes[pli].rp_cfg.sb_h_shift;
2222
        // height, in sb, of an LRU in this plane
2223
        let lru_sb_h = 1 << ts.restoration.planes[pli].rp_cfg.sb_v_shift;
2224
        for lru_y in 0..lru_h[pli] {
2225
          // number of LRUs vertically
2226
          for lru_x in 0..lru_w[pli] {
2227
            // number of LRUs horizontally
2228
2229
            let loop_sbo = TileSuperBlockOffset(SuperBlockOffset {
2230
              x: lru_x * lru_sb_w,
2231
              y: lru_y * lru_sb_h,
2232
            });
2233
2234
            if !ts.restoration.has_restoration_unit(
2235
              base_sbo + loop_sbo,
2236
              pli,
2237
              false,
2238
            ) {
2239
              continue;
2240
            }
2241
2242
            let start = loop_sbo.block_offset(0, 0).0;
2243
            let size = TileSuperBlockOffset(SuperBlockOffset {
2244
              x: lru_sb_w,
2245
              y: lru_sb_h,
2246
            })
2247
            .block_offset(0, 0)
2248
            .0;
2249
2250
            let blocks =
2251
              tileblocks_subset.subregion(start.x, start.y, size.x, size.y);
2252
            let mut skip = true;
2253
            for y in 0..blocks.rows() {
2254
              for block in blocks[y].iter() {
2255
                skip &= block.skip;
2256
              }
2257
            }
2258
            lru_skip[lru_y * MAX_LRU_SIZE + lru_x][pli] = skip;
2259
            lru_skip_all &= skip;
2260
          }
2261
        }
2262
      }
2263
    }
2264
  }
2265
2266
  // Return early if all blocks are skipped for lru and cdef.
2267
  if lru_skip_all && cdef_skip_all {
2268
    return;
2269
  }
2270
2271
  // Loop filter RDO is an iterative process and we need temporary
2272
  // scratch data to hold the results of deblocking, cdef, and the
2273
  // loop reconstruction filter so that each can be partially updated
2274
  // without recomputing the entire stack.  Construct
2275
  // largest-LRU-sized frames for each, accounting for padding
2276
  // required by deblocking, cdef and [optionally] LR.
2277
  let mut rec_subset = ts
2278
    .rec
2279
    .subregion(Area::BlockRect {
2280
      bo: base_sbo.block_offset(0, 0).0,
2281
      width: (pixel_w + 7) >> 3 << 3,
2282
      height: (pixel_h + 7) >> 3 << 3,
2283
    })
2284
    .scratch_copy();
2285
2286
  // const, no need to copy, just need the subregion (but do zero the
2287
  // origin to match the other copies/new backing frames).
2288
  let src_subset = ts
2289
    .input_tile
2290
    .subregion(Area::BlockRect {
2291
      bo: base_sbo.block_offset(0, 0).0,
2292
      width: (pixel_w + 7) >> 3 << 3,
2293
      height: (pixel_h + 7) >> 3 << 3,
2294
    })
2295
    .home();
2296
2297
  if deblock_p {
2298
    // Find a good deblocking filter solution for the passed in area.
2299
    // This is not RDO of deblocking itself, merely a solution to get
2300
    // better results from CDEF/LRF RDO.
2301
    let deblock_levels = deblock_filter_optimize(
2302
      fi,
2303
      &rec_subset.as_tile(),
2304
      &src_subset,
2305
      &tileblocks_subset.as_const(),
2306
      crop_w,
2307
      crop_h,
2308
    );
2309
2310
    // Deblock the contents of our reconstruction copy.
2311
    if deblock_levels[0] != 0 || deblock_levels[1] != 0 {
2312
      // copy ts.deblock because we need to set some of our own values here
2313
      let mut deblock_copy = *ts.deblock;
2314
      deblock_copy.levels = deblock_levels;
2315
2316
      // finally, deblock the temp frame
2317
      deblock_filter_frame(
2318
        &deblock_copy,
2319
        &mut rec_subset.as_tile_mut(),
2320
        &tileblocks_subset.as_const(),
2321
        crop_w,
2322
        crop_h,
2323
        fi.sequence.bit_depth,
2324
        planes,
2325
      );
2326
    }
2327
  }
2328
2329
  let mut cdef_work =
2330
    if !cdef_skip_all { Some(rec_subset.clone()) } else { None };
2331
  let mut lrf_work = if !lru_skip_all {
2332
    Some(Frame {
2333
      planes: {
2334
0
        let new_plane = |pli: usize| {
2335
0
          let PlaneConfig { xdec, ydec, width, height, .. } =
2336
0
            rec_subset.planes[pli].cfg;
2337
0
          Plane::new(width, height, xdec, ydec, 0, 0)
2338
0
        };
Unexecuted instantiation: rav1e::rdo::rdo_loop_decision::<u16, rav1e::ec::WriterBase<rav1e::ec::WriterEncoder>>::{closure#0}
Unexecuted instantiation: rav1e::rdo::rdo_loop_decision::<u8, rav1e::ec::WriterBase<rav1e::ec::WriterEncoder>>::{closure#0}
2339
        [new_plane(0), new_plane(1), new_plane(2)]
2340
      },
2341
    })
2342
  } else {
2343
    None
2344
  };
2345
2346
  // Precompute directional analysis for CDEF
2347
  let cdef_data = {
2348
    if cdef_work.is_some() {
2349
      Some((
2350
        &rec_subset,
2351
        cdef_analyze_superblock_range(
2352
          fi,
2353
          &rec_subset,
2354
          &tileblocks_subset.as_const(),
2355
          sb_w,
2356
          sb_h,
2357
        ),
2358
      ))
2359
    } else {
2360
      None
2361
    }
2362
  };
2363
2364
  // CDEF/LRF decision iteration
2365
  // Start with a default of CDEF 0 and RestorationFilter::None
2366
  // Try all CDEF options for each sb with current LRF; if new CDEF+LRF choice is better, select it.
2367
  // Then try all LRF options with current CDEFs; if new CDEFs+LRF choice is better, select it.
2368
  // If LRF choice changed for any plane, repeat until no changes
2369
  // Limit iterations and where we break based on speed setting (in the TODO list ;-)
2370
  let mut cdef_change = true;
2371
  let mut lrf_change = true;
2372
  while cdef_change || lrf_change {
2373
    // search for improved cdef indices, superblock by superblock, if cdef is enabled.
2374
    if let (Some((rec_copy, cdef_dirs)), Some(cdef_ref)) =
2375
      (&cdef_data, &mut cdef_work.as_mut())
2376
    {
2377
      for sby in 0..sb_h {
2378
        for sbx in 0..sb_w {
2379
          // determine whether this superblock can be skipped
2380
          if cdef_skip[sby * MAX_SB_SIZE + sbx] {
2381
            continue;
2382
          }
2383
2384
          let prev_best_index = best_index[sby * sb_w + sbx];
2385
          let mut best_cost = -1.;
2386
          let mut best_new_index = -1i8;
2387
2388
          /* offset of the superblock we're currently testing within the larger
2389
          analysis area */
2390
          let loop_sbo =
2391
            TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby });
2392
2393
          /* cdef index testing loop */
2394
          for cdef_index in 0..(1 << fi.cdef_bits) {
2395
            let mut err = ScaledDistortion::zero();
2396
            let mut rate = 0;
2397
2398
            cdef_filter_superblock(
2399
              fi,
2400
              &rec_subset,
2401
              &mut cdef_ref.as_tile_mut(),
2402
              &tileblocks_subset.as_const(),
2403
              loop_sbo,
2404
              cdef_index,
2405
              &cdef_dirs[sby * sb_w + sbx],
2406
            );
2407
            // apply LRF if any
2408
            for pli in 0..planes {
2409
              // We need the cropped-to-visible-frame area of this SB
2410
              let wh =
2411
                if fi.sequence.use_128x128_superblock { 128 } else { 64 };
2412
              let PlaneConfig { xdec, ydec, .. } = cdef_ref.planes[pli].cfg;
2413
              let vis_width = (wh >> xdec).min(
2414
                (crop_w >> xdec)
2415
                  - loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg).x
2416
                    as usize,
2417
              );
2418
              let vis_height = (wh >> ydec).min(
2419
                (crop_h >> ydec)
2420
                  - loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg).y
2421
                    as usize,
2422
              );
2423
              // which LRU are we currently testing against?
2424
              if let (Some((lru_x, lru_y)), Some(lrf_ref)) = {
2425
                let rp = &ts.restoration.planes[pli];
2426
                (
2427
                  rp.restoration_unit_offset(base_sbo, loop_sbo, false),
2428
                  &mut lrf_work,
2429
                )
2430
              } {
2431
                // We have a valid LRU, apply LRF, compute error
2432
                match best_lrf[lru_y * lru_w[pli] + lru_x][pli] {
2433
                  RestorationFilter::None => {
2434
                    err += rdo_loop_plane_error(
2435
                      base_sbo,
2436
                      loop_sbo,
2437
                      1,
2438
                      1,
2439
                      fi,
2440
                      ts,
2441
                      &tileblocks_subset.as_const(),
2442
                      cdef_ref,
2443
                      &src_subset,
2444
                      pli,
2445
                    );
2446
                    rate += if fi.sequence.enable_restoration {
2447
                      cw.fc.count_lrf_switchable(
2448
                        w,
2449
                        &ts.restoration.as_const(),
2450
                        best_lrf[lru_y * lru_w[pli] + lru_x][pli],
2451
                        pli,
2452
                      )
2453
                    } else {
2454
                      0 // no relative cost differences to different
2455
                        // CDEF params.  If cdef is on, it's a wash.
2456
                    };
2457
                  }
2458
                  RestorationFilter::Sgrproj { set, xqd } => {
2459
                    // only run on this single superblock
2460
                    let loop_po =
2461
                      loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg);
2462
                    // todo: experiment with borrowing border pixels
2463
                    // rather than edge-extending. Right now this is
2464
                    // hard-clipping to the superblock boundary.
2465
                    setup_integral_image(
2466
                      &mut ts.integral_buffer,
2467
                      SOLVE_IMAGE_STRIDE,
2468
                      vis_width,
2469
                      vis_height,
2470
                      vis_width,
2471
                      vis_height,
2472
                      &cdef_ref.planes[pli].slice(loop_po),
2473
                      &cdef_ref.planes[pli].slice(loop_po),
2474
                    );
2475
                    sgrproj_stripe_filter(
2476
                      set,
2477
                      xqd,
2478
                      fi,
2479
                      &ts.integral_buffer,
2480
                      SOLVE_IMAGE_STRIDE,
2481
                      &cdef_ref.planes[pli].slice(loop_po),
2482
                      &mut lrf_ref.planes[pli].region_mut(Area::Rect {
2483
                        x: loop_po.x,
2484
                        y: loop_po.y,
2485
                        width: vis_width,
2486
                        height: vis_height,
2487
                      }),
2488
                    );
2489
                    err += rdo_loop_plane_error(
2490
                      base_sbo,
2491
                      loop_sbo,
2492
                      1,
2493
                      1,
2494
                      fi,
2495
                      ts,
2496
                      &tileblocks_subset.as_const(),
2497
                      lrf_ref,
2498
                      &src_subset,
2499
                      pli,
2500
                    );
2501
                    rate += cw.fc.count_lrf_switchable(
2502
                      w,
2503
                      &ts.restoration.as_const(),
2504
                      best_lrf[lru_y * lru_w[pli] + lru_x][pli],
2505
                      pli,
2506
                    );
2507
                  }
2508
                  RestorationFilter::Wiener { .. } => unreachable!(), // coming soon
2509
                }
2510
              } else {
2511
                // No actual LRU here, compute error directly from CDEF output.
2512
                err += rdo_loop_plane_error(
2513
                  base_sbo,
2514
                  loop_sbo,
2515
                  1,
2516
                  1,
2517
                  fi,
2518
                  ts,
2519
                  &tileblocks_subset.as_const(),
2520
                  cdef_ref,
2521
                  &src_subset,
2522
                  pli,
2523
                );
2524
                // no relative cost differences to different
2525
                // CDEF params.  If cdef is on, it's a wash.
2526
                // rate += 0;
2527
              }
2528
            }
2529
2530
            let cost = compute_rd_cost(fi, rate, err);
2531
            if best_cost < 0. || cost < best_cost {
2532
              best_cost = cost;
2533
              best_new_index = cdef_index as i8;
2534
            }
2535
          }
2536
2537
          // Did we change any preexisting choices?
2538
          if best_new_index != prev_best_index {
2539
            cdef_change = true;
2540
            best_index[sby * sb_w + sbx] = best_new_index;
2541
            tileblocks_subset.set_cdef(loop_sbo, best_new_index as u8);
2542
          }
2543
2544
          let mut cdef_ref_tm = TileMut::new(
2545
            cdef_ref,
2546
            TileRect {
2547
              x: 0,
2548
              y: 0,
2549
              width: cdef_ref.planes[0].cfg.width,
2550
              height: cdef_ref.planes[0].cfg.height,
2551
            },
2552
          );
2553
2554
          // Keep cdef output up to date; we need it for restoration
2555
          // both below and above (padding)
2556
          cdef_filter_superblock(
2557
            fi,
2558
            rec_copy,
2559
            &mut cdef_ref_tm,
2560
            &tileblocks_subset.as_const(),
2561
            loop_sbo,
2562
            best_index[sby * sb_w + sbx] as u8,
2563
            &cdef_dirs[sby * sb_w + sbx],
2564
          );
2565
        }
2566
      }
2567
    }
2568
2569
    if !cdef_change {
2570
      break;
2571
    }
2572
    cdef_change = false;
2573
    lrf_change = false;
2574
2575
    // search for improved restoration filter parameters if restoration is enabled
2576
    if let Some(lrf_ref) = &mut lrf_work.as_mut() {
2577
      let lrf_input = if cdef_work.is_some() {
2578
        // When CDEF is enabled, we pull from the CDEF output
2579
        cdef_work.as_ref().unwrap()
2580
      } else {
2581
        // When CDEF is disabled, we pull from the [optionally
2582
        // deblocked] reconstruction
2583
        &rec_subset
2584
      };
2585
      for pli in 0..planes {
2586
        // Nominal size of LRU in pixels before clipping to visible frame
2587
        let unit_size = ts.restoration.planes[pli].rp_cfg.unit_size;
2588
        // width, in sb, of an LRU in this plane
2589
        let lru_sb_w = 1 << ts.restoration.planes[pli].rp_cfg.sb_h_shift;
2590
        // height, in sb, of an LRU in this plane
2591
        let lru_sb_h = 1 << ts.restoration.planes[pli].rp_cfg.sb_v_shift;
2592
        let PlaneConfig { xdec, ydec, .. } = lrf_ref.planes[pli].cfg;
2593
        for lru_y in 0..lru_h[pli] {
2594
          // number of LRUs vertically
2595
          for lru_x in 0..lru_w[pli] {
2596
            // number of LRUs horizontally
2597
2598
            // determine whether this lru should be skipped
2599
            if lru_skip[lru_y * MAX_LRU_SIZE + lru_x][pli] {
2600
              continue;
2601
            }
2602
2603
            let loop_sbo = TileSuperBlockOffset(SuperBlockOffset {
2604
              x: lru_x * lru_sb_w,
2605
              y: lru_y * lru_sb_h,
2606
            });
2607
            if ts.restoration.has_restoration_unit(
2608
              base_sbo + loop_sbo,
2609
              pli,
2610
              false,
2611
            ) {
2612
              let src_plane = &src_subset.planes[pli]; // uncompressed input for reference
2613
              let lrf_in_plane = &lrf_input.planes[pli];
2614
              let lrf_po = loop_sbo.plane_offset(src_plane.plane_cfg);
2615
              let mut best_new_lrf = best_lrf[lru_y * lru_w[pli] + lru_x][pli];
2616
              let mut best_cost =
2617
                best_lrf_cost[lru_y * lru_w[pli] + lru_x][pli];
2618
2619
              // Check the no filter option
2620
              {
2621
                let err = rdo_loop_plane_error(
2622
                  base_sbo,
2623
                  loop_sbo,
2624
                  lru_sb_w,
2625
                  lru_sb_h,
2626
                  fi,
2627
                  ts,
2628
                  &tileblocks_subset.as_const(),
2629
                  lrf_input,
2630
                  &src_subset,
2631
                  pli,
2632
                );
2633
                let rate = cw.fc.count_lrf_switchable(
2634
                  w,
2635
                  &ts.restoration.as_const(),
2636
                  best_new_lrf,
2637
                  pli,
2638
                );
2639
2640
                let cost = compute_rd_cost(fi, rate, err);
2641
                // Was this choice actually an improvement?
2642
                if best_cost < 0. || cost < best_cost {
2643
                  best_cost = cost;
2644
                  best_lrf_cost[lru_y * lru_w[pli] + lru_x][pli] = cost;
2645
                  best_new_lrf = RestorationFilter::None;
2646
                }
2647
              }
2648
2649
              // Look for a self guided filter
2650
              // We need the cropped-to-visible-frame computation area of this LRU
2651
              let vis_width = unit_size.min(
2652
                (crop_w >> xdec)
2653
                  - loop_sbo.plane_offset(&lrf_ref.planes[pli].cfg).x as usize,
2654
              );
2655
              let vis_height = unit_size.min(
2656
                (crop_h >> ydec)
2657
                  - loop_sbo.plane_offset(&lrf_ref.planes[pli].cfg).y as usize,
2658
              );
2659
2660
              // todo: experiment with borrowing border pixels
2661
              // rather than edge-extending. Right now this is
2662
              // hard-clipping to the superblock boundary.
2663
              setup_integral_image(
2664
                &mut ts.integral_buffer,
2665
                SOLVE_IMAGE_STRIDE,
2666
                vis_width,
2667
                vis_height,
2668
                vis_width,
2669
                vis_height,
2670
                &lrf_in_plane.slice(lrf_po),
2671
                &lrf_in_plane.slice(lrf_po),
2672
              );
2673
2674
              for &set in get_sgr_sets(fi.config.speed_settings.sgr_complexity)
2675
              {
2676
                let (xqd0, xqd1) = sgrproj_solve(
2677
                  set,
2678
                  fi,
2679
                  &ts.integral_buffer,
2680
                  &src_plane
2681
                    .subregion(Area::StartingAt { x: lrf_po.x, y: lrf_po.y }),
2682
                  &lrf_in_plane.slice(lrf_po),
2683
                  vis_width,
2684
                  vis_height,
2685
                );
2686
                let current_lrf =
2687
                  RestorationFilter::Sgrproj { set, xqd: [xqd0, xqd1] };
2688
                if let RestorationFilter::Sgrproj { set, xqd } = current_lrf {
2689
                  sgrproj_stripe_filter(
2690
                    set,
2691
                    xqd,
2692
                    fi,
2693
                    &ts.integral_buffer,
2694
                    SOLVE_IMAGE_STRIDE,
2695
                    &lrf_in_plane.slice(lrf_po),
2696
                    &mut lrf_ref.planes[pli].region_mut(Area::Rect {
2697
                      x: lrf_po.x,
2698
                      y: lrf_po.y,
2699
                      width: vis_width,
2700
                      height: vis_height,
2701
                    }),
2702
                  );
2703
                }
2704
                let err = rdo_loop_plane_error(
2705
                  base_sbo,
2706
                  loop_sbo,
2707
                  lru_sb_w,
2708
                  lru_sb_h,
2709
                  fi,
2710
                  ts,
2711
                  &tileblocks_subset.as_const(),
2712
                  lrf_ref,
2713
                  &src_subset,
2714
                  pli,
2715
                );
2716
                let rate = cw.fc.count_lrf_switchable(
2717
                  w,
2718
                  &ts.restoration.as_const(),
2719
                  current_lrf,
2720
                  pli,
2721
                );
2722
                let cost = compute_rd_cost(fi, rate, err);
2723
                if cost < best_cost {
2724
                  best_cost = cost;
2725
                  best_lrf_cost[lru_y * lru_w[pli] + lru_x][pli] = cost;
2726
                  best_new_lrf = current_lrf;
2727
                }
2728
              }
2729
2730
              if best_lrf[lru_y * lru_w[pli] + lru_x][pli]
2731
                .notequal(best_new_lrf)
2732
              {
2733
                best_lrf[lru_y * lru_w[pli] + lru_x][pli] = best_new_lrf;
2734
                lrf_change = true;
2735
                if let Some(ru) = ts.restoration.planes[pli]
2736
                  .restoration_unit_mut(base_sbo + loop_sbo)
2737
                {
2738
                  ru.filter = best_new_lrf;
2739
                }
2740
              }
2741
            }
2742
          }
2743
        }
2744
      }
2745
    }
2746
  }
2747
}
2748
2749
// Smoke test: called with a zero first argument, `TxSize::TX_4X4`, and a
// zero third argument, `estimate_rate` must return exactly the first entry
// of the rate table, `RDO_RATE_TABLE[0][0][0]`.
#[test]
2750
fn estimate_rate_test() {
2751
  assert_eq!(estimate_rate(0, TxSize::TX_4X4, 0), RDO_RATE_TABLE[0][0][0]);
2752
}