/rust/registry/src/index.crates.io-1949cf8c6b5b557f/rav1e-0.8.1/src/rdo.rs
Line | Count | Source |
1 | | // Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved |
2 | | // Copyright (c) 2017-2022, The rav1e contributors. All rights reserved |
3 | | // |
4 | | // This source code is subject to the terms of the BSD 2 Clause License and |
5 | | // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | // was not distributed with this source code in the LICENSE file, you can |
7 | | // obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | // Media Patent License 1.0 was not distributed with this source code in the |
9 | | // PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | |
11 | | #![allow(non_camel_case_types)] |
12 | | |
13 | | use std::fmt; |
14 | | use std::mem::MaybeUninit; |
15 | | |
16 | | use arrayvec::*; |
17 | | use itertools::izip; |
18 | | |
19 | | use crate::api::*; |
20 | | use crate::cdef::*; |
21 | | use crate::context::*; |
22 | | use crate::cpu_features::CpuFeatureLevel; |
23 | | use crate::deblock::*; |
24 | | use crate::dist::*; |
25 | | use crate::ec::{Writer, WriterCounter, OD_BITRES}; |
26 | | use crate::encode_block_with_modes; |
27 | | use crate::encoder::{FrameInvariants, IMPORTANCE_BLOCK_SIZE}; |
28 | | use crate::frame::*; |
29 | | use crate::header::ReferenceMode; |
30 | | use crate::lrf::*; |
31 | | use crate::mc::MotionVector; |
32 | | use crate::me::estimate_motion; |
33 | | use crate::me::MVSamplingMode; |
34 | | use crate::me::MotionSearchResult; |
35 | | use crate::motion_compensate; |
36 | | use crate::partition::PartitionType::*; |
37 | | use crate::partition::RefType::*; |
38 | | use crate::partition::*; |
39 | | use crate::predict::{ |
40 | | luma_ac, AngleDelta, IntraEdgeFilterParameters, IntraParam, PredictionMode, |
41 | | RAV1E_INTER_COMPOUND_MODES, RAV1E_INTER_MODES_MINIMAL, RAV1E_INTRA_MODES, |
42 | | }; |
43 | | use crate::rdo_tables::*; |
44 | | use crate::tiling::*; |
45 | | use crate::transform::{TxSet, TxSize, TxType, RAV1E_TX_TYPES}; |
46 | | use crate::util::{init_slice_repeat_mut, Aligned, Pixel}; |
47 | | use crate::write_tx_blocks; |
48 | | use crate::write_tx_tree; |
49 | | use crate::Tune; |
50 | | use crate::{encode_block_post_cdef, encode_block_pre_cdef}; |
51 | | |
/// Selects which distortion and rate measurements a rate-distortion search
/// relies on.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum RDOType {
  /// Pixel-domain distortion and exact ec rate.
  PixelDistRealRate,
  /// Tx-domain distortion and exact ec rate.
  TxDistRealRate,
  /// Tx-domain distortion and txdist-based rate.
  TxDistEstRate,
}

impl RDOType {
  /// Returns `true` for the variants that measure distortion in the
  /// transform domain.
  #[inline]
  pub const fn needs_tx_dist(self) -> bool {
    match self {
      Self::PixelDistRealRate => false,
      Self::TxDistRealRate | Self::TxDistEstRate => true,
    }
  }

  /// Returns `true` for the variants that use the exact ec rate, and `false`
  /// for the one whose rate is derived from the tx-domain distortion.
  #[inline]
  pub const fn needs_coeff_rate(self) -> bool {
    match self {
      Self::PixelDistRealRate | Self::TxDistRealRate => true,
      Self::TxDistEstRate => false,
    }
  }
}
80 | | |
/// Outcome of evaluating one partition type: its cost plus the parameters
/// recorded for the blocks it produces.
// NOTE(review): field meanings below are inferred from the names and types
// only; confirm against the code that fills this struct in.
#[derive(Clone)]
pub struct PartitionGroupParameters {
  /// RD cost attached to this partition choice (presumably the total over
  /// `part_modes`; confirm).
  pub rd_cost: f64,
  /// The partition type these parameters belong to.
  pub part_type: PartitionType,
  /// Per-block parameters; the `ArrayVec` holds at most four entries.
  pub part_modes: ArrayVec<PartitionParameters, 4>,
}
87 | | |
88 | | #[derive(Clone, Debug)] |
89 | | pub struct PartitionParameters { |
90 | | pub rd_cost: f64, |
91 | | pub bo: TileBlockOffset, |
92 | | pub bsize: BlockSize, |
93 | | pub pred_mode_luma: PredictionMode, |
94 | | pub pred_mode_chroma: PredictionMode, |
95 | | pub pred_cfl_params: CFLParams, |
96 | | pub angle_delta: AngleDelta, |
97 | | pub ref_frames: [RefType; 2], |
98 | | pub mvs: [MotionVector; 2], |
99 | | pub skip: bool, |
100 | | pub has_coeff: bool, |
101 | | pub tx_size: TxSize, |
102 | | pub tx_type: TxType, |
103 | | pub sidx: u8, |
104 | | } |
105 | | |
106 | | impl Default for PartitionParameters { |
107 | 0 | fn default() -> Self { |
108 | 0 | PartitionParameters { |
109 | 0 | rd_cost: f64::MAX, |
110 | 0 | bo: TileBlockOffset::default(), |
111 | 0 | bsize: BlockSize::BLOCK_32X32, |
112 | 0 | pred_mode_luma: PredictionMode::default(), |
113 | 0 | pred_mode_chroma: PredictionMode::default(), |
114 | 0 | pred_cfl_params: CFLParams::default(), |
115 | 0 | angle_delta: AngleDelta::default(), |
116 | 0 | ref_frames: [RefType::INTRA_FRAME, RefType::NONE_FRAME], |
117 | 0 | mvs: [MotionVector::default(); 2], |
118 | 0 | skip: false, |
119 | 0 | has_coeff: true, |
120 | 0 | tx_size: TxSize::TX_4X4, |
121 | 0 | tx_type: TxType::DCT_DCT, |
122 | 0 | sidx: 0, |
123 | 0 | } |
124 | 0 | } |
125 | | } |
126 | | |
127 | 0 | pub fn estimate_rate(qindex: u8, ts: TxSize, fast_distortion: u64) -> u64 { |
128 | 0 | let bs_index = ts as usize; |
129 | 0 | let q_bin_idx = (qindex as usize) / RDO_QUANT_DIV; |
130 | 0 | let bin_idx_down = |
131 | 0 | ((fast_distortion) / RATE_EST_BIN_SIZE).min((RDO_NUM_BINS - 2) as u64); |
132 | 0 | let bin_idx_up = (bin_idx_down + 1).min((RDO_NUM_BINS - 1) as u64); |
133 | 0 | let x0 = (bin_idx_down * RATE_EST_BIN_SIZE) as i64; |
134 | 0 | let x1 = (bin_idx_up * RATE_EST_BIN_SIZE) as i64; |
135 | 0 | let y0 = RDO_RATE_TABLE[q_bin_idx][bs_index][bin_idx_down as usize] as i64; |
136 | 0 | let y1 = RDO_RATE_TABLE[q_bin_idx][bs_index][bin_idx_up as usize] as i64; |
137 | 0 | let slope = ((y1 - y0) << 8) / (x1 - x0); |
138 | 0 | (y0 + (((fast_distortion as i64 - x0) * slope) >> 8)).max(0) as u64 |
139 | 0 | } |
140 | | |
141 | | #[allow(unused)] |
142 | 0 | pub fn cdef_dist_wxh<T: Pixel, F: Fn(Area, BlockSize) -> DistortionScale>( |
143 | 0 | src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, w: usize, h: usize, |
144 | 0 | bit_depth: usize, compute_bias: F, cpu: CpuFeatureLevel, |
145 | 0 | ) -> Distortion { |
146 | 0 | debug_assert!(src1.plane_cfg.xdec == 0); |
147 | 0 | debug_assert!(src1.plane_cfg.ydec == 0); |
148 | 0 | debug_assert!(src2.plane_cfg.xdec == 0); |
149 | 0 | debug_assert!(src2.plane_cfg.ydec == 0); |
150 | | |
151 | 0 | let mut sum = Distortion::zero(); |
152 | 0 | for y in (0..h).step_by(8) { |
153 | 0 | for x in (0..w).step_by(8) { |
154 | 0 | let kernel_h = (h - y).min(8); |
155 | 0 | let kernel_w = (w - x).min(8); |
156 | 0 | let area = Area::StartingAt { x: x as isize, y: y as isize }; |
157 | 0 |
|
158 | 0 | let value = RawDistortion(cdef_dist_kernel( |
159 | 0 | &src1.subregion(area), |
160 | 0 | &src2.subregion(area), |
161 | 0 | kernel_w, |
162 | 0 | kernel_h, |
163 | 0 | bit_depth, |
164 | 0 | cpu, |
165 | 0 | ) as u64); |
166 | 0 |
|
167 | 0 | // cdef is always called on non-subsampled planes, so BLOCK_8X8 is |
168 | 0 | // correct here. |
169 | 0 | sum += value * compute_bias(area, BlockSize::BLOCK_8X8); |
170 | 0 | } |
171 | | } |
172 | 0 | sum |
173 | 0 | } Unexecuted instantiation: rav1e::rdo::cdef_dist_wxh::<u16, rav1e::rdo::compute_distortion<u16>::{closure#0}>Unexecuted instantiation: rav1e::rdo::cdef_dist_wxh::<u8, rav1e::rdo::compute_distortion<u8>::{closure#0}> |
174 | | |
175 | | /// Sum of Squared Error for a wxh block |
176 | | /// Currently limited to w and h of valid blocks |
177 | 0 | pub fn sse_wxh<T: Pixel, F: Fn(Area, BlockSize) -> DistortionScale>( |
178 | 0 | src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, w: usize, h: usize, |
179 | 0 | compute_bias: F, bit_depth: usize, cpu: CpuFeatureLevel, |
180 | 0 | ) -> Distortion { |
181 | | // See get_weighted_sse in src/dist.rs. |
182 | | // Provide a scale to get_weighted_sse for each square region of this size. |
183 | | const CHUNK_SIZE: usize = IMPORTANCE_BLOCK_SIZE >> 1; |
184 | | |
185 | | // To bias the distortion correctly, compute it in blocks up to the size |
186 | | // importance block size in a non-subsampled plane. |
187 | 0 | let imp_block_w = CHUNK_SIZE << src1.plane_cfg.xdec; |
188 | 0 | let imp_block_h = CHUNK_SIZE << src1.plane_cfg.ydec; |
189 | | |
190 | 0 | let imp_bsize = BlockSize::from_width_and_height(imp_block_w, imp_block_h); |
191 | | |
192 | 0 | let n_imp_blocks_w = w.div_ceil(CHUNK_SIZE); |
193 | 0 | let n_imp_blocks_h = h.div_ceil(CHUNK_SIZE); |
194 | | |
195 | | // TODO: Copying biases into a buffer is slow. It would be best if biases were |
196 | | // passed directly. To do this, we would need different versions of the |
197 | | // weighted sse function for decimated/subsampled data. Also requires |
198 | | // eliminating use of unbiased sse. |
199 | | // It should also be noted that the current copy code does not auto-vectorize. |
200 | | |
201 | | // Copy biases into a buffer. |
202 | 0 | let mut buf_storage = Aligned::new( |
203 | 0 | [MaybeUninit::<u32>::uninit(); 128 / CHUNK_SIZE * 128 / CHUNK_SIZE], |
204 | | ); |
205 | 0 | let buf_stride = n_imp_blocks_w.next_power_of_two(); |
206 | 0 | let buf = init_slice_repeat_mut( |
207 | 0 | &mut buf_storage.data[..buf_stride * n_imp_blocks_h], |
208 | | 0, |
209 | | ); |
210 | | |
211 | 0 | for block_y in 0..n_imp_blocks_h { |
212 | 0 | for block_x in 0..n_imp_blocks_w { |
213 | 0 | let block = Area::StartingAt { |
214 | 0 | x: (block_x * CHUNK_SIZE) as isize, |
215 | 0 | y: (block_y * CHUNK_SIZE) as isize, |
216 | 0 | }; |
217 | 0 | buf[block_y * buf_stride + block_x] = compute_bias(block, imp_bsize).0; |
218 | 0 | } |
219 | | } |
220 | | |
221 | 0 | Distortion(get_weighted_sse( |
222 | 0 | src1, src2, buf, buf_stride, w, h, bit_depth, cpu, |
223 | 0 | )) |
224 | 0 | } Unexecuted instantiation: rav1e::rdo::sse_wxh::<u16, rav1e::rdo::compute_distortion<u16>::{closure#2}>Unexecuted instantiation: rav1e::rdo::sse_wxh::<u16, rav1e::rdo::compute_distortion<u16>::{closure#1}>Unexecuted instantiation: rav1e::rdo::sse_wxh::<u16, rav1e::rdo::rdo_loop_plane_error<u16>::{closure#0}>Unexecuted instantiation: rav1e::rdo::sse_wxh::<u16, rav1e::rdo::compute_tx_distortion<u16>::{closure#0}>Unexecuted instantiation: rav1e::rdo::sse_wxh::<u16, rav1e::rdo::compute_tx_distortion<u16>::{closure#1}>Unexecuted instantiation: rav1e::rdo::sse_wxh::<u16, rav1e::rdo::rdo_cfl_alpha<u16>::{closure#0}::{closure#0}::{closure#0}>Unexecuted instantiation: rav1e::rdo::sse_wxh::<u8, rav1e::rdo::compute_distortion<u8>::{closure#2}>Unexecuted instantiation: rav1e::rdo::sse_wxh::<u8, rav1e::rdo::compute_distortion<u8>::{closure#1}>Unexecuted instantiation: rav1e::rdo::sse_wxh::<u8, rav1e::rdo::rdo_loop_plane_error<u8>::{closure#0}>Unexecuted instantiation: rav1e::rdo::sse_wxh::<u8, rav1e::rdo::compute_tx_distortion<u8>::{closure#0}>Unexecuted instantiation: rav1e::rdo::sse_wxh::<u8, rav1e::rdo::compute_tx_distortion<u8>::{closure#1}>Unexecuted instantiation: rav1e::rdo::sse_wxh::<u8, rav1e::rdo::rdo_cfl_alpha<u8>::{closure#0}::{closure#0}::{closure#0}> |
225 | | |
226 | | // TODO consider saturating_sub later |
227 | | #[allow(clippy::implicit_saturating_sub)] |
228 | 0 | pub const fn clip_visible_bsize( |
229 | 0 | frame_w: usize, frame_h: usize, bsize: BlockSize, x: usize, y: usize, |
230 | 0 | ) -> (usize, usize) { |
231 | 0 | let blk_w = bsize.width(); |
232 | 0 | let blk_h = bsize.height(); |
233 | | |
234 | 0 | let visible_w: usize = if x + blk_w <= frame_w { |
235 | 0 | blk_w |
236 | 0 | } else if x >= frame_w { |
237 | 0 | 0 |
238 | | } else { |
239 | 0 | frame_w - x |
240 | | }; |
241 | | |
242 | 0 | let visible_h: usize = if y + blk_h <= frame_h { |
243 | 0 | blk_h |
244 | 0 | } else if y >= frame_h { |
245 | 0 | 0 |
246 | | } else { |
247 | 0 | frame_h - y |
248 | | }; |
249 | | |
250 | 0 | (visible_w, visible_h) |
251 | 0 | } |
252 | | |
253 | | // Compute the pixel-domain distortion for an encode |
254 | 0 | fn compute_distortion<T: Pixel>( |
255 | 0 | fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize, |
256 | 0 | is_chroma_block: bool, tile_bo: TileBlockOffset, luma_only: bool, |
257 | 0 | ) -> ScaledDistortion { |
258 | 0 | let area = Area::BlockStartingAt { bo: tile_bo.0 }; |
259 | 0 | let input_region = ts.input_tile.planes[0].subregion(area); |
260 | 0 | let rec_region = ts.rec.planes[0].subregion(area); |
261 | | |
262 | | // clip a block to have visible pixles only |
263 | 0 | let frame_bo = ts.to_frame_block_offset(tile_bo); |
264 | 0 | let (visible_w, visible_h) = clip_visible_bsize( |
265 | 0 | fi.width, |
266 | 0 | fi.height, |
267 | 0 | bsize, |
268 | 0 | frame_bo.0.x << MI_SIZE_LOG2, |
269 | 0 | frame_bo.0.y << MI_SIZE_LOG2, |
270 | 0 | ); |
271 | | |
272 | 0 | if visible_w == 0 || visible_h == 0 { |
273 | 0 | return ScaledDistortion::zero(); |
274 | 0 | } |
275 | | |
276 | 0 | let mut distortion = match fi.config.tune { |
277 | 0 | Tune::Psychovisual => cdef_dist_wxh( |
278 | 0 | &input_region, |
279 | 0 | &rec_region, |
280 | 0 | visible_w, |
281 | 0 | visible_h, |
282 | 0 | fi.sequence.bit_depth, |
283 | 0 | |bias_area, bsize| { |
284 | 0 | distortion_scale( |
285 | 0 | fi, |
286 | 0 | input_region.subregion(bias_area).frame_block_offset(), |
287 | 0 | bsize, |
288 | | ) |
289 | 0 | }, Unexecuted instantiation: rav1e::rdo::compute_distortion::<u16>::{closure#0}Unexecuted instantiation: rav1e::rdo::compute_distortion::<u8>::{closure#0} |
290 | 0 | fi.cpu_feature_level, |
291 | | ), |
292 | 0 | Tune::Psnr => sse_wxh( |
293 | 0 | &input_region, |
294 | 0 | &rec_region, |
295 | 0 | visible_w, |
296 | 0 | visible_h, |
297 | 0 | |bias_area, bsize| { |
298 | 0 | distortion_scale( |
299 | 0 | fi, |
300 | 0 | input_region.subregion(bias_area).frame_block_offset(), |
301 | 0 | bsize, |
302 | | ) |
303 | 0 | }, Unexecuted instantiation: rav1e::rdo::compute_distortion::<u16>::{closure#1}Unexecuted instantiation: rav1e::rdo::compute_distortion::<u8>::{closure#1} |
304 | 0 | fi.sequence.bit_depth, |
305 | 0 | fi.cpu_feature_level, |
306 | | ), |
307 | 0 | } * fi.dist_scale[0]; |
308 | | |
309 | 0 | if is_chroma_block |
310 | 0 | && !luma_only |
311 | 0 | && fi.sequence.chroma_sampling != ChromaSampling::Cs400 |
312 | | { |
313 | 0 | let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg; |
314 | 0 | let chroma_w = if bsize.width() >= 8 || xdec == 0 { |
315 | 0 | (visible_w + xdec) >> xdec |
316 | | } else { |
317 | 0 | (4 + visible_w + xdec) >> xdec |
318 | | }; |
319 | 0 | let chroma_h = if bsize.height() >= 8 || ydec == 0 { |
320 | 0 | (visible_h + ydec) >> ydec |
321 | | } else { |
322 | 0 | (4 + visible_h + ydec) >> ydec |
323 | | }; |
324 | | |
325 | 0 | for p in 1..3 { |
326 | 0 | let input_region = ts.input_tile.planes[p].subregion(area); |
327 | 0 | let rec_region = ts.rec.planes[p].subregion(area); |
328 | 0 | distortion += sse_wxh( |
329 | 0 | &input_region, |
330 | 0 | &rec_region, |
331 | 0 | chroma_w, |
332 | 0 | chroma_h, |
333 | 0 | |bias_area, bsize| { |
334 | 0 | distortion_scale( |
335 | 0 | fi, |
336 | 0 | input_region.subregion(bias_area).frame_block_offset(), |
337 | 0 | bsize, |
338 | | ) |
339 | 0 | }, Unexecuted instantiation: rav1e::rdo::compute_distortion::<u16>::{closure#2}Unexecuted instantiation: rav1e::rdo::compute_distortion::<u8>::{closure#2} |
340 | 0 | fi.sequence.bit_depth, |
341 | 0 | fi.cpu_feature_level, |
342 | 0 | ) * fi.dist_scale[p]; |
343 | | } |
344 | 0 | } |
345 | 0 | distortion |
346 | 0 | } Unexecuted instantiation: rav1e::rdo::compute_distortion::<u16> Unexecuted instantiation: rav1e::rdo::compute_distortion::<u8> |
347 | | |
348 | | // Compute the transform-domain distortion for an encode |
349 | 0 | fn compute_tx_distortion<T: Pixel>( |
350 | 0 | fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize, |
351 | 0 | is_chroma_block: bool, tile_bo: TileBlockOffset, tx_dist: ScaledDistortion, |
352 | 0 | skip: bool, luma_only: bool, |
353 | 0 | ) -> ScaledDistortion { |
354 | 0 | assert!(fi.config.tune == Tune::Psnr); |
355 | 0 | let area = Area::BlockStartingAt { bo: tile_bo.0 }; |
356 | 0 | let input_region = ts.input_tile.planes[0].subregion(area); |
357 | 0 | let rec_region = ts.rec.planes[0].subregion(area); |
358 | | |
359 | 0 | let (visible_w, visible_h) = if !skip { |
360 | 0 | (bsize.width(), bsize.height()) |
361 | | } else { |
362 | 0 | let frame_bo = ts.to_frame_block_offset(tile_bo); |
363 | 0 | clip_visible_bsize( |
364 | 0 | fi.width, |
365 | 0 | fi.height, |
366 | 0 | bsize, |
367 | 0 | frame_bo.0.x << MI_SIZE_LOG2, |
368 | 0 | frame_bo.0.y << MI_SIZE_LOG2, |
369 | | ) |
370 | | }; |
371 | | |
372 | 0 | if visible_w == 0 || visible_h == 0 { |
373 | 0 | return ScaledDistortion::zero(); |
374 | 0 | } |
375 | | |
376 | 0 | let mut distortion = if skip { |
377 | 0 | sse_wxh( |
378 | 0 | &input_region, |
379 | 0 | &rec_region, |
380 | 0 | visible_w, |
381 | 0 | visible_h, |
382 | 0 | |bias_area, bsize| { |
383 | 0 | distortion_scale( |
384 | 0 | fi, |
385 | 0 | input_region.subregion(bias_area).frame_block_offset(), |
386 | 0 | bsize, |
387 | | ) |
388 | 0 | }, Unexecuted instantiation: rav1e::rdo::compute_tx_distortion::<u16>::{closure#0}Unexecuted instantiation: rav1e::rdo::compute_tx_distortion::<u8>::{closure#0} |
389 | 0 | fi.sequence.bit_depth, |
390 | 0 | fi.cpu_feature_level, |
391 | 0 | ) * fi.dist_scale[0] |
392 | | } else { |
393 | 0 | tx_dist |
394 | | }; |
395 | | |
396 | 0 | if is_chroma_block |
397 | 0 | && !luma_only |
398 | 0 | && skip |
399 | 0 | && fi.sequence.chroma_sampling != ChromaSampling::Cs400 |
400 | | { |
401 | 0 | let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg; |
402 | 0 | let chroma_w = if bsize.width() >= 8 || xdec == 0 { |
403 | 0 | (visible_w + xdec) >> xdec |
404 | | } else { |
405 | 0 | (4 + visible_w + xdec) >> xdec |
406 | | }; |
407 | 0 | let chroma_h = if bsize.height() >= 8 || ydec == 0 { |
408 | 0 | (visible_h + ydec) >> ydec |
409 | | } else { |
410 | 0 | (4 + visible_h + ydec) >> ydec |
411 | | }; |
412 | | |
413 | 0 | for p in 1..3 { |
414 | 0 | let input_region = ts.input_tile.planes[p].subregion(area); |
415 | 0 | let rec_region = ts.rec.planes[p].subregion(area); |
416 | 0 | distortion += sse_wxh( |
417 | 0 | &input_region, |
418 | 0 | &rec_region, |
419 | 0 | chroma_w, |
420 | 0 | chroma_h, |
421 | 0 | |bias_area, bsize| { |
422 | 0 | distortion_scale( |
423 | 0 | fi, |
424 | 0 | input_region.subregion(bias_area).frame_block_offset(), |
425 | 0 | bsize, |
426 | | ) |
427 | 0 | }, Unexecuted instantiation: rav1e::rdo::compute_tx_distortion::<u16>::{closure#1}Unexecuted instantiation: rav1e::rdo::compute_tx_distortion::<u8>::{closure#1} |
428 | 0 | fi.sequence.bit_depth, |
429 | 0 | fi.cpu_feature_level, |
430 | 0 | ) * fi.dist_scale[p]; |
431 | | } |
432 | 0 | } |
433 | 0 | distortion |
434 | 0 | } Unexecuted instantiation: rav1e::rdo::compute_tx_distortion::<u16> Unexecuted instantiation: rav1e::rdo::compute_tx_distortion::<u8> |
435 | | |
436 | | /// Compute a scaling factor to multiply the distortion of a block by, |
437 | | /// this factor is determined using temporal RDO. |
438 | | /// |
439 | | /// # Panics |
440 | | /// |
441 | | /// - If called with `bsize` of 8x8 or smaller |
442 | | /// - If the coded frame data doesn't exist on the `FrameInvariants` |
443 | 0 | pub fn distortion_scale<T: Pixel>( |
444 | 0 | fi: &FrameInvariants<T>, frame_bo: PlaneBlockOffset, bsize: BlockSize, |
445 | 0 | ) -> DistortionScale { |
446 | 0 | if !fi.config.temporal_rdo() { |
447 | 0 | return DistortionScale::default(); |
448 | 0 | } |
449 | | // EncoderConfig::temporal_rdo() should always return false in situations |
450 | | // where distortion is computed on > 8x8 blocks, so we should never hit this |
451 | | // assert. |
452 | 0 | assert!(bsize <= BlockSize::BLOCK_8X8); |
453 | | |
454 | 0 | let x = frame_bo.0.x >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT; |
455 | 0 | let y = frame_bo.0.y >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT; |
456 | | |
457 | 0 | let coded_data = fi.coded_frame_data.as_ref().unwrap(); |
458 | 0 | coded_data.distortion_scales[y * coded_data.w_in_imp_b + x] |
459 | 0 | } Unexecuted instantiation: rav1e::rdo::distortion_scale::<u16> Unexecuted instantiation: rav1e::rdo::distortion_scale::<u8> |
460 | | |
461 | | /// # Panics |
462 | | /// |
463 | | /// - If the coded frame data doesn't exist on the `FrameInvariants` |
464 | 0 | pub fn spatiotemporal_scale<T: Pixel>( |
465 | 0 | fi: &FrameInvariants<T>, frame_bo: PlaneBlockOffset, bsize: BlockSize, |
466 | 0 | ) -> DistortionScale { |
467 | 0 | if !fi.config.temporal_rdo() && fi.config.tune != Tune::Psychovisual { |
468 | 0 | return DistortionScale::default(); |
469 | 0 | } |
470 | | |
471 | 0 | let coded_data = fi.coded_frame_data.as_ref().unwrap(); |
472 | | |
473 | 0 | let x0 = frame_bo.0.x >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT; |
474 | 0 | let y0 = frame_bo.0.y >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT; |
475 | 0 | let x1 = (x0 + bsize.width_imp_b()).min(coded_data.w_in_imp_b); |
476 | 0 | let y1 = (y0 + bsize.height_imp_b()).min(coded_data.h_in_imp_b); |
477 | 0 | let den = (((x1 - x0) * (y1 - y0)) as u64) << DistortionScale::SHIFT; |
478 | | |
479 | | // calling this on each slice individually improves autovectorization |
480 | | // compared to using `Iterator::take` |
481 | | #[inline(always)] |
482 | 0 | fn take_slice<T>(slice: &[T], n: usize) -> &[T] { |
483 | 0 | slice.get(..n).unwrap_or(slice) |
484 | 0 | } Unexecuted instantiation: rav1e::rdo::spatiotemporal_scale::take_slice::<rav1e::rdo::DistortionScale> Unexecuted instantiation: rav1e::rdo::spatiotemporal_scale::take_slice::<rav1e::rdo::DistortionScale> |
485 | | |
486 | 0 | let mut sum = 0; |
487 | 0 | for y in y0..y1 { |
488 | 0 | sum += take_slice( |
489 | 0 | &coded_data.distortion_scales[y * coded_data.w_in_imp_b..][x0..x1], |
490 | 0 | MAX_SB_IN_IMP_B, |
491 | 0 | ) |
492 | 0 | .iter() |
493 | 0 | .zip( |
494 | 0 | take_slice( |
495 | 0 | &coded_data.activity_scales[y * coded_data.w_in_imp_b..][x0..x1], |
496 | 0 | MAX_SB_IN_IMP_B, |
497 | 0 | ) |
498 | 0 | .iter(), |
499 | | ) |
500 | 0 | .map(|(d, a)| d.0 as u64 * a.0 as u64) Unexecuted instantiation: rav1e::rdo::spatiotemporal_scale::<u16>::{closure#0}Unexecuted instantiation: rav1e::rdo::spatiotemporal_scale::<u8>::{closure#0} |
501 | 0 | .sum::<u64>(); |
502 | | } |
503 | 0 | DistortionScale(((sum + (den >> 1)) / den) as u32) |
504 | 0 | } Unexecuted instantiation: rav1e::rdo::spatiotemporal_scale::<u16> Unexecuted instantiation: rav1e::rdo::spatiotemporal_scale::<u8> |
505 | | |
506 | 0 | pub fn distortion_scale_for( |
507 | 0 | propagate_cost: f64, intra_cost: f64, |
508 | 0 | ) -> DistortionScale { |
509 | | // The mbtree paper \cite{mbtree} uses the following formula: |
510 | | // |
511 | | // QP_delta = -strength * log2(1 + (propagate_cost / intra_cost)) |
512 | | // |
513 | | // Since this is H.264, this corresponds to the following quantizer: |
514 | | // |
515 | | // Q' = Q * 2^(QP_delta/6) |
516 | | // |
517 | | // Since lambda is proportial to Q^2, this means we want to minimize: |
518 | | // |
519 | | // D + lambda' * R |
520 | | // = D + 2^(QP_delta / 3) * lambda * R |
521 | | // |
522 | | // If we want to keep lambda fixed, we can instead scale distortion and |
523 | | // minimize: |
524 | | // |
525 | | // D * scale + lambda * R |
526 | | // |
527 | | // where: |
528 | | // |
529 | | // scale = 2^(QP_delta / -3) |
530 | | // = (1 + (propagate_cost / intra_cost))^(strength / 3) |
531 | | // |
532 | | // The original paper empirically chooses strength = 2.0, but strength = 1.0 |
533 | | // seems to work best in rav1e currently, this may have something to do with |
534 | | // the fact that they use 16x16 blocks whereas our "importance blocks" are |
535 | | // 8x8, but everything should be scale invariant here so that's weird. |
536 | | // |
537 | | // @article{mbtree, |
538 | | // title={A novel macroblock-tree algorithm for high-performance |
539 | | // optimization of dependent video coding in H.264/AVC}, |
540 | | // author={Garrett-Glaser, Jason}, |
541 | | // journal={Tech. Rep.}, |
542 | | // year={2009}, |
543 | | // url={https://pdfs.semanticscholar.org/032f/1ab7d9db385780a02eb2d579af8303b266d2.pdf} |
544 | | // } |
545 | | |
546 | 0 | if intra_cost == 0. { |
547 | 0 | return DistortionScale::default(); // no scaling |
548 | 0 | } |
549 | | |
550 | 0 | let strength = 1.0; // empirical, see comment above |
551 | 0 | let frac = (intra_cost + propagate_cost) / intra_cost; |
552 | 0 | frac.powf(strength / 3.0).into() |
553 | 0 | } |
554 | | |
555 | | /// Fixed point arithmetic version of distortion scale |
556 | | #[repr(transparent)] |
557 | | #[derive(Copy, Clone)] |
558 | | pub struct DistortionScale(pub u32); |
559 | | |
560 | | #[repr(transparent)] |
561 | | pub struct RawDistortion(u64); |
562 | | |
563 | | #[repr(transparent)] |
564 | | pub struct Distortion(pub u64); |
565 | | |
566 | | #[repr(transparent)] |
567 | | pub struct ScaledDistortion(u64); |
568 | | |
569 | | impl DistortionScale { |
570 | | /// Bits past the radix point |
571 | | const SHIFT: u32 = 14; |
572 | | /// Number of bits used. Determines the max value. |
573 | | /// 28 bits is quite excessive. |
574 | | const BITS: u32 = 28; |
575 | | /// Maximum internal value |
576 | | const MAX: u64 = (1 << Self::BITS) - 1; |
577 | | |
578 | | #[inline] |
579 | 0 | pub const fn new(num: u64, den: u64) -> Self { |
580 | 0 | let raw = (num << Self::SHIFT).saturating_add(den / 2) / den; |
581 | 0 | let mask = (raw <= Self::MAX) as u64; |
582 | 0 | Self((mask * raw + (1 - mask) * Self::MAX) as u32) |
583 | 0 | } Unexecuted instantiation: <rav1e::rdo::DistortionScale>::new Unexecuted instantiation: <rav1e::rdo::DistortionScale>::new |
584 | | |
585 | 0 | pub fn inv_mean(slice: &[Self]) -> Self { |
586 | | use crate::util::{bexp64, blog32_q11}; |
587 | 0 | let sum = slice.iter().map(|&s| blog32_q11(s.0) as i64).sum::<i64>(); |
588 | 0 | let log_inv_mean_q11 = |
589 | 0 | (Self::SHIFT << 11) as i64 - sum / slice.len() as i64; |
590 | 0 | Self( |
591 | 0 | bexp64((log_inv_mean_q11 + (Self::SHIFT << 11) as i64) << (57 - 11)) |
592 | 0 | .clamp(1, (1 << Self::BITS) - 1) as u32, |
593 | 0 | ) |
594 | 0 | } |
595 | | |
596 | | /// Binary logarithm in Q11 |
597 | | #[inline] |
598 | 0 | pub const fn blog16(self) -> i16 { |
599 | | use crate::util::blog32_q11; |
600 | 0 | (blog32_q11(self.0) - ((Self::SHIFT as i32) << 11)) as i16 |
601 | 0 | } Unexecuted instantiation: <rav1e::rdo::DistortionScale>::blog16 Unexecuted instantiation: <rav1e::rdo::DistortionScale>::blog16 |
602 | | |
603 | | /// Binary logarithm in Q57 |
604 | | #[inline] |
605 | 0 | pub const fn blog64(self) -> i64 { |
606 | | use crate::util::{blog64, q57}; |
607 | 0 | blog64(self.0 as i64) - q57(Self::SHIFT as i32) |
608 | 0 | } Unexecuted instantiation: <rav1e::rdo::DistortionScale>::blog64 Unexecuted instantiation: <rav1e::rdo::DistortionScale>::blog64 |
609 | | |
610 | | /// Multiply, round and shift |
611 | | /// Internal implementation, so don't use multiply trait. |
612 | | #[inline] |
613 | 0 | pub const fn mul_u64(self, dist: u64) -> u64 { |
614 | 0 | (self.0 as u64 * dist + (1 << Self::SHIFT >> 1)) >> Self::SHIFT |
615 | 0 | } Unexecuted instantiation: <rav1e::rdo::DistortionScale>::mul_u64 Unexecuted instantiation: <rav1e::rdo::DistortionScale>::mul_u64 |
616 | | } |
617 | | |
618 | | impl std::ops::Mul for DistortionScale { |
619 | | type Output = Self; |
620 | | |
621 | | /// Multiply, round and shift |
622 | | #[inline] |
623 | 0 | fn mul(self, rhs: Self) -> Self { |
624 | 0 | Self( |
625 | 0 | (((self.0 as u64 * rhs.0 as u64) + (1 << (Self::SHIFT - 1))) |
626 | 0 | >> Self::SHIFT) |
627 | 0 | .clamp(1, (1 << Self::BITS) - 1) as u32, |
628 | 0 | ) |
629 | 0 | } Unexecuted instantiation: <rav1e::rdo::DistortionScale as core::ops::arith::Mul>::mul Unexecuted instantiation: <rav1e::rdo::DistortionScale as core::ops::arith::Mul>::mul |
630 | | } |
631 | | |
632 | | impl std::ops::MulAssign for DistortionScale { |
633 | 0 | fn mul_assign(&mut self, rhs: Self) { |
634 | 0 | *self = *self * rhs; |
635 | 0 | } |
636 | | } |
637 | | |
638 | | // Default value for DistortionScale is a fixed point 1 |
639 | | impl Default for DistortionScale { |
640 | | #[inline] |
641 | 0 | fn default() -> Self { |
642 | 0 | Self(1 << Self::SHIFT) |
643 | 0 | } Unexecuted instantiation: <rav1e::rdo::DistortionScale as core::default::Default>::default Unexecuted instantiation: <rav1e::rdo::DistortionScale as core::default::Default>::default |
644 | | } |
645 | | |
646 | | impl fmt::Debug for DistortionScale { |
647 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
648 | 0 | write!(f, "{}", f64::from(*self)) |
649 | 0 | } |
650 | | } |
651 | | |
652 | | impl From<f64> for DistortionScale { |
653 | | #[inline] |
654 | 0 | fn from(scale: f64) -> Self { |
655 | 0 | let den = 1 << (Self::SHIFT + 1); |
656 | 0 | Self::new((scale * den as f64) as u64, den) |
657 | 0 | } |
658 | | } |
659 | | |
660 | | impl From<DistortionScale> for f64 { |
661 | | #[inline] |
662 | 0 | fn from(scale: DistortionScale) -> Self { |
663 | 0 | scale.0 as f64 / (1 << DistortionScale::SHIFT) as f64 |
664 | 0 | } |
665 | | } |
666 | | |
667 | | impl RawDistortion { |
668 | | #[inline] |
669 | 0 | pub const fn new(dist: u64) -> Self { |
670 | 0 | Self(dist) |
671 | 0 | } Unexecuted instantiation: <rav1e::rdo::RawDistortion>::new Unexecuted instantiation: <rav1e::rdo::RawDistortion>::new |
672 | | } |
673 | | |
674 | | impl std::ops::Mul<DistortionScale> for RawDistortion { |
675 | | type Output = Distortion; |
676 | | #[inline] |
677 | 0 | fn mul(self, rhs: DistortionScale) -> Distortion { |
678 | 0 | Distortion(rhs.mul_u64(self.0)) |
679 | 0 | } Unexecuted instantiation: <rav1e::rdo::RawDistortion as core::ops::arith::Mul<rav1e::rdo::DistortionScale>>::mul Unexecuted instantiation: <rav1e::rdo::RawDistortion as core::ops::arith::Mul<rav1e::rdo::DistortionScale>>::mul |
680 | | } |
681 | | |
682 | | impl Distortion { |
683 | | #[inline] |
684 | 0 | pub const fn zero() -> Self { |
685 | 0 | Self(0) |
686 | 0 | } Unexecuted instantiation: <rav1e::rdo::Distortion>::zero Unexecuted instantiation: <rav1e::rdo::Distortion>::zero |
687 | | } |
688 | | |
689 | | impl std::ops::Mul<DistortionScale> for Distortion { |
690 | | type Output = ScaledDistortion; |
691 | | #[inline] |
692 | 0 | fn mul(self, rhs: DistortionScale) -> ScaledDistortion { |
693 | 0 | ScaledDistortion(rhs.mul_u64(self.0)) |
694 | 0 | } Unexecuted instantiation: <rav1e::rdo::Distortion as core::ops::arith::Mul<rav1e::rdo::DistortionScale>>::mul Unexecuted instantiation: <rav1e::rdo::Distortion as core::ops::arith::Mul<rav1e::rdo::DistortionScale>>::mul |
695 | | } |
696 | | |
697 | | impl std::ops::AddAssign for Distortion { |
698 | | #[inline] |
699 | 0 | fn add_assign(&mut self, other: Self) { |
700 | 0 | self.0 += other.0; |
701 | 0 | } Unexecuted instantiation: <rav1e::rdo::Distortion as core::ops::arith::AddAssign>::add_assign Unexecuted instantiation: <rav1e::rdo::Distortion as core::ops::arith::AddAssign>::add_assign |
702 | | } |
703 | | |
704 | | impl ScaledDistortion { |
705 | | #[inline] |
706 | 0 | pub const fn zero() -> Self { |
707 | 0 | Self(0) |
708 | 0 | } Unexecuted instantiation: <rav1e::rdo::ScaledDistortion>::zero Unexecuted instantiation: <rav1e::rdo::ScaledDistortion>::zero |
709 | | } |
710 | | |
711 | | impl std::ops::AddAssign for ScaledDistortion { |
712 | | #[inline] |
713 | 0 | fn add_assign(&mut self, other: Self) { |
714 | 0 | self.0 += other.0; |
715 | 0 | } Unexecuted instantiation: <rav1e::rdo::ScaledDistortion as core::ops::arith::AddAssign>::add_assign Unexecuted instantiation: <rav1e::rdo::ScaledDistortion as core::ops::arith::AddAssign>::add_assign |
716 | | } |
717 | | |
718 | 0 | pub fn compute_rd_cost<T: Pixel>( |
719 | 0 | fi: &FrameInvariants<T>, rate: u32, distortion: ScaledDistortion, |
720 | 0 | ) -> f64 { |
721 | 0 | let rate_in_bits = (rate as f64) / ((1 << OD_BITRES) as f64); |
722 | 0 | fi.lambda.mul_add(rate_in_bits, distortion.0 as f64) |
723 | 0 | } Unexecuted instantiation: rav1e::rdo::compute_rd_cost::<u16> Unexecuted instantiation: rav1e::rdo::compute_rd_cost::<u8> |
724 | | |
725 | 0 | pub fn rdo_tx_size_type<T: Pixel>( |
726 | 0 | fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, |
727 | 0 | cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset, |
728 | 0 | luma_mode: PredictionMode, ref_frames: [RefType; 2], mvs: [MotionVector; 2], |
729 | 0 | skip: bool, |
730 | 0 | ) -> (TxSize, TxType) { |
731 | 0 | let is_inter = !luma_mode.is_intra(); |
732 | 0 | let mut tx_size = max_txsize_rect_lookup[bsize as usize]; |
733 | | |
734 | 0 | if fi.enable_inter_txfm_split && is_inter && !skip { |
735 | 0 | tx_size = sub_tx_size_map[tx_size as usize]; // Always choose one level split size |
736 | 0 | } |
737 | | |
738 | 0 | let mut best_tx_type = TxType::DCT_DCT; |
739 | 0 | let mut best_tx_size = tx_size; |
740 | 0 | let mut best_rd = f64::MAX; |
741 | | |
742 | 0 | let do_rdo_tx_size = fi.tx_mode_select |
743 | 0 | && fi.config.speed_settings.transform.rdo_tx_decision |
744 | 0 | && !is_inter; |
745 | 0 | let rdo_tx_depth = if do_rdo_tx_size { 2 } else { 0 }; |
746 | 0 | let mut cw_checkpoint: Option<ContextWriterCheckpoint> = None; |
747 | | |
748 | 0 | for _ in 0..=rdo_tx_depth { |
749 | 0 | let tx_set = get_tx_set(tx_size, is_inter, fi.use_reduced_tx_set); |
750 | | |
751 | 0 | let do_rdo_tx_type = tx_set > TxSet::TX_SET_DCTONLY |
752 | 0 | && fi.config.speed_settings.transform.rdo_tx_decision |
753 | 0 | && !is_inter |
754 | 0 | && !skip; |
755 | | |
756 | 0 | if !do_rdo_tx_size && !do_rdo_tx_type { |
757 | 0 | return (best_tx_size, best_tx_type); |
758 | 0 | }; |
759 | | |
760 | 0 | let tx_types = |
761 | 0 | if do_rdo_tx_type { RAV1E_TX_TYPES } else { &[TxType::DCT_DCT] }; |
762 | | |
763 | | // Luma plane transform type decision |
764 | 0 | let (tx_type, rd_cost) = rdo_tx_type_decision( |
765 | 0 | fi, |
766 | 0 | ts, |
767 | 0 | cw, |
768 | 0 | &mut cw_checkpoint, |
769 | 0 | luma_mode, |
770 | 0 | ref_frames, |
771 | 0 | mvs, |
772 | 0 | bsize, |
773 | 0 | tile_bo, |
774 | 0 | tx_size, |
775 | 0 | tx_set, |
776 | 0 | tx_types, |
777 | 0 | best_rd, |
778 | 0 | ); |
779 | | |
780 | 0 | if rd_cost < best_rd { |
781 | 0 | best_tx_size = tx_size; |
782 | 0 | best_tx_type = tx_type; |
783 | 0 | best_rd = rd_cost; |
784 | 0 | } |
785 | | |
786 | 0 | debug_assert!(tx_size.width_log2() <= bsize.width_log2()); |
787 | 0 | debug_assert!(tx_size.height_log2() <= bsize.height_log2()); |
788 | 0 | debug_assert!( |
789 | 0 | tx_size.sqr() <= TxSize::TX_32X32 || tx_type == TxType::DCT_DCT |
790 | | ); |
791 | | |
792 | 0 | let next_tx_size = sub_tx_size_map[tx_size as usize]; |
793 | | |
794 | 0 | if next_tx_size == tx_size { |
795 | 0 | break; |
796 | 0 | } else { |
797 | 0 | tx_size = next_tx_size; |
798 | 0 | }; |
799 | | } |
800 | | |
801 | 0 | (best_tx_size, best_tx_type) |
802 | 0 | } Unexecuted instantiation: rav1e::rdo::rdo_tx_size_type::<u16> Unexecuted instantiation: rav1e::rdo::rdo_tx_size_type::<u8> |
803 | | |
804 | | #[inline] |
805 | 0 | const fn dmv_in_range(mv: MotionVector, ref_mv: MotionVector) -> bool { |
806 | 0 | let diff_row = mv.row as i32 - ref_mv.row as i32; |
807 | 0 | let diff_col = mv.col as i32 - ref_mv.col as i32; |
808 | 0 | diff_row >= MV_LOW |
809 | 0 | && diff_row <= MV_UPP |
810 | 0 | && diff_col >= MV_LOW |
811 | 0 | && diff_col <= MV_UPP |
812 | 0 | } Unexecuted instantiation: rav1e::rdo::dmv_in_range Unexecuted instantiation: rav1e::rdo::dmv_in_range |
813 | | |
814 | | #[inline] |
815 | | #[profiling::function] |
816 | | fn luma_chroma_mode_rdo<T: Pixel>( |
817 | | luma_mode: PredictionMode, fi: &FrameInvariants<T>, bsize: BlockSize, |
818 | | tile_bo: TileBlockOffset, ts: &mut TileStateMut<'_, T>, |
819 | | cw: &mut ContextWriter, rdo_type: RDOType, |
820 | | cw_checkpoint: &ContextWriterCheckpoint, best: &mut PartitionParameters, |
821 | | mvs: [MotionVector; 2], ref_frames: [RefType; 2], |
822 | | mode_set_chroma: &[PredictionMode], luma_mode_is_intra: bool, |
823 | | mode_context: usize, mv_stack: &ArrayVec<CandidateMV, 9>, |
824 | | angle_delta: AngleDelta, |
825 | | ) { |
826 | | let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg; |
827 | | |
828 | | let is_chroma_block = |
829 | | has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling); |
830 | | |
831 | | if !luma_mode_is_intra { |
832 | | let ref_mvs = if mv_stack.is_empty() { |
833 | | [MotionVector::default(); 2] |
834 | | } else { |
835 | | [mv_stack[0].this_mv, mv_stack[0].comp_mv] |
836 | | }; |
837 | | |
838 | | if (luma_mode == PredictionMode::NEWMV |
839 | | || luma_mode == PredictionMode::NEW_NEWMV |
840 | | || luma_mode == PredictionMode::NEW_NEARESTMV) |
841 | | && !dmv_in_range(mvs[0], ref_mvs[0]) |
842 | | { |
843 | | return; |
844 | | } |
845 | | |
846 | | if (luma_mode == PredictionMode::NEW_NEWMV |
847 | | || luma_mode == PredictionMode::NEAREST_NEWMV) |
848 | | && !dmv_in_range(mvs[1], ref_mvs[1]) |
849 | | { |
850 | | return; |
851 | | } |
852 | | } |
853 | | |
854 | | // Find the best chroma prediction mode for the current luma prediction mode |
855 | 0 | let mut chroma_rdo = |skip: bool| -> bool { |
856 | | use crate::segmentation::select_segment; |
857 | | |
858 | 0 | let mut zero_distortion = false; |
859 | | |
860 | 0 | for sidx in select_segment(fi, ts, tile_bo, bsize, skip) { |
861 | 0 | cw.bc.blocks.set_segmentation_idx(tile_bo, bsize, sidx); |
862 | | |
863 | 0 | let (tx_size, tx_type) = rdo_tx_size_type( |
864 | 0 | fi, ts, cw, bsize, tile_bo, luma_mode, ref_frames, mvs, skip, |
865 | 0 | ); |
866 | 0 | for &chroma_mode in mode_set_chroma.iter() { |
867 | 0 | let wr = &mut WriterCounter::new(); |
868 | 0 | let tell = wr.tell_frac(); |
869 | | |
870 | 0 | if bsize >= BlockSize::BLOCK_8X8 && bsize.is_sqr() { |
871 | 0 | cw.write_partition( |
872 | 0 | wr, |
873 | 0 | tile_bo, |
874 | 0 | PartitionType::PARTITION_NONE, |
875 | 0 | bsize, |
876 | 0 | ); |
877 | 0 | } |
878 | | |
879 | | // TODO(yushin): luma and chroma would have different decision based on chroma format |
880 | 0 | let need_recon_pixel = |
881 | 0 | luma_mode_is_intra && tx_size.block_size() != bsize; |
882 | | |
883 | 0 | encode_block_pre_cdef(&fi.sequence, ts, cw, wr, bsize, tile_bo, skip); |
884 | 0 | let (has_coeff, tx_dist) = encode_block_post_cdef( |
885 | 0 | fi, |
886 | 0 | ts, |
887 | 0 | cw, |
888 | 0 | wr, |
889 | 0 | luma_mode, |
890 | 0 | chroma_mode, |
891 | 0 | angle_delta, |
892 | 0 | ref_frames, |
893 | 0 | mvs, |
894 | 0 | bsize, |
895 | 0 | tile_bo, |
896 | 0 | skip, |
897 | 0 | CFLParams::default(), |
898 | 0 | tx_size, |
899 | 0 | tx_type, |
900 | 0 | mode_context, |
901 | 0 | mv_stack, |
902 | 0 | rdo_type, |
903 | 0 | need_recon_pixel, |
904 | 0 | None, |
905 | 0 | ); |
906 | | |
907 | 0 | let rate = wr.tell_frac() - tell; |
908 | 0 | let distortion = if fi.use_tx_domain_distortion && !need_recon_pixel { |
909 | 0 | compute_tx_distortion( |
910 | 0 | fi, |
911 | 0 | ts, |
912 | 0 | bsize, |
913 | 0 | is_chroma_block, |
914 | 0 | tile_bo, |
915 | 0 | tx_dist, |
916 | 0 | skip, |
917 | | false, |
918 | | ) |
919 | | } else { |
920 | 0 | compute_distortion(fi, ts, bsize, is_chroma_block, tile_bo, false) |
921 | | }; |
922 | 0 | let is_zero_dist = distortion.0 == 0; |
923 | 0 | let rd = compute_rd_cost(fi, rate, distortion); |
924 | 0 | if rd < best.rd_cost { |
925 | 0 | //if rd < best.rd_cost || luma_mode == PredictionMode::NEW_NEWMV { |
926 | 0 | best.rd_cost = rd; |
927 | 0 | best.pred_mode_luma = luma_mode; |
928 | 0 | best.pred_mode_chroma = chroma_mode; |
929 | 0 | best.angle_delta = angle_delta; |
930 | 0 | best.ref_frames = ref_frames; |
931 | 0 | best.mvs = mvs; |
932 | 0 | best.skip = skip; |
933 | 0 | best.has_coeff = has_coeff; |
934 | 0 | best.tx_size = tx_size; |
935 | 0 | best.tx_type = tx_type; |
936 | 0 | best.sidx = sidx; |
937 | 0 | zero_distortion = is_zero_dist; |
938 | 0 | } |
939 | | |
940 | 0 | cw.rollback(cw_checkpoint); |
941 | | } |
942 | | } |
943 | | |
944 | 0 | zero_distortion |
945 | 0 | }; Unexecuted instantiation: rav1e::rdo::luma_chroma_mode_rdo::<u16>::{closure#0}Unexecuted instantiation: rav1e::rdo::luma_chroma_mode_rdo::<u8>::{closure#0} |
946 | | |
947 | | // Don't skip when using intra modes |
948 | | let zero_distortion = |
949 | | if !luma_mode_is_intra { chroma_rdo(true) } else { false }; |
950 | | // early skip |
951 | | if !zero_distortion { |
952 | | chroma_rdo(false); |
953 | | } |
954 | | } |
955 | | |
956 | | /// RDO-based mode decision |
957 | | /// |
958 | | /// # Panics |
959 | | /// |
960 | | /// - If the best RD found is negative. |
961 | | /// This should never happen and indicates a development error. |
962 | | #[profiling::function] |
963 | | pub fn rdo_mode_decision<T: Pixel>( |
964 | | fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, |
965 | | cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset, |
966 | | inter_cfg: &InterConfig, |
967 | | ) -> PartitionParameters { |
968 | | let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg; |
969 | | let cw_checkpoint = cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling); |
970 | | |
971 | | let rdo_type = if fi.use_tx_domain_rate { |
972 | | RDOType::TxDistEstRate |
973 | | } else if fi.use_tx_domain_distortion { |
974 | | RDOType::TxDistRealRate |
975 | | } else { |
976 | | RDOType::PixelDistRealRate |
977 | | }; |
978 | | |
979 | | let mut best = if fi.frame_type.has_inter() { |
980 | | assert!(fi.frame_type != FrameType::KEY); |
981 | | |
982 | | inter_frame_rdo_mode_decision( |
983 | | fi, |
984 | | ts, |
985 | | cw, |
986 | | bsize, |
987 | | tile_bo, |
988 | | inter_cfg, |
989 | | &cw_checkpoint, |
990 | | rdo_type, |
991 | | ) |
992 | | } else { |
993 | | PartitionParameters::default() |
994 | | }; |
995 | | |
996 | | let is_chroma_block = |
997 | | has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling); |
998 | | |
999 | | if !best.skip { |
1000 | | best = intra_frame_rdo_mode_decision( |
1001 | | fi, |
1002 | | ts, |
1003 | | cw, |
1004 | | bsize, |
1005 | | tile_bo, |
1006 | | &cw_checkpoint, |
1007 | | rdo_type, |
1008 | | best, |
1009 | | is_chroma_block, |
1010 | | ); |
1011 | | } |
1012 | | |
1013 | | if best.pred_mode_luma.is_intra() && is_chroma_block && bsize.cfl_allowed() { |
1014 | | cw.bc.blocks.set_segmentation_idx(tile_bo, bsize, best.sidx); |
1015 | | |
1016 | | let chroma_mode = PredictionMode::UV_CFL_PRED; |
1017 | | let cw_checkpoint = cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling); |
1018 | | let mut wr = WriterCounter::new(); |
1019 | | let angle_delta = AngleDelta { y: best.angle_delta.y, uv: 0 }; |
1020 | | |
1021 | | write_tx_blocks( |
1022 | | fi, |
1023 | | ts, |
1024 | | cw, |
1025 | | &mut wr, |
1026 | | best.pred_mode_luma, |
1027 | | best.pred_mode_luma, |
1028 | | angle_delta, |
1029 | | tile_bo, |
1030 | | bsize, |
1031 | | best.tx_size, |
1032 | | best.tx_type, |
1033 | | false, |
1034 | | CFLParams::default(), |
1035 | | true, |
1036 | | rdo_type, |
1037 | | true, |
1038 | | ); |
1039 | | cw.rollback(&cw_checkpoint); |
1040 | | if fi.sequence.chroma_sampling != ChromaSampling::Cs400 { |
1041 | | if let Some(cfl) = rdo_cfl_alpha(ts, tile_bo, bsize, best.tx_size, fi) { |
1042 | | let mut wr = WriterCounter::new(); |
1043 | | let tell = wr.tell_frac(); |
1044 | | |
1045 | | encode_block_pre_cdef( |
1046 | | &fi.sequence, |
1047 | | ts, |
1048 | | cw, |
1049 | | &mut wr, |
1050 | | bsize, |
1051 | | tile_bo, |
1052 | | best.skip, |
1053 | | ); |
1054 | | let (has_coeff, _) = encode_block_post_cdef( |
1055 | | fi, |
1056 | | ts, |
1057 | | cw, |
1058 | | &mut wr, |
1059 | | best.pred_mode_luma, |
1060 | | chroma_mode, |
1061 | | angle_delta, |
1062 | | best.ref_frames, |
1063 | | best.mvs, |
1064 | | bsize, |
1065 | | tile_bo, |
1066 | | best.skip, |
1067 | | cfl, |
1068 | | best.tx_size, |
1069 | | best.tx_type, |
1070 | | 0, |
1071 | | &[], |
1072 | | rdo_type, |
1073 | | true, // For CFL, luma should be always reconstructed. |
1074 | | None, |
1075 | | ); |
1076 | | |
1077 | | let rate = wr.tell_frac() - tell; |
1078 | | |
1079 | | // For CFL, tx-domain distortion is not an option. |
1080 | | let distortion = |
1081 | | compute_distortion(fi, ts, bsize, is_chroma_block, tile_bo, false); |
1082 | | let rd = compute_rd_cost(fi, rate, distortion); |
1083 | | if rd < best.rd_cost { |
1084 | | best.rd_cost = rd; |
1085 | | best.pred_mode_chroma = chroma_mode; |
1086 | | best.angle_delta = angle_delta; |
1087 | | best.has_coeff = has_coeff; |
1088 | | best.pred_cfl_params = cfl; |
1089 | | } |
1090 | | |
1091 | | cw.rollback(&cw_checkpoint); |
1092 | | } |
1093 | | } |
1094 | | } |
1095 | | |
1096 | | cw.bc.blocks.set_mode(tile_bo, bsize, best.pred_mode_luma); |
1097 | | cw.bc.blocks.set_ref_frames(tile_bo, bsize, best.ref_frames); |
1098 | | cw.bc.blocks.set_motion_vectors(tile_bo, bsize, best.mvs); |
1099 | | |
1100 | | assert!(best.rd_cost >= 0_f64); |
1101 | | |
1102 | | PartitionParameters { |
1103 | | bo: tile_bo, |
1104 | | bsize, |
1105 | | pred_mode_luma: best.pred_mode_luma, |
1106 | | pred_mode_chroma: best.pred_mode_chroma, |
1107 | | pred_cfl_params: best.pred_cfl_params, |
1108 | | angle_delta: best.angle_delta, |
1109 | | ref_frames: best.ref_frames, |
1110 | | mvs: best.mvs, |
1111 | | rd_cost: best.rd_cost, |
1112 | | skip: best.skip, |
1113 | | has_coeff: best.has_coeff, |
1114 | | tx_size: best.tx_size, |
1115 | | tx_type: best.tx_type, |
1116 | | sidx: best.sidx, |
1117 | | } |
1118 | | } |
1119 | | |
1120 | | #[profiling::function] |
1121 | | fn inter_frame_rdo_mode_decision<T: Pixel>( |
1122 | | fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, |
1123 | | cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset, |
1124 | | inter_cfg: &InterConfig, cw_checkpoint: &ContextWriterCheckpoint, |
1125 | | rdo_type: RDOType, |
1126 | | ) -> PartitionParameters { |
1127 | | let mut best = PartitionParameters::default(); |
1128 | | |
1129 | | // we can never have more than 7 reference frame sets |
1130 | | let mut ref_frames_set = ArrayVec::<_, 7>::new(); |
1131 | | // again, max of 7 ref slots |
1132 | | let mut ref_slot_set = ArrayVec::<_, 7>::new(); |
1133 | | // our implementation never returns more than 3 at the moment |
1134 | | let mut mvs_from_me = ArrayVec::<_, 3>::new(); |
1135 | | let mut fwdref = None; |
1136 | | let mut bwdref = None; |
1137 | | |
1138 | | for i in inter_cfg.allowed_ref_frames().iter().copied() { |
1139 | | // Don't search LAST3 since it's used only for probs |
1140 | | if i == LAST3_FRAME { |
1141 | | continue; |
1142 | | } |
1143 | | |
1144 | | if !ref_slot_set.contains(&fi.ref_frames[i.to_index()]) { |
1145 | | if fwdref.is_none() && i.is_fwd_ref() { |
1146 | | fwdref = Some(ref_frames_set.len()); |
1147 | | } |
1148 | | if bwdref.is_none() && i.is_bwd_ref() { |
1149 | | bwdref = Some(ref_frames_set.len()); |
1150 | | } |
1151 | | ref_frames_set.push([i, NONE_FRAME]); |
1152 | | let slot_idx = fi.ref_frames[i.to_index()]; |
1153 | | ref_slot_set.push(slot_idx); |
1154 | | } |
1155 | | } |
1156 | | assert!(!ref_frames_set.is_empty()); |
1157 | | |
1158 | | let mut inter_mode_set = ArrayVec::<(PredictionMode, usize), 20>::new(); |
1159 | | let mut mvs_set = ArrayVec::<[MotionVector; 2], 20>::new(); |
1160 | | let mut satds = ArrayVec::<u32, 20>::new(); |
1161 | | let mut mv_stacks = ArrayVec::<_, 20>::new(); |
1162 | | let mut mode_contexts = ArrayVec::<_, 7>::new(); |
1163 | | |
1164 | | for (i, &ref_frames) in ref_frames_set.iter().enumerate() { |
1165 | | let mut mv_stack = ArrayVec::<CandidateMV, 9>::new(); |
1166 | | mode_contexts.push(cw.find_mvrefs( |
1167 | | tile_bo, |
1168 | | ref_frames, |
1169 | | &mut mv_stack, |
1170 | | bsize, |
1171 | | fi, |
1172 | | false, |
1173 | | )); |
1174 | | |
1175 | | let mut pmv = [MotionVector::default(); 2]; |
1176 | | if !mv_stack.is_empty() { |
1177 | | pmv[0] = mv_stack[0].this_mv; |
1178 | | } |
1179 | | if mv_stack.len() > 1 { |
1180 | | pmv[1] = mv_stack[1].this_mv; |
1181 | | } |
1182 | | |
1183 | | let res = estimate_motion( |
1184 | | fi, |
1185 | | ts, |
1186 | | bsize.width(), |
1187 | | bsize.height(), |
1188 | | tile_bo, |
1189 | | ref_frames[0], |
1190 | | Some(pmv), |
1191 | | MVSamplingMode::CORNER { right: true, bottom: true }, |
1192 | | false, |
1193 | | 0, |
1194 | | None, |
1195 | | ) |
1196 | | .unwrap_or_else(MotionSearchResult::empty); |
1197 | | let b_me = res.mv; |
1198 | | |
1199 | | mvs_from_me.push([b_me, MotionVector::default()]); |
1200 | | |
1201 | | for &x in RAV1E_INTER_MODES_MINIMAL { |
1202 | | inter_mode_set.push((x, i)); |
1203 | | } |
1204 | | if !mv_stack.is_empty() { |
1205 | | inter_mode_set.push((PredictionMode::NEAR0MV, i)); |
1206 | | } |
1207 | | if mv_stack.len() >= 2 { |
1208 | | inter_mode_set.push((PredictionMode::GLOBALMV, i)); |
1209 | | } |
1210 | | let include_near_mvs = fi.config.speed_settings.motion.include_near_mvs; |
1211 | | if include_near_mvs { |
1212 | | if mv_stack.len() >= 3 { |
1213 | | inter_mode_set.push((PredictionMode::NEAR1MV, i)); |
1214 | | } |
1215 | | if mv_stack.len() >= 4 { |
1216 | | inter_mode_set.push((PredictionMode::NEAR2MV, i)); |
1217 | | } |
1218 | | } |
1219 | 0 | let same_row_col = |x: &CandidateMV| { |
1220 | 0 | x.this_mv.row == mvs_from_me[i][0].row |
1221 | 0 | && x.this_mv.col == mvs_from_me[i][0].col |
1222 | 0 | }; Unexecuted instantiation: rav1e::rdo::inter_frame_rdo_mode_decision::<u16>::{closure#0}Unexecuted instantiation: rav1e::rdo::inter_frame_rdo_mode_decision::<u8>::{closure#0} |
1223 | | if !mv_stack |
1224 | | .iter() |
1225 | | .take(if include_near_mvs { 4 } else { 2 }) |
1226 | | .any(same_row_col) |
1227 | | && (mvs_from_me[i][0].row != 0 || mvs_from_me[i][0].col != 0) |
1228 | | { |
1229 | | inter_mode_set.push((PredictionMode::NEWMV, i)); |
1230 | | } |
1231 | | |
1232 | | mv_stacks.push(mv_stack); |
1233 | | } |
1234 | | |
1235 | | let sz = bsize.width_mi().min(bsize.height_mi()); |
1236 | | |
1237 | | // To use non single reference modes, block width and height must be greater than 4. |
1238 | | if fi.reference_mode != ReferenceMode::SINGLE && sz >= 2 { |
1239 | | // Adding compound candidate |
1240 | | if let Some(r0) = fwdref { |
1241 | | if let Some(r1) = bwdref { |
1242 | | let ref_frames = [ref_frames_set[r0][0], ref_frames_set[r1][0]]; |
1243 | | ref_frames_set.push(ref_frames); |
1244 | | let mv0 = mvs_from_me[r0][0]; |
1245 | | let mv1 = mvs_from_me[r1][0]; |
1246 | | mvs_from_me.push([mv0, mv1]); |
1247 | | let mut mv_stack = ArrayVec::<CandidateMV, 9>::new(); |
1248 | | mode_contexts.push(cw.find_mvrefs( |
1249 | | tile_bo, |
1250 | | ref_frames, |
1251 | | &mut mv_stack, |
1252 | | bsize, |
1253 | | fi, |
1254 | | true, |
1255 | | )); |
1256 | | for &x in RAV1E_INTER_COMPOUND_MODES { |
1257 | | // exclude any NEAR mode based on speed setting |
1258 | | if fi.config.speed_settings.motion.include_near_mvs |
1259 | | || !x.has_nearmv() |
1260 | | { |
1261 | | let mv_stack_idx = ref_frames_set.len() - 1; |
1262 | | // exclude NEAR modes if the mv_stack is too short |
1263 | | if !(x.has_nearmv() && x.ref_mv_idx() >= mv_stack.len()) { |
1264 | | inter_mode_set.push((x, mv_stack_idx)); |
1265 | | } |
1266 | | } |
1267 | | } |
1268 | | mv_stacks.push(mv_stack); |
1269 | | } |
1270 | | } |
1271 | | } |
1272 | | |
1273 | | let num_modes_rdo = if fi.config.speed_settings.prediction.prediction_modes |
1274 | | >= PredictionModesSetting::ComplexAll |
1275 | | { |
1276 | | inter_mode_set.len() |
1277 | | } else { |
1278 | | 9 // This number is determined by AWCY test |
1279 | | }; |
1280 | | |
1281 | 0 | inter_mode_set.iter().for_each(|&(luma_mode, i)| { |
1282 | 0 | let mvs = match luma_mode { |
1283 | 0 | PredictionMode::NEWMV | PredictionMode::NEW_NEWMV => mvs_from_me[i], |
1284 | | PredictionMode::NEARESTMV | PredictionMode::NEAREST_NEARESTMV => { |
1285 | 0 | if !mv_stacks[i].is_empty() { |
1286 | 0 | [mv_stacks[i][0].this_mv, mv_stacks[i][0].comp_mv] |
1287 | | } else { |
1288 | 0 | [MotionVector::default(); 2] |
1289 | | } |
1290 | | } |
1291 | | PredictionMode::NEAR0MV | PredictionMode::NEAR_NEAR0MV => { |
1292 | 0 | if mv_stacks[i].len() > 1 { |
1293 | 0 | [mv_stacks[i][1].this_mv, mv_stacks[i][1].comp_mv] |
1294 | | } else { |
1295 | 0 | [MotionVector::default(); 2] |
1296 | | } |
1297 | | } |
1298 | | PredictionMode::NEAR1MV |
1299 | | | PredictionMode::NEAR2MV |
1300 | | | PredictionMode::NEAR_NEAR1MV |
1301 | 0 | | PredictionMode::NEAR_NEAR2MV => [ |
1302 | 0 | mv_stacks[i][luma_mode.ref_mv_idx()].this_mv, |
1303 | 0 | mv_stacks[i][luma_mode.ref_mv_idx()].comp_mv, |
1304 | 0 | ], |
1305 | | PredictionMode::NEAREST_NEWMV => { |
1306 | 0 | [mv_stacks[i][0].this_mv, mvs_from_me[i][1]] |
1307 | | } |
1308 | | PredictionMode::NEW_NEARESTMV => { |
1309 | 0 | [mvs_from_me[i][0], mv_stacks[i][0].comp_mv] |
1310 | | } |
1311 | | PredictionMode::GLOBALMV | PredictionMode::GLOBAL_GLOBALMV => { |
1312 | 0 | [MotionVector::default(); 2] |
1313 | | } |
1314 | | _ => { |
1315 | 0 | unimplemented!(); |
1316 | | } |
1317 | | }; |
1318 | 0 | mvs_set.push(mvs); |
1319 | | |
1320 | | // Calculate SATD for each mode |
1321 | 0 | if num_modes_rdo != inter_mode_set.len() { |
1322 | 0 | let tile_rect = ts.tile_rect(); |
1323 | 0 | let rec = &mut ts.rec.planes[0]; |
1324 | 0 | let po = tile_bo.plane_offset(rec.plane_cfg); |
1325 | 0 | let mut rec_region = |
1326 | 0 | rec.subregion_mut(Area::BlockStartingAt { bo: tile_bo.0 }); |
1327 | 0 |
|
1328 | 0 | luma_mode.predict_inter( |
1329 | 0 | fi, |
1330 | 0 | tile_rect, |
1331 | 0 | 0, |
1332 | 0 | po, |
1333 | 0 | &mut rec_region, |
1334 | 0 | bsize.width(), |
1335 | 0 | bsize.height(), |
1336 | 0 | ref_frames_set[i], |
1337 | 0 | mvs, |
1338 | 0 | &mut ts.inter_compound_buffers, |
1339 | 0 | ); |
1340 | 0 |
|
1341 | 0 | let plane_org = ts.input_tile.planes[0] |
1342 | 0 | .subregion(Area::BlockStartingAt { bo: tile_bo.0 }); |
1343 | 0 | let plane_ref = rec_region.as_const(); |
1344 | 0 |
|
1345 | 0 | let satd = get_satd( |
1346 | 0 | &plane_org, |
1347 | 0 | &plane_ref, |
1348 | 0 | bsize.width(), |
1349 | 0 | bsize.height(), |
1350 | 0 | fi.sequence.bit_depth, |
1351 | 0 | fi.cpu_feature_level, |
1352 | 0 | ); |
1353 | 0 | satds.push(satd); |
1354 | 0 | } else { |
1355 | 0 | satds.push(0); |
1356 | 0 | } |
1357 | 0 | }); Unexecuted instantiation: rav1e::rdo::inter_frame_rdo_mode_decision::<u16>::{closure#1}Unexecuted instantiation: rav1e::rdo::inter_frame_rdo_mode_decision::<u8>::{closure#1} |
1358 | | |
1359 | | let mut sorted = |
1360 | | izip!(inter_mode_set, mvs_set, satds).collect::<ArrayVec<_, 20>>(); |
1361 | | if num_modes_rdo != sorted.len() { |
1362 | | sorted.sort_by_key(|((_mode, _i), _mvs, satd)| *satd); |
1363 | | } |
1364 | | |
1365 | | sorted.iter().take(num_modes_rdo).for_each( |
1366 | 0 | |&((luma_mode, i), mvs, _satd)| { |
1367 | 0 | let mode_set_chroma = ArrayVec::from([luma_mode]); |
1368 | | |
1369 | 0 | luma_chroma_mode_rdo( |
1370 | 0 | luma_mode, |
1371 | 0 | fi, |
1372 | 0 | bsize, |
1373 | 0 | tile_bo, |
1374 | 0 | ts, |
1375 | 0 | cw, |
1376 | 0 | rdo_type, |
1377 | 0 | cw_checkpoint, |
1378 | 0 | &mut best, |
1379 | 0 | mvs, |
1380 | 0 | ref_frames_set[i], |
1381 | 0 | &mode_set_chroma, |
1382 | | false, |
1383 | 0 | mode_contexts[i], |
1384 | 0 | &mv_stacks[i], |
1385 | 0 | AngleDelta::default(), |
1386 | | ); |
1387 | 0 | }, Unexecuted instantiation: rav1e::rdo::inter_frame_rdo_mode_decision::<u16>::{closure#3}Unexecuted instantiation: rav1e::rdo::inter_frame_rdo_mode_decision::<u8>::{closure#3} |
1388 | | ); |
1389 | | |
1390 | | best |
1391 | | } |
1392 | | |
1393 | | #[profiling::function] |
1394 | | fn intra_frame_rdo_mode_decision<T: Pixel>( |
1395 | | fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, |
1396 | | cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset, |
1397 | | cw_checkpoint: &ContextWriterCheckpoint, rdo_type: RDOType, |
1398 | | mut best: PartitionParameters, is_chroma_block: bool, |
1399 | | ) -> PartitionParameters { |
1400 | | let mut modes = ArrayVec::<_, INTRA_MODES>::new(); |
1401 | | |
1402 | | // Reduce number of prediction modes at higher speed levels |
1403 | | let num_modes_rdo = if (fi.frame_type == FrameType::KEY |
1404 | | && fi.config.speed_settings.prediction.prediction_modes |
1405 | | >= PredictionModesSetting::ComplexKeyframes) |
1406 | | || (fi.frame_type.has_inter() |
1407 | | && fi.config.speed_settings.prediction.prediction_modes |
1408 | | >= PredictionModesSetting::ComplexAll) |
1409 | | { |
1410 | | 7 |
1411 | | } else { |
1412 | | 3 |
1413 | | }; |
1414 | | |
1415 | | let intra_mode_set = RAV1E_INTRA_MODES; |
1416 | | |
1417 | | // Find mode with lowest rate cost |
1418 | | { |
1419 | | use crate::ec::cdf_to_pdf; |
1420 | | |
1421 | | let probs_all = cdf_to_pdf(if fi.frame_type.has_inter() { |
1422 | | cw.get_cdf_intra_mode(bsize) |
1423 | | } else { |
1424 | | cw.get_cdf_intra_mode_kf(tile_bo) |
1425 | | }); |
1426 | | |
1427 | | modes.try_extend_from_slice(intra_mode_set).unwrap(); |
1428 | 0 | modes.sort_by_key(|&a| !probs_all[a as usize]); Unexecuted instantiation: rav1e::rdo::intra_frame_rdo_mode_decision::<u16>::{closure#0}Unexecuted instantiation: rav1e::rdo::intra_frame_rdo_mode_decision::<u8>::{closure#0} |
1429 | | } |
1430 | | |
1431 | | // If tx partition (i.e. fi.tx_mode_select) is enabled, the below intra prediction screening |
1432 | | // may be improved by emulating prediction for each tx block. |
1433 | | { |
1434 | | let satds = { |
1435 | | // FIXME: If tx partition is used, this whole sads block should be fixed |
1436 | | let tx_size = bsize.tx_size(); |
1437 | | let mut edge_buf = Aligned::uninit_array(); |
1438 | | let edge_buf = { |
1439 | | let rec = &ts.rec.planes[0].as_const(); |
1440 | | let po = tile_bo.plane_offset(rec.plane_cfg); |
1441 | | // FIXME: If tx partition is used, get_intra_edges() should be called for each tx block |
1442 | | get_intra_edges( |
1443 | | &mut edge_buf, |
1444 | | rec, |
1445 | | tile_bo, |
1446 | | 0, |
1447 | | 0, |
1448 | | bsize, |
1449 | | po, |
1450 | | tx_size, |
1451 | | fi.sequence.bit_depth, |
1452 | | None, |
1453 | | fi.sequence.enable_intra_edge_filter, |
1454 | | IntraParam::None, |
1455 | | ) |
1456 | | }; |
1457 | | |
1458 | | let ief_params = if fi.sequence.enable_intra_edge_filter { |
1459 | | let above_block_info = ts.above_block_info(tile_bo, 0, 0); |
1460 | | let left_block_info = ts.left_block_info(tile_bo, 0, 0); |
1461 | | Some(IntraEdgeFilterParameters::new( |
1462 | | 0, |
1463 | | above_block_info, |
1464 | | left_block_info, |
1465 | | )) |
1466 | | } else { |
1467 | | None |
1468 | | }; |
1469 | | |
1470 | | let mut satds_all = [0; INTRA_MODES]; |
1471 | | for &luma_mode in modes.iter().skip(num_modes_rdo / 2) { |
1472 | | let tile_rect = ts.tile_rect(); |
1473 | | let rec = &mut ts.rec.planes[0]; |
1474 | | let mut rec_region = |
1475 | | rec.subregion_mut(Area::BlockStartingAt { bo: tile_bo.0 }); |
1476 | | // FIXME: If tx partition is used, luma_mode.predict_intra() should be called for each tx block |
1477 | | luma_mode.predict_intra( |
1478 | | tile_rect, |
1479 | | &mut rec_region, |
1480 | | tx_size, |
1481 | | fi.sequence.bit_depth, |
1482 | | &[0i16; 2], |
1483 | | IntraParam::None, |
1484 | | if luma_mode.is_directional() { ief_params } else { None }, |
1485 | | &edge_buf, |
1486 | | fi.cpu_feature_level, |
1487 | | ); |
1488 | | |
1489 | | let plane_org = ts.input_tile.planes[0] |
1490 | | .subregion(Area::BlockStartingAt { bo: tile_bo.0 }); |
1491 | | let plane_ref = rec_region.as_const(); |
1492 | | |
1493 | | satds_all[luma_mode as usize] = get_satd( |
1494 | | &plane_org, |
1495 | | &plane_ref, |
1496 | | tx_size.width(), |
1497 | | tx_size.height(), |
1498 | | fi.sequence.bit_depth, |
1499 | | fi.cpu_feature_level, |
1500 | | ); |
1501 | | } |
1502 | | satds_all |
1503 | | }; |
1504 | | |
1505 | 0 | modes[num_modes_rdo / 2..].sort_by_key(|&a| satds[a as usize]); Unexecuted instantiation: rav1e::rdo::intra_frame_rdo_mode_decision::<u16>::{closure#1}Unexecuted instantiation: rav1e::rdo::intra_frame_rdo_mode_decision::<u8>::{closure#1} |
1506 | | } |
1507 | | |
1508 | | debug_assert!(num_modes_rdo >= 1); |
1509 | | |
1510 | 0 | modes.iter().take(num_modes_rdo).for_each(|&luma_mode| { |
1511 | 0 | let mvs = [MotionVector::default(); 2]; |
1512 | 0 | let ref_frames = [INTRA_FRAME, NONE_FRAME]; |
1513 | 0 | let mut mode_set_chroma = ArrayVec::<_, 2>::new(); |
1514 | 0 | mode_set_chroma.push(luma_mode); |
1515 | 0 | if is_chroma_block && luma_mode != PredictionMode::DC_PRED { |
1516 | 0 | mode_set_chroma.push(PredictionMode::DC_PRED); |
1517 | 0 | } |
1518 | 0 | luma_chroma_mode_rdo( |
1519 | 0 | luma_mode, |
1520 | 0 | fi, |
1521 | 0 | bsize, |
1522 | 0 | tile_bo, |
1523 | 0 | ts, |
1524 | 0 | cw, |
1525 | 0 | rdo_type, |
1526 | 0 | cw_checkpoint, |
1527 | 0 | &mut best, |
1528 | 0 | mvs, |
1529 | 0 | ref_frames, |
1530 | 0 | &mode_set_chroma, |
1531 | | true, |
1532 | | 0, |
1533 | 0 | &ArrayVec::<CandidateMV, 9>::new(), |
1534 | 0 | AngleDelta::default(), |
1535 | | ); |
1536 | 0 | }); Unexecuted instantiation: rav1e::rdo::intra_frame_rdo_mode_decision::<u16>::{closure#2}Unexecuted instantiation: rav1e::rdo::intra_frame_rdo_mode_decision::<u8>::{closure#2} |
1537 | | |
1538 | | if fi.config.speed_settings.prediction.fine_directional_intra |
1539 | | && bsize >= BlockSize::BLOCK_8X8 |
1540 | | { |
1541 | | // Find the best angle delta for the current best prediction mode |
1542 | | let luma_deltas = best.pred_mode_luma.angle_delta_count(); |
1543 | | let chroma_deltas = best.pred_mode_chroma.angle_delta_count(); |
1544 | | |
1545 | | let mvs = [MotionVector::default(); 2]; |
1546 | | let ref_frames = [INTRA_FRAME, NONE_FRAME]; |
1547 | | let mode_set_chroma = [best.pred_mode_chroma]; |
1548 | | let mv_stack = ArrayVec::<_, 9>::new(); |
1549 | | let mut best_angle_delta = best.angle_delta; |
1550 | 0 | let mut angle_delta_rdo = |y, uv| -> AngleDelta { |
1551 | 0 | if best.angle_delta.y != y || best.angle_delta.uv != uv { |
1552 | 0 | luma_chroma_mode_rdo( |
1553 | 0 | best.pred_mode_luma, |
1554 | 0 | fi, |
1555 | 0 | bsize, |
1556 | 0 | tile_bo, |
1557 | 0 | ts, |
1558 | 0 | cw, |
1559 | 0 | rdo_type, |
1560 | 0 | cw_checkpoint, |
1561 | 0 | &mut best, |
1562 | 0 | mvs, |
1563 | 0 | ref_frames, |
1564 | 0 | &mode_set_chroma, |
1565 | 0 | true, |
1566 | 0 | 0, |
1567 | 0 | &mv_stack, |
1568 | 0 | AngleDelta { y, uv }, |
1569 | 0 | ); |
1570 | 0 | } |
1571 | 0 | best.angle_delta |
1572 | 0 | }; Unexecuted instantiation: rav1e::rdo::intra_frame_rdo_mode_decision::<u16>::{closure#3}Unexecuted instantiation: rav1e::rdo::intra_frame_rdo_mode_decision::<u8>::{closure#3} |
1573 | | |
1574 | | for i in 0..luma_deltas { |
1575 | | let angle_delta_y = |
1576 | | if luma_deltas == 1 { 0 } else { i - MAX_ANGLE_DELTA as i8 }; |
1577 | | best_angle_delta = angle_delta_rdo(angle_delta_y, best_angle_delta.uv); |
1578 | | } |
1579 | | for j in 0..chroma_deltas { |
1580 | | let angle_delta_uv = |
1581 | | if chroma_deltas == 1 { 0 } else { j - MAX_ANGLE_DELTA as i8 }; |
1582 | | best_angle_delta = angle_delta_rdo(best_angle_delta.y, angle_delta_uv); |
1583 | | } |
1584 | | } |
1585 | | |
1586 | | best |
1587 | | } |
1588 | | |
| | /// Searches for the chroma-from-luma (CfL) alpha of each chroma plane. |
| | /// |
| | /// For planes 1 and 2, alpha 0 and then every magnitude in `1..=16` (with |
| | /// both signs) is predicted with `UV_CFL_PRED` into the reconstruction |
| | /// plane and scored by SSE against the input over the visible part of the |
| | /// chroma block. A plane's search stops early once the magnitude just |
| | /// tested exceeds a budget that starts at 2 and grows by 2 with every |
| | /// improvement. |
| | /// |
| | /// Returns `None` when `clip_visible_bsize` reports a zero width or height, |
| | /// or when alpha 0 is kept for both planes; otherwise the two alphas are |
| | /// passed through `CFLParams::from_alpha`. |
| | /// |
1589 | | /// # Panics |
1590 | | /// |
1591 | | /// - If the block size is invalid for subsampling (checked inside a `debug_assert!`). |
1592 | | #[profiling::function] |
1593 | | pub fn rdo_cfl_alpha<T: Pixel>( |
1594 | | ts: &mut TileStateMut<'_, T>, tile_bo: TileBlockOffset, bsize: BlockSize, |
1595 | | luma_tx_size: TxSize, fi: &FrameInvariants<T>, |
1596 | | ) -> Option<CFLParams> { |
1597 | | let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg; |
1598 | | let uv_tx_size = bsize.largest_chroma_tx_size(xdec, ydec); |
1599 | | debug_assert!( |
1600 | | bsize.subsampled_size(xdec, ydec).unwrap() == uv_tx_size.block_size() |
1601 | | ); |
1602 | | |
| | // Clip the chroma transform block against the (decimated) frame size so |
| | // that distortion is only measured over the returned width and height. |
1603 | | let frame_bo = ts.to_frame_block_offset(tile_bo); |
1604 | | let (visible_tx_w, visible_tx_h) = clip_visible_bsize( |
1605 | | (fi.width + xdec) >> xdec, |
1606 | | (fi.height + ydec) >> ydec, |
1607 | | uv_tx_size.block_size(), |
1608 | | (frame_bo.0.x << MI_SIZE_LOG2) >> xdec, |
1609 | | (frame_bo.0.y << MI_SIZE_LOG2) >> ydec, |
1610 | | ); |
1611 | | |
| | // Nothing to measure: report "no CfL parameters". |
1612 | | if visible_tx_w == 0 || visible_tx_h == 0 { |
1613 | | return None; |
1614 | | }; |
1615 | | let mut ac = Aligned::<[MaybeUninit<i16>; 32 * 32]>::uninit_array(); |
1616 | | let ac = luma_ac(&mut ac.data, ts, tile_bo, bsize, luma_tx_size, fi); |
| | // One alpha per chroma plane (plane indices 1 and 2). |
1617 | | let best_alpha: ArrayVec<i16, 2> = (1..3) |
1618 | 0 | .map(|p| { |
1619 | 0 | let &PlaneConfig { xdec, ydec, .. } = ts.rec.planes[p].plane_cfg; |
1620 | 0 | let tile_rect = ts.tile_rect().decimated(xdec, ydec); |
1621 | 0 | let rec = &mut ts.rec.planes[p]; |
1622 | 0 | let input = &ts.input_tile.planes[p]; |
1623 | 0 | let po = tile_bo.plane_offset(rec.plane_cfg); |
1624 | 0 | let mut edge_buf = Aligned::uninit_array(); |
1625 | 0 | let edge_buf = get_intra_edges( |
1626 | 0 | &mut edge_buf, |
1627 | 0 | &rec.as_const(), |
1628 | 0 | tile_bo, |
1629 | | 0, |
1630 | | 0, |
1631 | 0 | bsize, |
1632 | 0 | po, |
1633 | 0 | uv_tx_size, |
1634 | 0 | fi.sequence.bit_depth, |
1635 | 0 | Some(PredictionMode::UV_CFL_PRED), |
1636 | 0 | fi.sequence.enable_intra_edge_filter, |
1637 | 0 | IntraParam::None, |
1638 | | ); |
| | // Cost of one candidate: write the CfL prediction for `alpha` into the |
| | // reconstruction region, then take its SSE against the input. |
1639 | 0 | let mut alpha_cost = |alpha: i16| -> u64 { |
1640 | 0 | let mut rec_region = |
1641 | 0 | rec.subregion_mut(Area::BlockStartingAt { bo: tile_bo.0 }); |
1642 | 0 | PredictionMode::UV_CFL_PRED.predict_intra( |
1643 | 0 | tile_rect, |
1644 | 0 | &mut rec_region, |
1645 | 0 | uv_tx_size, |
1646 | 0 | fi.sequence.bit_depth, |
1647 | 0 | ac, |
1648 | 0 | IntraParam::Alpha(alpha), |
1649 | 0 | None, |
1650 | 0 | &edge_buf, |
1651 | 0 | fi.cpu_feature_level, |
1652 | | ); |
1653 | 0 | sse_wxh( |
1654 | 0 | &input.subregion(Area::BlockStartingAt { bo: tile_bo.0 }), |
1655 | 0 | &rec_region.as_const(), |
1656 | 0 | visible_tx_w, |
1657 | 0 | visible_tx_h, |
1658 | 0 | |_, _| DistortionScale::default(), // We're not doing RDO here. Unexecuted instantiation: rav1e::rdo::rdo_cfl_alpha::<u16>::{closure#0}::{closure#0}::{closure#0}Unexecuted instantiation: rav1e::rdo::rdo_cfl_alpha::<u8>::{closure#0}::{closure#0}::{closure#0} |
1659 | 0 | fi.sequence.bit_depth, |
1660 | 0 | fi.cpu_feature_level, |
1661 | | ) |
1662 | | .0 |
1663 | 0 | }; Unexecuted instantiation: rav1e::rdo::rdo_cfl_alpha::<u16>::{closure#0}::{closure#0}Unexecuted instantiation: rav1e::rdo::rdo_cfl_alpha::<u8>::{closure#0}::{closure#0} |
| | // `best` is (cost, alpha), seeded with alpha 0. `count` is the |
| | // early-termination budget: it grows by 2 on every improvement and the |
| | // loop ends as soon as the magnitude just tested is larger than it. |
1664 | 0 | let mut best = (alpha_cost(0), 0); |
1665 | 0 | let mut count = 2; |
1666 | 0 | for alpha in 1i16..=16i16 { |
1667 | 0 | let cost = (alpha_cost(alpha), alpha_cost(-alpha)); |
1668 | 0 | if cost.0 < best.0 { |
1669 | 0 | best = (cost.0, alpha); |
1670 | 0 | count += 2; |
1671 | 0 | } |
1672 | 0 | if cost.1 < best.0 { |
1673 | 0 | best = (cost.1, -alpha); |
1674 | 0 | count += 2; |
1675 | 0 | } |
1676 | 0 | if count < alpha { |
1677 | 0 | break; |
1678 | 0 | } |
1679 | | } |
1680 | 0 | best.1 |
1681 | 0 | }) Unexecuted instantiation: rav1e::rdo::rdo_cfl_alpha::<u16>::{closure#0}Unexecuted instantiation: rav1e::rdo::rdo_cfl_alpha::<u8>::{closure#0} |
1682 | | .collect(); |
1683 | | |
| | // Both alphas zero means no CfL parameters are returned. |
1684 | | if best_alpha[0] == 0 && best_alpha[1] == 0 { |
1685 | | None |
1686 | | } else { |
1687 | | Some(CFLParams::from_alpha(best_alpha[0], best_alpha[1])) |
1688 | | } |
1689 | | } |
1690 | | |
1691 | | /// RDO-based transform type decision |
1692 | | /// If `cw_checkpoint` is `None`, a checkpoint for cw's (`ContextWriter`) current |
1693 | | /// state is created and stored for later use. |
1694 | | /// |
| | /// Every entry of `tx_types` that `tx_set` allows is trial-encoded into a |
| | /// `WriterCounter`, and `cw` is rolled back to the checkpoint after each trial. |
| | /// Returns the transform type with the lowest RD cost together with that cost. |
| | /// If the first tested type costs more than `cur_best_rd`, the search stops |
| | /// immediately and `(TxType::DCT_DCT, f64::MAX)` is returned; the same pair is |
| | /// returned when `tx_set` allows none of the entries in `tx_types`. |
| | /// |
1695 | | /// # Panics |
1696 | | /// |
1697 | | /// - If a writer checkpoint is never created before or within the function. |
1698 | | /// This should never happen and indicates a development error. |
1699 | | /// - If the best RD found is negative. |
1700 | | /// This should never happen and indicates a development error. |
1701 | 0 | pub fn rdo_tx_type_decision<T: Pixel>( |
1702 | 0 | fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, |
1703 | 0 | cw: &mut ContextWriter, cw_checkpoint: &mut Option<ContextWriterCheckpoint>, |
1704 | 0 | mode: PredictionMode, ref_frames: [RefType; 2], mvs: [MotionVector; 2], |
1705 | 0 | bsize: BlockSize, tile_bo: TileBlockOffset, tx_size: TxSize, tx_set: TxSet, |
1706 | 0 | tx_types: &[TxType], cur_best_rd: f64, |
1707 | 0 | ) -> (TxType, f64) { |
1708 | 0 | let mut best_type = TxType::DCT_DCT; |
1709 | 0 | let mut best_rd = f64::MAX; |
1710 | | |
1711 | 0 | let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg; |
1712 | 0 | let is_chroma_block = |
1713 | 0 | has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling); |
1714 | | |
1715 | 0 | let is_inter = !mode.is_intra(); |
1716 | | |
1717 | 0 | if cw_checkpoint.is_none() { |
1718 | 0 | // Only run the first call |
1719 | 0 | // Prevents creating multiple checkpoints for own version of cw |
1720 | 0 | *cw_checkpoint = |
1721 | 0 | Some(cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling)); |
1722 | 0 | } |
1723 | | |
1724 | 0 | let rdo_type = if fi.use_tx_domain_distortion { |
1725 | 0 | RDOType::TxDistRealRate |
1726 | | } else { |
1727 | 0 | RDOType::PixelDistRealRate |
1728 | | }; |
| | // True only for intra blocks whose transform size differs from the block size. |
1729 | 0 | let need_recon_pixel = tx_size.block_size() != bsize && !is_inter; |
1730 | | |
1731 | 0 | let mut first_iteration = true; |
1732 | 0 | for &tx_type in tx_types { |
1733 | | // Skip unsupported transform types |
1734 | 0 | if av1_tx_used[tx_set as usize][tx_type as usize] == 0 { |
1735 | 0 | continue; |
1736 | 0 | } |
1737 | | |
| | // Inter blocks re-run motion compensation before every trial. |
1738 | 0 | if is_inter { |
1739 | 0 | motion_compensate( |
1740 | 0 | fi, ts, cw, mode, ref_frames, mvs, bsize, tile_bo, true, |
1741 | 0 | ); |
1742 | 0 | } |
1743 | | |
| | // Trial-encode into a counting writer; the rate is the `tell_frac` delta. |
1744 | 0 | let mut wr = WriterCounter::new(); |
1745 | 0 | let tell = wr.tell_frac(); |
1746 | 0 | let (_, tx_dist) = if is_inter { |
1747 | 0 | write_tx_tree( |
1748 | 0 | fi, |
1749 | 0 | ts, |
1750 | 0 | cw, |
1751 | 0 | &mut wr, |
1752 | 0 | mode, |
1753 | | 0, |
1754 | 0 | tile_bo, |
1755 | 0 | bsize, |
1756 | 0 | tx_size, |
1757 | 0 | tx_type, |
1758 | | false, |
1759 | | true, |
1760 | 0 | rdo_type, |
1761 | 0 | need_recon_pixel, |
1762 | | ) |
1763 | | } else { |
1764 | 0 | write_tx_blocks( |
1765 | 0 | fi, |
1766 | 0 | ts, |
1767 | 0 | cw, |
1768 | 0 | &mut wr, |
1769 | 0 | mode, |
1770 | 0 | mode, |
1771 | 0 | AngleDelta::default(), |
1772 | 0 | tile_bo, |
1773 | 0 | bsize, |
1774 | 0 | tx_size, |
1775 | 0 | tx_type, |
1776 | | false, |
1777 | 0 | CFLParams::default(), // Unused. |
1778 | | true, |
1779 | 0 | rdo_type, |
1780 | 0 | need_recon_pixel, |
1781 | | ) |
1782 | | }; |
1783 | | |
1784 | 0 | let rate = wr.tell_frac() - tell; |
1785 | 0 | let distortion = if fi.use_tx_domain_distortion { |
1786 | 0 | compute_tx_distortion( |
1787 | 0 | fi, |
1788 | 0 | ts, |
1789 | 0 | bsize, |
1790 | 0 | is_chroma_block, |
1791 | 0 | tile_bo, |
1792 | 0 | tx_dist, |
1793 | | false, |
1794 | | true, |
1795 | | ) |
1796 | | } else { |
1797 | 0 | compute_distortion(fi, ts, bsize, is_chroma_block, tile_bo, true) |
1798 | | }; |
| | // Restore the context writer so the next trial starts from the same state. |
1799 | 0 | cw.rollback(cw_checkpoint.as_ref().unwrap()); |
1800 | | |
1801 | 0 | let rd = compute_rd_cost(fi, rate, distortion); |
1802 | | |
1803 | 0 | if first_iteration { |
1804 | | // We use an optimization to early exit after testing the first |
1805 | | // transform type if the cost is higher than the existing best. |
1806 | | // The idea is that if this transform size is not better than the |
1807 | | // previous size, it is not worth testing remaining modes for this size. |
1808 | 0 | if rd > cur_best_rd { |
1809 | 0 | break; |
1810 | 0 | } |
1811 | 0 | first_iteration = false; |
1812 | 0 | } |
1813 | | |
1814 | 0 | if rd < best_rd { |
1815 | 0 | best_rd = rd; |
1816 | 0 | best_type = tx_type; |
1817 | 0 | } |
1818 | | } |
1819 | | |
1820 | 0 | assert!(best_rd >= 0_f64); |
1821 | | |
1822 | 0 | (best_type, best_rd) |
1823 | 0 | } Unexecuted instantiation: rav1e::rdo::rdo_tx_type_decision::<u16> Unexecuted instantiation: rav1e::rdo::rdo_tx_type_decision::<u8> |
1824 | | |
| | /// Picks the block offsets that `partition` actually uses out of the four |
| | /// candidates in `four_partitions`. |
| | /// |
| | /// Index 0 is always included. `PARTITION_NONE` returns only index 0, |
| | /// `PARTITION_VERT` adds index 1, `PARTITION_HORZ` adds index 2, and |
| | /// `PARTITION_SPLIT` adds indices 1, 2 and 3. Any other partition type yields |
| | /// only index 0. |
1825 | 0 | pub fn get_sub_partitions( |
1826 | 0 | four_partitions: &[TileBlockOffset; 4], partition: PartitionType, |
1827 | 0 | ) -> ArrayVec<TileBlockOffset, 4> { |
1828 | 0 | let mut partition_offsets = ArrayVec::<TileBlockOffset, 4>::new(); |
1829 | | |
1830 | 0 | partition_offsets.push(four_partitions[0]); |
1831 | | |
1832 | 0 | if partition == PARTITION_NONE { |
1833 | 0 | return partition_offsets; |
1834 | 0 | } |
1835 | 0 | if partition == PARTITION_VERT || partition == PARTITION_SPLIT { |
1836 | 0 | partition_offsets.push(four_partitions[1]); |
1837 | 0 | }; |
1838 | 0 | if partition == PARTITION_HORZ || partition == PARTITION_SPLIT { |
1839 | 0 | partition_offsets.push(four_partitions[2]); |
1840 | 0 | }; |
1841 | 0 | if partition == PARTITION_SPLIT { |
1842 | 0 | partition_offsets.push(four_partitions[3]); |
1843 | 0 | }; |
1844 | | |
1845 | 0 | partition_offsets |
1846 | 0 | } |
1847 | | |
| | /// Evaluates coding the block at `tile_bo` unsplit: runs `rdo_mode_decision` |
| | /// for the whole `bsize` block, pushes the chosen mode onto `child_modes`, and |
| | /// returns that mode's `rd_cost`. |
1848 | | #[inline(always)] |
1849 | 0 | fn rdo_partition_none<T: Pixel>( |
1850 | 0 | fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, |
1851 | 0 | cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset, |
1852 | 0 | inter_cfg: &InterConfig, child_modes: &mut ArrayVec<PartitionParameters, 4>, |
1853 | 0 | ) -> f64 { |
1854 | 0 | debug_assert!(tile_bo.0.x < ts.mi_width && tile_bo.0.y < ts.mi_height); |
1855 | | |
1856 | 0 | let mode = rdo_mode_decision(fi, ts, cw, bsize, tile_bo, inter_cfg); |
1857 | 0 | let cost = mode.rd_cost; |
1858 | | |
1859 | 0 | child_modes.push(mode); |
1860 | | |
1861 | 0 | cost |
1862 | 0 | } Unexecuted instantiation: rav1e::rdo::rdo_partition_none::<u16> Unexecuted instantiation: rav1e::rdo::rdo_partition_none::<u8> |
1863 | | |
1864 | | /// Evaluates a VERTICAL, HORIZONTAL or simple SPLIT partition of the block. |
| | /// |
| | /// Each sub-block selected by `get_sub_partitions` is mode-decided and then |
| | /// encoded with `encode_block_with_modes`, and its mode is pushed onto |
| | /// `child_modes`. Returns `Some(cost)` where `cost` is the partition |
| | /// signalling cost (0 for blocks below 8x8) plus the sum of the sub-block RD |
| | /// costs. |
| | /// |
| | /// Returns `None` when a sub-block fails the `has_cols`/`has_rows` check, or |
| | /// when `fi.enable_early_exit` is set and the running sum exceeds `best_rd`. |
| | /// On a `None` return `cw`, the writers and `child_modes` may already have |
| | /// been modified; this function does not undo those changes. |
| | /// |
| | /// # Panics |
| | /// |
| | /// - If `bsize.subsize(partition)` fails (the result is unwrapped). |
1865 | | #[inline(always)] |
1866 | 0 | fn rdo_partition_simple<T: Pixel, W: Writer>( |
1867 | 0 | fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, |
1868 | 0 | cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W, |
1869 | 0 | bsize: BlockSize, tile_bo: TileBlockOffset, inter_cfg: &InterConfig, |
1870 | 0 | partition: PartitionType, rdo_type: RDOType, best_rd: f64, |
1871 | 0 | child_modes: &mut ArrayVec<PartitionParameters, 4>, |
1872 | 0 | ) -> Option<f64> { |
1873 | 0 | debug_assert!(tile_bo.0.x < ts.mi_width && tile_bo.0.y < ts.mi_height); |
1874 | 0 | let subsize = bsize.subsize(partition).unwrap(); |
1875 | | |
| | // Rate-only cost of signalling the partition type (zero distortion). |
1876 | 0 | let cost = if bsize >= BlockSize::BLOCK_8X8 { |
1877 | 0 | let w: &mut W = if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef }; |
1878 | 0 | let tell = w.tell_frac(); |
1879 | 0 | cw.write_partition(w, tile_bo, partition, bsize); |
1880 | 0 | compute_rd_cost(fi, w.tell_frac() - tell, ScaledDistortion::zero()) |
1881 | | } else { |
1882 | 0 | 0.0 |
1883 | | }; |
1884 | | |
1885 | 0 | let hbsw = subsize.width_mi(); // Half the block size width in blocks |
1886 | 0 | let hbsh = subsize.height_mi(); // Half the block size height in blocks |
| | // Candidate offsets: this block, +x, +y, and +x+y. |
1887 | 0 | let four_partitions = [ |
1888 | 0 | tile_bo, |
1889 | 0 | TileBlockOffset(BlockOffset { x: tile_bo.0.x + hbsw, y: tile_bo.0.y }), |
1890 | 0 | TileBlockOffset(BlockOffset { x: tile_bo.0.x, y: tile_bo.0.y + hbsh }), |
1891 | 0 | TileBlockOffset(BlockOffset { |
1892 | 0 | x: tile_bo.0.x + hbsw, |
1893 | 0 | y: tile_bo.0.y + hbsh, |
1894 | 0 | }), |
1895 | 0 | ]; |
1896 | | |
1897 | 0 | let partitions = get_sub_partitions(&four_partitions, partition); |
1898 | | |
1899 | 0 | let mut rd_cost_sum = 0.0; |
1900 | | |
1901 | 0 | for offset in partitions { |
| | // NOTE(review): `hbs` is derived from the sub-block width and is also |
| | // used for the row check below - confirm this is intended for |
| | // non-square sub-blocks. |
1902 | 0 | let hbs = subsize.width_mi() >> 1; |
1903 | 0 | let has_cols = offset.0.x + hbs < ts.mi_width; |
1904 | 0 | let has_rows = offset.0.y + hbs < ts.mi_height; |
1905 | | |
1906 | 0 | if has_cols && has_rows { |
1907 | 0 | let mode_decision = |
1908 | 0 | rdo_mode_decision(fi, ts, cw, subsize, offset, inter_cfg); |
1909 | | |
1910 | 0 | rd_cost_sum += mode_decision.rd_cost; |
1911 | | |
| | // Already worse than the best known candidate: give up on this partition. |
1912 | 0 | if fi.enable_early_exit && rd_cost_sum > best_rd { |
1913 | 0 | return None; |
1914 | 0 | } |
| | // Square sub-blocks of at least 8x8 get an explicit PARTITION_NONE written. |
1915 | 0 | if subsize >= BlockSize::BLOCK_8X8 && subsize.is_sqr() { |
1916 | 0 | let w: &mut W = |
1917 | 0 | if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef }; |
1918 | 0 | cw.write_partition(w, offset, PartitionType::PARTITION_NONE, subsize); |
1919 | 0 | } |
1920 | 0 | encode_block_with_modes( |
1921 | 0 | fi, |
1922 | 0 | ts, |
1923 | 0 | cw, |
1924 | 0 | w_pre_cdef, |
1925 | 0 | w_post_cdef, |
1926 | 0 | subsize, |
1927 | 0 | offset, |
1928 | 0 | &mode_decision, |
1929 | 0 | rdo_type, |
1930 | 0 | None, |
1931 | | ); |
1932 | 0 | child_modes.push(mode_decision); |
1933 | | } else { |
1934 | | //rd_cost_sum += f64::MAX; |
1935 | 0 | return None; |
1936 | | } |
1937 | | } |
1938 | | |
1939 | 0 | Some(cost + rd_cost_sum) |
1940 | 0 | } Unexecuted instantiation: rav1e::rdo::rdo_partition_simple::<u16, rav1e::ec::WriterBase<rav1e::ec::WriterRecorder>> Unexecuted instantiation: rav1e::rdo::rdo_partition_simple::<u8, rav1e::ec::WriterBase<rav1e::ec::WriterRecorder>> |
1941 | | |
1942 | | /// RDO-based single level partitioning decision |
1943 | | /// |
| | /// Starts from the result stored in `cached_block` and tries every entry of |
| | /// `partition_types` except the cached partition type. A candidate replaces |
| | /// the current best only when it produces a strictly lower RD cost. After |
| | /// each candidate `cw`, `w_pre_cdef` and `w_post_cdef` are rolled back to the |
| | /// checkpoints taken on entry. |
| | /// |
| | /// Returns the best partition type, its RD cost and the per-sub-block modes. |
| | /// |
1944 | | /// # Panics |
1945 | | /// |
1946 | | /// - If the best RD found is negative. |
1947 | | /// This should never happen, and indicates a development error. |
| | /// - If a tested partition type is not `PARTITION_SPLIT`, `PARTITION_HORZ` or |
| | /// `PARTITION_VERT`, and is not `PARTITION_NONE` with |
| | /// `bsize <= BlockSize::BLOCK_64X64` (reaches `unreachable!()`). |
1948 | | #[profiling::function] |
1949 | | pub fn rdo_partition_decision<T: Pixel, W: Writer>( |
1950 | | fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, |
1951 | | cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W, |
1952 | | bsize: BlockSize, tile_bo: TileBlockOffset, |
1953 | | cached_block: &PartitionGroupParameters, partition_types: &[PartitionType], |
1954 | | rdo_type: RDOType, inter_cfg: &InterConfig, |
1955 | | ) -> PartitionGroupParameters { |
1956 | | let mut best_partition = cached_block.part_type; |
1957 | | let mut best_rd = cached_block.rd_cost; |
1958 | | let mut best_pred_modes = cached_block.part_modes.clone(); |
1959 | | |
| | // Snapshots used to undo the side effects of each trial below. |
1960 | | let cw_checkpoint = cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling); |
1961 | | let w_pre_checkpoint = w_pre_cdef.checkpoint(); |
1962 | | let w_post_checkpoint = w_post_cdef.checkpoint(); |
1963 | | |
1964 | | for &partition in partition_types { |
1965 | | // Do not re-encode results we already have |
1966 | | if partition == cached_block.part_type { |
1967 | | continue; |
1968 | | } |
1969 | | |
1970 | | let mut child_modes = ArrayVec::<_, 4>::new(); |
1971 | | |
| | // `None` means the candidate was abandoned by `rdo_partition_simple`. |
1972 | | let cost = match partition { |
1973 | | PARTITION_NONE if bsize <= BlockSize::BLOCK_64X64 => { |
1974 | | Some(rdo_partition_none( |
1975 | | fi, |
1976 | | ts, |
1977 | | cw, |
1978 | | bsize, |
1979 | | tile_bo, |
1980 | | inter_cfg, |
1981 | | &mut child_modes, |
1982 | | )) |
1983 | | } |
1984 | | PARTITION_SPLIT | PARTITION_HORZ | PARTITION_VERT => { |
1985 | | rdo_partition_simple( |
1986 | | fi, |
1987 | | ts, |
1988 | | cw, |
1989 | | w_pre_cdef, |
1990 | | w_post_cdef, |
1991 | | bsize, |
1992 | | tile_bo, |
1993 | | inter_cfg, |
1994 | | partition, |
1995 | | rdo_type, |
1996 | | best_rd, |
1997 | | &mut child_modes, |
1998 | | ) |
1999 | | } |
2000 | | _ => { |
2001 | | unreachable!(); |
2002 | | } |
2003 | | }; |
2004 | | |
2005 | | if let Some(rd) = cost { |
2006 | | if rd < best_rd { |
2007 | | best_rd = rd; |
2008 | | best_partition = partition; |
2009 | | best_pred_modes.clone_from(&child_modes); |
2010 | | } |
2011 | | } |
| | // Undo whatever the trial wrote, whether or not it became the new best. |
2012 | | cw.rollback(&cw_checkpoint); |
2013 | | w_pre_cdef.rollback(&w_pre_checkpoint); |
2014 | | w_post_cdef.rollback(&w_post_checkpoint); |
2015 | | } |
2016 | | |
2017 | | assert!(best_rd >= 0_f64); |
2018 | | |
2019 | | PartitionGroupParameters { |
2020 | | rd_cost: best_rd, |
2021 | | part_type: best_partition, |
2022 | | part_modes: best_pred_modes, |
2023 | | } |
2024 | | } |
2025 | | |
| | /// Accumulates the scaled distortion between `src` and `test` for plane `pli` |
| | /// over an area of `sb_w` x `sb_h` superblocks starting at `offset_sbo`. |
| | /// |
| | /// The area is walked block by block; positions outside `blocks` are skipped. |
| | /// Plane 0 is measured with `cdef_dist_kernel` and the other planes with |
| | /// `sse_wxh`. Each block is weighted by the `distortion_scale` looked up at |
| | /// its frame-absolute offset (`base_sbo + offset_sbo`), and the total is |
| | /// multiplied by `fi.dist_scale[pli]`. |
2026 | | #[profiling::function] |
2027 | | fn rdo_loop_plane_error<T: Pixel>( |
2028 | | base_sbo: TileSuperBlockOffset, offset_sbo: TileSuperBlockOffset, |
2029 | | sb_w: usize, sb_h: usize, fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, |
2030 | | blocks: &TileBlocks<'_>, test: &Frame<T>, src: &Tile<'_, T>, pli: usize, |
2031 | | ) -> ScaledDistortion { |
| | // 16 or 8 blocks per superblock side, depending on the superblock size. |
2032 | | let sb_w_blocks = |
2033 | | if fi.sequence.use_128x128_superblock { 16 } else { 8 } * sb_w; |
2034 | | let sb_h_blocks = |
2035 | | if fi.sequence.use_128x128_superblock { 16 } else { 8 } * sb_h; |
2036 | | // Each direction block is 8x8 in y, potentially smaller if subsampled in chroma |
2037 | | // accumulating in-frame and unpadded |
2038 | | let mut err = Distortion::zero(); |
2039 | | for by in 0..sb_h_blocks { |
2040 | | for bx in 0..sb_w_blocks { |
| | // Offset inside the analysis area; used to address `src` and `test`. |
2041 | | let loop_bo = offset_sbo.block_offset(bx << 1, by << 1); |
2042 | | if loop_bo.0.x < blocks.cols() && loop_bo.0.y < blocks.rows() { |
2043 | | let src_plane = &src.planes[pli]; |
2044 | | let test_plane = &test.planes[pli]; |
2045 | | let PlaneConfig { xdec, ydec, .. } = *src_plane.plane_cfg; |
2046 | | debug_assert_eq!(xdec, test_plane.cfg.xdec); |
2047 | | debug_assert_eq!(ydec, test_plane.cfg.ydec); |
2048 | | |
2049 | | // Unfortunately, our distortion biases are only available via |
2050 | | // Frame-absolute addressing, so we need a block offset |
2051 | | // relative to the full frame origin (not the tile or analysis |
2052 | | // area) |
2053 | | let frame_bo = (base_sbo + offset_sbo).block_offset(bx << 1, by << 1); |
2054 | | let bias = distortion_scale( |
2055 | | fi, |
2056 | | ts.to_frame_block_offset(frame_bo), |
2057 | | BlockSize::BLOCK_8X8, |
2058 | | ); |
2059 | | |
2060 | | let src_region = |
2061 | | src_plane.subregion(Area::BlockStartingAt { bo: loop_bo.0 }); |
2062 | | let test_region = |
2063 | | test_plane.region(Area::BlockStartingAt { bo: loop_bo.0 }); |
2064 | | |
2065 | | err += if pli == 0 { |
2066 | | // For loop filters, We intentionally use cdef_dist even with |
2067 | | // `--tune Psnr`. Using SSE instead gives no PSNR gain but has a |
2068 | | // significant negative impact on other metrics and visual quality. |
2069 | | RawDistortion(cdef_dist_kernel( |
2070 | | &src_region, |
2071 | | &test_region, |
2072 | | 8, |
2073 | | 8, |
2074 | | fi.sequence.bit_depth, |
2075 | | fi.cpu_feature_level, |
2076 | | ) as u64) |
2077 | | * bias |
2078 | | } else { |
| | // Chroma: SSE over the 8x8 block shrunk by the plane's decimation. |
2079 | | sse_wxh( |
2080 | | &src_region, |
2081 | | &test_region, |
2082 | | 8 >> xdec, |
2083 | | 8 >> ydec, |
2084 | | |_, _| bias, |
2085 | | fi.sequence.bit_depth, |
2086 | | fi.cpu_feature_level, |
2087 | | ) |
2088 | | }; |
2089 | | } |
2090 | | } |
2091 | | } |
2092 | | err * fi.dist_scale[pli] |
2093 | | } |
2094 | | |
2095 | | /// Passed in a superblock offset representing the upper left corner of |
2096 | | /// the LRU area we're optimizing. This area covers the largest LRU in |
2097 | | /// any of the present planes, but may consist of a number of |
2098 | | /// superblocks and full, smaller LRUs in the other planes |
2099 | | /// |
2100 | | /// # Panics |
2101 | | /// |
2102 | | /// - If both CDEF and LRF are disabled. |
2103 | | #[profiling::function] |
2104 | | pub fn rdo_loop_decision<T: Pixel, W: Writer>( |
2105 | | base_sbo: TileSuperBlockOffset, fi: &FrameInvariants<T>, |
2106 | | ts: &mut TileStateMut<'_, T>, cw: &mut ContextWriter, w: &mut W, |
2107 | | deblock_p: bool, |
2108 | | ) { |
2109 | | let planes = if fi.sequence.chroma_sampling == ChromaSampling::Cs400 { |
2110 | | 1 |
2111 | | } else { |
2112 | | MAX_PLANES |
2113 | | }; |
2114 | | assert!(fi.sequence.enable_cdef || fi.sequence.enable_restoration); |
2115 | | // Determine area of optimization: Which plane has the largest LRUs? |
2116 | | // How many LRUs for each? |
2117 | | let mut sb_w = 1; // how many superblocks wide the largest LRU |
2118 | | // is/how many SBs we're processing (same thing) |
2119 | | let mut sb_h = 1; // how many superblocks wide the largest LRU |
2120 | | // is/how many SBs we're processing (same thing) |
2121 | | let mut lru_w = [0; MAX_PLANES]; // how many LRUs we're processing |
2122 | | let mut lru_h = [0; MAX_PLANES]; // how many LRUs we're processing |
2123 | | for pli in 0..planes { |
2124 | | let sb_h_shift = ts.restoration.planes[pli].rp_cfg.sb_h_shift; |
2125 | | let sb_v_shift = ts.restoration.planes[pli].rp_cfg.sb_v_shift; |
2126 | | if sb_w < (1 << sb_h_shift) { |
2127 | | sb_w = 1 << sb_h_shift; |
2128 | | } |
2129 | | if sb_h < (1 << sb_v_shift) { |
2130 | | sb_h = 1 << sb_v_shift; |
2131 | | } |
2132 | | } |
2133 | | for pli in 0..planes { |
2134 | | let sb_h_shift = ts.restoration.planes[pli].rp_cfg.sb_h_shift; |
2135 | | let sb_v_shift = ts.restoration.planes[pli].rp_cfg.sb_v_shift; |
2136 | | lru_w[pli] = sb_w / (1 << sb_h_shift); |
2137 | | lru_h[pli] = sb_h / (1 << sb_v_shift); |
2138 | | } |
2139 | | |
2140 | | // The superblock width/height determinations may be calling for us |
2141 | | // to compute over superblocks that do not actually exist in the |
2142 | | // frame (off the right or lower edge). Trim sb width/height down |
2143 | | // to actual superblocks. Note that these last superblocks on the |
2144 | | // right/bottom may themselves still span the edge of the frame, but |
2145 | | // they do hold at least some visible pixels. |
2146 | | sb_w = sb_w.min(ts.sb_width - base_sbo.0.x); |
2147 | | sb_h = sb_h.min(ts.sb_height - base_sbo.0.y); |
2148 | | |
2149 | | // We have need to know the Y visible pixel limits as well (the |
2150 | | // sb_w/sb_h figures above can be used to determine how many |
2151 | | // allocated pixels, possibly beyond the visible frame, exist). |
2152 | | let crop_w = |
2153 | | fi.width - ((ts.sbo.0.x + base_sbo.0.x) << SUPERBLOCK_TO_PLANE_SHIFT); |
2154 | | let crop_h = |
2155 | | fi.height - ((ts.sbo.0.y + base_sbo.0.y) << SUPERBLOCK_TO_PLANE_SHIFT); |
2156 | | let pixel_w = crop_w.min(sb_w << SUPERBLOCK_TO_PLANE_SHIFT); |
2157 | | let pixel_h = crop_h.min(sb_h << SUPERBLOCK_TO_PLANE_SHIFT); |
2158 | | |
2159 | | // Based on `RestorationState::new` |
2160 | | const MAX_SB_SHIFT: usize = 4; |
2161 | | const MAX_SB_SIZE: usize = 1 << MAX_SB_SHIFT; |
2162 | | const MAX_LRU_SIZE: usize = MAX_SB_SIZE; |
2163 | | |
2164 | | // Static allocation relies on the "minimal LRU area for all N planes" invariant. |
2165 | | let mut best_index = [-1; MAX_SB_SIZE * MAX_SB_SIZE]; |
2166 | | let mut best_lrf = |
2167 | | [[RestorationFilter::None; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE]; |
2168 | | |
2169 | | // due to imprecision in the reconstruction parameter solver, we |
2170 | | // need to make sure we don't fall into a limit cycle. Track our |
2171 | | // best cost at LRF so that we can break if we get a solution that doesn't |
2172 | | // improve at the reconstruction stage. |
2173 | | let mut best_lrf_cost = [[-1.0; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE]; |
2174 | | |
2175 | | // sub-setted region of the TileBlocks for our working frame area. |
2176 | | // Note that the size of this subset is what signals CDEF as to the |
2177 | | // actual coded size. |
2178 | | let mut tileblocks_subset = cw.bc.blocks.subregion_mut( |
2179 | | base_sbo.block_offset(0, 0).0.x, |
2180 | | base_sbo.block_offset(0, 0).0.y, |
2181 | | sb_w << SUPERBLOCK_TO_BLOCK_SHIFT, |
2182 | | sb_h << SUPERBLOCK_TO_BLOCK_SHIFT, |
2183 | | ); |
2184 | | |
2185 | | // cdef doesn't run on superblocks that are completely skipped. |
2186 | | // Determine which super blocks are marked as skipped so we can avoid running |
2187 | | // them. If all blocks are skipped, we can avoid some of the overhead related |
2188 | | // to setting up for cdef. |
2189 | | let mut cdef_skip = [true; MAX_SB_SIZE * MAX_SB_SIZE]; |
2190 | | let mut cdef_skip_all = true; |
2191 | | if fi.sequence.enable_cdef { |
2192 | | for sby in 0..sb_h { |
2193 | | for sbx in 0..sb_w { |
2194 | | let blocks = tileblocks_subset.subregion(16 * sbx, 16 * sby, 16, 16); |
2195 | | let mut skip = true; |
2196 | | for y in 0..blocks.rows() { |
2197 | | for block in blocks[y].iter() { |
2198 | | skip &= block.skip; |
2199 | | } |
2200 | | } |
2201 | | cdef_skip[sby * MAX_SB_SIZE + sbx] = skip; |
2202 | | cdef_skip_all &= skip; |
2203 | | } |
2204 | | } |
2205 | | } |
2206 | | |
2207 | | // Unlike cdef, loop restoration will run regardless of whether blocks are |
2208 | | // skipped or not. At the same time, the most significant improvement will |
2209 | | // generally be from un-skipped blocks, so lru is only performed if there are |
2210 | | // un-skipped blocks. |
2211 | | // This should be the same as `cdef_skip_all`, except when cdef is disabled. |
2212 | | let mut lru_skip_all = true; |
2213 | | let mut lru_skip = [[true; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE]; |
2214 | | if fi.sequence.enable_restoration { |
2215 | | if fi.config.speed_settings.lru_on_skip { |
2216 | | lru_skip_all = false; |
2217 | | lru_skip = [[false; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE]; |
2218 | | } else { |
2219 | | for pli in 0..planes { |
2220 | | // width, in sb, of an LRU in this plane |
2221 | | let lru_sb_w = 1 << ts.restoration.planes[pli].rp_cfg.sb_h_shift; |
2222 | | // height, in sb, of an LRU in this plane |
2223 | | let lru_sb_h = 1 << ts.restoration.planes[pli].rp_cfg.sb_v_shift; |
2224 | | for lru_y in 0..lru_h[pli] { |
2225 | | // number of LRUs vertically |
2226 | | for lru_x in 0..lru_w[pli] { |
2227 | | // number of LRUs horizontally |
2228 | | |
2229 | | let loop_sbo = TileSuperBlockOffset(SuperBlockOffset { |
2230 | | x: lru_x * lru_sb_w, |
2231 | | y: lru_y * lru_sb_h, |
2232 | | }); |
2233 | | |
2234 | | if !ts.restoration.has_restoration_unit( |
2235 | | base_sbo + loop_sbo, |
2236 | | pli, |
2237 | | false, |
2238 | | ) { |
2239 | | continue; |
2240 | | } |
2241 | | |
2242 | | let start = loop_sbo.block_offset(0, 0).0; |
2243 | | let size = TileSuperBlockOffset(SuperBlockOffset { |
2244 | | x: lru_sb_w, |
2245 | | y: lru_sb_h, |
2246 | | }) |
2247 | | .block_offset(0, 0) |
2248 | | .0; |
2249 | | |
2250 | | let blocks = |
2251 | | tileblocks_subset.subregion(start.x, start.y, size.x, size.y); |
2252 | | let mut skip = true; |
2253 | | for y in 0..blocks.rows() { |
2254 | | for block in blocks[y].iter() { |
2255 | | skip &= block.skip; |
2256 | | } |
2257 | | } |
2258 | | lru_skip[lru_y * MAX_LRU_SIZE + lru_x][pli] = skip; |
2259 | | lru_skip_all &= skip; |
2260 | | } |
2261 | | } |
2262 | | } |
2263 | | } |
2264 | | } |
2265 | | |
2266 | | // Return early if all blocks are skipped for lru and cdef. |
2267 | | if lru_skip_all && cdef_skip_all { |
2268 | | return; |
2269 | | } |
2270 | | |
2271 | | // Loop filter RDO is an iterative process and we need temporary |
2272 | | // scratch data to hold the results of deblocking, cdef, and the |
2273 | | // loop reconstruction filter so that each can be partially updated |
2274 | | // without recomputing the entire stack. Construct |
2275 | | // largest-LRU-sized frames for each, accounting for padding |
2276 | | // required by deblocking, cdef and [optionally] LR. |
2277 | | let mut rec_subset = ts |
2278 | | .rec |
2279 | | .subregion(Area::BlockRect { |
2280 | | bo: base_sbo.block_offset(0, 0).0, |
2281 | | width: (pixel_w + 7) >> 3 << 3, |
2282 | | height: (pixel_h + 7) >> 3 << 3, |
2283 | | }) |
2284 | | .scratch_copy(); |
2285 | | |
2286 | | // const, no need to copy, just need the subregion (but do zero the |
2287 | | // origin to match the other copies/new backing frames). |
2288 | | let src_subset = ts |
2289 | | .input_tile |
2290 | | .subregion(Area::BlockRect { |
2291 | | bo: base_sbo.block_offset(0, 0).0, |
2292 | | width: (pixel_w + 7) >> 3 << 3, |
2293 | | height: (pixel_h + 7) >> 3 << 3, |
2294 | | }) |
2295 | | .home(); |
2296 | | |
2297 | | if deblock_p { |
2298 | | // Find a good deblocking filter solution for the passed in area. |
2299 | | // This is not RDO of deblocking itself, merely a solution to get |
2300 | | // better results from CDEF/LRF RDO. |
2301 | | let deblock_levels = deblock_filter_optimize( |
2302 | | fi, |
2303 | | &rec_subset.as_tile(), |
2304 | | &src_subset, |
2305 | | &tileblocks_subset.as_const(), |
2306 | | crop_w, |
2307 | | crop_h, |
2308 | | ); |
2309 | | |
2310 | | // Deblock the contents of our reconstruction copy. |
2311 | | if deblock_levels[0] != 0 || deblock_levels[1] != 0 { |
2312 | | // copy ts.deblock because we need to set some of our own values here |
2313 | | let mut deblock_copy = *ts.deblock; |
2314 | | deblock_copy.levels = deblock_levels; |
2315 | | |
2316 | | // finally, deblock the temp frame |
2317 | | deblock_filter_frame( |
2318 | | &deblock_copy, |
2319 | | &mut rec_subset.as_tile_mut(), |
2320 | | &tileblocks_subset.as_const(), |
2321 | | crop_w, |
2322 | | crop_h, |
2323 | | fi.sequence.bit_depth, |
2324 | | planes, |
2325 | | ); |
2326 | | } |
2327 | | } |
2328 | | |
2329 | | let mut cdef_work = |
2330 | | if !cdef_skip_all { Some(rec_subset.clone()) } else { None }; |
2331 | | let mut lrf_work = if !lru_skip_all { |
2332 | | Some(Frame { |
2333 | | planes: { |
2334 | 0 | let new_plane = |pli: usize| { |
2335 | 0 | let PlaneConfig { xdec, ydec, width, height, .. } = |
2336 | 0 | rec_subset.planes[pli].cfg; |
2337 | 0 | Plane::new(width, height, xdec, ydec, 0, 0) |
2338 | 0 | }; Unexecuted instantiation: rav1e::rdo::rdo_loop_decision::<u16, rav1e::ec::WriterBase<rav1e::ec::WriterEncoder>>::{closure#0}Unexecuted instantiation: rav1e::rdo::rdo_loop_decision::<u8, rav1e::ec::WriterBase<rav1e::ec::WriterEncoder>>::{closure#0} |
2339 | | [new_plane(0), new_plane(1), new_plane(2)] |
2340 | | }, |
2341 | | }) |
2342 | | } else { |
2343 | | None |
2344 | | }; |
2345 | | |
2346 | | // Precompute directional analysis for CDEF |
2347 | | let cdef_data = { |
2348 | | if cdef_work.is_some() { |
2349 | | Some(( |
2350 | | &rec_subset, |
2351 | | cdef_analyze_superblock_range( |
2352 | | fi, |
2353 | | &rec_subset, |
2354 | | &tileblocks_subset.as_const(), |
2355 | | sb_w, |
2356 | | sb_h, |
2357 | | ), |
2358 | | )) |
2359 | | } else { |
2360 | | None |
2361 | | } |
2362 | | }; |
2363 | | |
2364 | | // CDEF/LRF decision iteration |
2365 | | // Start with a default of CDEF 0 and RestorationFilter::None |
2366 | | // Try all CDEF options for each sb with current LRF; if new CDEF+LRF choice is better, select it. |
2367 | | // Then try all LRF options with current CDEFs; if new CDEFs+LRF choice is better, select it. |
2368 | | // If LRF choice changed for any plane, repeat until no changes |
2369 | | // Limit iterations and where we break based on speed setting (in the TODO list ;-) |
2370 | | let mut cdef_change = true; |
2371 | | let mut lrf_change = true; |
2372 | | while cdef_change || lrf_change { |
2373 | | // search for improved cdef indices, superblock by superblock, if cdef is enabled. |
2374 | | if let (Some((rec_copy, cdef_dirs)), Some(cdef_ref)) = |
2375 | | (&cdef_data, &mut cdef_work.as_mut()) |
2376 | | { |
2377 | | for sby in 0..sb_h { |
2378 | | for sbx in 0..sb_w { |
2379 | | // determine whether this superblock can be skipped |
2380 | | if cdef_skip[sby * MAX_SB_SIZE + sbx] { |
2381 | | continue; |
2382 | | } |
2383 | | |
2384 | | let prev_best_index = best_index[sby * sb_w + sbx]; |
2385 | | let mut best_cost = -1.; |
2386 | | let mut best_new_index = -1i8; |
2387 | | |
2388 | | /* offset of the superblock we're currently testing within the larger |
2389 | | analysis area */ |
2390 | | let loop_sbo = |
2391 | | TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby }); |
2392 | | |
2393 | | /* cdef index testing loop */ |
2394 | | for cdef_index in 0..(1 << fi.cdef_bits) { |
2395 | | let mut err = ScaledDistortion::zero(); |
2396 | | let mut rate = 0; |
2397 | | |
2398 | | cdef_filter_superblock( |
2399 | | fi, |
2400 | | &rec_subset, |
2401 | | &mut cdef_ref.as_tile_mut(), |
2402 | | &tileblocks_subset.as_const(), |
2403 | | loop_sbo, |
2404 | | cdef_index, |
2405 | | &cdef_dirs[sby * sb_w + sbx], |
2406 | | ); |
2407 | | // apply LRF if any |
2408 | | for pli in 0..planes { |
2409 | | // We need the cropped-to-visible-frame area of this SB |
2410 | | let wh = |
2411 | | if fi.sequence.use_128x128_superblock { 128 } else { 64 }; |
2412 | | let PlaneConfig { xdec, ydec, .. } = cdef_ref.planes[pli].cfg; |
2413 | | let vis_width = (wh >> xdec).min( |
2414 | | (crop_w >> xdec) |
2415 | | - loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg).x |
2416 | | as usize, |
2417 | | ); |
2418 | | let vis_height = (wh >> ydec).min( |
2419 | | (crop_h >> ydec) |
2420 | | - loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg).y |
2421 | | as usize, |
2422 | | ); |
2423 | | // which LRU are we currently testing against? |
2424 | | if let (Some((lru_x, lru_y)), Some(lrf_ref)) = { |
2425 | | let rp = &ts.restoration.planes[pli]; |
2426 | | ( |
2427 | | rp.restoration_unit_offset(base_sbo, loop_sbo, false), |
2428 | | &mut lrf_work, |
2429 | | ) |
2430 | | } { |
2431 | | // We have a valid LRU, apply LRF, compute error |
2432 | | match best_lrf[lru_y * lru_w[pli] + lru_x][pli] { |
2433 | | RestorationFilter::None => { |
2434 | | err += rdo_loop_plane_error( |
2435 | | base_sbo, |
2436 | | loop_sbo, |
2437 | | 1, |
2438 | | 1, |
2439 | | fi, |
2440 | | ts, |
2441 | | &tileblocks_subset.as_const(), |
2442 | | cdef_ref, |
2443 | | &src_subset, |
2444 | | pli, |
2445 | | ); |
2446 | | rate += if fi.sequence.enable_restoration { |
2447 | | cw.fc.count_lrf_switchable( |
2448 | | w, |
2449 | | &ts.restoration.as_const(), |
2450 | | best_lrf[lru_y * lru_w[pli] + lru_x][pli], |
2451 | | pli, |
2452 | | ) |
2453 | | } else { |
2454 | | 0 // no relative cost differences to different |
2455 | | // CDEF params. If cdef is on, it's a wash. |
2456 | | }; |
2457 | | } |
2458 | | RestorationFilter::Sgrproj { set, xqd } => { |
2459 | | // only run on this single superblock |
2460 | | let loop_po = |
2461 | | loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg); |
2462 | | // todo: experiment with borrowing border pixels |
2463 | | // rather than edge-extending. Right now this is |
2464 | | // hard-clipping to the superblock boundary. |
2465 | | setup_integral_image( |
2466 | | &mut ts.integral_buffer, |
2467 | | SOLVE_IMAGE_STRIDE, |
2468 | | vis_width, |
2469 | | vis_height, |
2470 | | vis_width, |
2471 | | vis_height, |
2472 | | &cdef_ref.planes[pli].slice(loop_po), |
2473 | | &cdef_ref.planes[pli].slice(loop_po), |
2474 | | ); |
2475 | | sgrproj_stripe_filter( |
2476 | | set, |
2477 | | xqd, |
2478 | | fi, |
2479 | | &ts.integral_buffer, |
2480 | | SOLVE_IMAGE_STRIDE, |
2481 | | &cdef_ref.planes[pli].slice(loop_po), |
2482 | | &mut lrf_ref.planes[pli].region_mut(Area::Rect { |
2483 | | x: loop_po.x, |
2484 | | y: loop_po.y, |
2485 | | width: vis_width, |
2486 | | height: vis_height, |
2487 | | }), |
2488 | | ); |
2489 | | err += rdo_loop_plane_error( |
2490 | | base_sbo, |
2491 | | loop_sbo, |
2492 | | 1, |
2493 | | 1, |
2494 | | fi, |
2495 | | ts, |
2496 | | &tileblocks_subset.as_const(), |
2497 | | lrf_ref, |
2498 | | &src_subset, |
2499 | | pli, |
2500 | | ); |
2501 | | rate += cw.fc.count_lrf_switchable( |
2502 | | w, |
2503 | | &ts.restoration.as_const(), |
2504 | | best_lrf[lru_y * lru_w[pli] + lru_x][pli], |
2505 | | pli, |
2506 | | ); |
2507 | | } |
2508 | | RestorationFilter::Wiener { .. } => unreachable!(), // coming soon |
2509 | | } |
2510 | | } else { |
2511 | | // No actual LRU here, compute error directly from CDEF output. |
2512 | | err += rdo_loop_plane_error( |
2513 | | base_sbo, |
2514 | | loop_sbo, |
2515 | | 1, |
2516 | | 1, |
2517 | | fi, |
2518 | | ts, |
2519 | | &tileblocks_subset.as_const(), |
2520 | | cdef_ref, |
2521 | | &src_subset, |
2522 | | pli, |
2523 | | ); |
2524 | | // no relative cost differences to different |
2525 | | // CDEF params. If cdef is on, it's a wash. |
2526 | | // rate += 0; |
2527 | | } |
2528 | | } |
2529 | | |
2530 | | let cost = compute_rd_cost(fi, rate, err); |
2531 | | if best_cost < 0. || cost < best_cost { |
2532 | | best_cost = cost; |
2533 | | best_new_index = cdef_index as i8; |
2534 | | } |
2535 | | } |
2536 | | |
2537 | | // Did we change any preexisting choices? |
2538 | | if best_new_index != prev_best_index { |
2539 | | cdef_change = true; |
2540 | | best_index[sby * sb_w + sbx] = best_new_index; |
2541 | | tileblocks_subset.set_cdef(loop_sbo, best_new_index as u8); |
2542 | | } |
2543 | | |
2544 | | let mut cdef_ref_tm = TileMut::new( |
2545 | | cdef_ref, |
2546 | | TileRect { |
2547 | | x: 0, |
2548 | | y: 0, |
2549 | | width: cdef_ref.planes[0].cfg.width, |
2550 | | height: cdef_ref.planes[0].cfg.height, |
2551 | | }, |
2552 | | ); |
2553 | | |
2554 | | // Keep cdef output up to date; we need it for restoration |
2555 | | // both below and above (padding) |
2556 | | cdef_filter_superblock( |
2557 | | fi, |
2558 | | rec_copy, |
2559 | | &mut cdef_ref_tm, |
2560 | | &tileblocks_subset.as_const(), |
2561 | | loop_sbo, |
2562 | | best_index[sby * sb_w + sbx] as u8, |
2563 | | &cdef_dirs[sby * sb_w + sbx], |
2564 | | ); |
2565 | | } |
2566 | | } |
2567 | | } |
2568 | | |
2569 | | if !cdef_change { |
2570 | | break; |
2571 | | } |
2572 | | cdef_change = false; |
2573 | | lrf_change = false; |
2574 | | |
2575 | | // search for improved restoration filter parameters if restoration is enabled |
2576 | | if let Some(lrf_ref) = &mut lrf_work.as_mut() { |
2577 | | let lrf_input = if cdef_work.is_some() { |
2578 | | // When CDEF is enabled, we pull from the CDEF output |
2579 | | cdef_work.as_ref().unwrap() |
2580 | | } else { |
2581 | | // When CDEF is disabled, we pull from the [optionally |
2582 | | // deblocked] reconstruction |
2583 | | &rec_subset |
2584 | | }; |
2585 | | for pli in 0..planes { |
2586 | | // Nominal size of LRU in pixels before clipping to visible frame |
2587 | | let unit_size = ts.restoration.planes[pli].rp_cfg.unit_size; |
2588 | | // width, in sb, of an LRU in this plane |
2589 | | let lru_sb_w = 1 << ts.restoration.planes[pli].rp_cfg.sb_h_shift; |
2590 | | // height, in sb, of an LRU in this plane |
2591 | | let lru_sb_h = 1 << ts.restoration.planes[pli].rp_cfg.sb_v_shift; |
2592 | | let PlaneConfig { xdec, ydec, .. } = lrf_ref.planes[pli].cfg; |
2593 | | for lru_y in 0..lru_h[pli] { |
2594 | | // number of LRUs vertically |
2595 | | for lru_x in 0..lru_w[pli] { |
2596 | | // number of LRUs horizontally |
2597 | | |
2598 | | // determine whether this lru should be skipped |
2599 | | if lru_skip[lru_y * MAX_LRU_SIZE + lru_x][pli] { |
2600 | | continue; |
2601 | | } |
2602 | | |
2603 | | let loop_sbo = TileSuperBlockOffset(SuperBlockOffset { |
2604 | | x: lru_x * lru_sb_w, |
2605 | | y: lru_y * lru_sb_h, |
2606 | | }); |
2607 | | if ts.restoration.has_restoration_unit( |
2608 | | base_sbo + loop_sbo, |
2609 | | pli, |
2610 | | false, |
2611 | | ) { |
2612 | | let src_plane = &src_subset.planes[pli]; // uncompressed input for reference |
2613 | | let lrf_in_plane = &lrf_input.planes[pli]; |
2614 | | let lrf_po = loop_sbo.plane_offset(src_plane.plane_cfg); |
2615 | | let mut best_new_lrf = best_lrf[lru_y * lru_w[pli] + lru_x][pli]; |
2616 | | let mut best_cost = |
2617 | | best_lrf_cost[lru_y * lru_w[pli] + lru_x][pli]; |
2618 | | |
2619 | | // Check the no filter option |
2620 | | { |
2621 | | let err = rdo_loop_plane_error( |
2622 | | base_sbo, |
2623 | | loop_sbo, |
2624 | | lru_sb_w, |
2625 | | lru_sb_h, |
2626 | | fi, |
2627 | | ts, |
2628 | | &tileblocks_subset.as_const(), |
2629 | | lrf_input, |
2630 | | &src_subset, |
2631 | | pli, |
2632 | | ); |
2633 | | let rate = cw.fc.count_lrf_switchable( |
2634 | | w, |
2635 | | &ts.restoration.as_const(), |
2636 | | best_new_lrf, |
2637 | | pli, |
2638 | | ); |
2639 | | |
2640 | | let cost = compute_rd_cost(fi, rate, err); |
2641 | | // Was this choice actually an improvement? |
2642 | | if best_cost < 0. || cost < best_cost { |
2643 | | best_cost = cost; |
2644 | | best_lrf_cost[lru_y * lru_w[pli] + lru_x][pli] = cost; |
2645 | | best_new_lrf = RestorationFilter::None; |
2646 | | } |
2647 | | } |
2648 | | |
2649 | | // Look for a self guided filter |
2650 | | // We need the cropped-to-visible-frame computation area of this LRU |
2651 | | let vis_width = unit_size.min( |
2652 | | (crop_w >> xdec) |
2653 | | - loop_sbo.plane_offset(&lrf_ref.planes[pli].cfg).x as usize, |
2654 | | ); |
2655 | | let vis_height = unit_size.min( |
2656 | | (crop_h >> ydec) |
2657 | | - loop_sbo.plane_offset(&lrf_ref.planes[pli].cfg).y as usize, |
2658 | | ); |
2659 | | |
2660 | | // todo: experiment with borrowing border pixels |
2661 | | // rather than edge-extending. Right now this is |
2662 | | // hard-clipping to the superblock boundary. |
2663 | | setup_integral_image( |
2664 | | &mut ts.integral_buffer, |
2665 | | SOLVE_IMAGE_STRIDE, |
2666 | | vis_width, |
2667 | | vis_height, |
2668 | | vis_width, |
2669 | | vis_height, |
2670 | | &lrf_in_plane.slice(lrf_po), |
2671 | | &lrf_in_plane.slice(lrf_po), |
2672 | | ); |
2673 | | |
2674 | | for &set in get_sgr_sets(fi.config.speed_settings.sgr_complexity) |
2675 | | { |
2676 | | let (xqd0, xqd1) = sgrproj_solve( |
2677 | | set, |
2678 | | fi, |
2679 | | &ts.integral_buffer, |
2680 | | &src_plane |
2681 | | .subregion(Area::StartingAt { x: lrf_po.x, y: lrf_po.y }), |
2682 | | &lrf_in_plane.slice(lrf_po), |
2683 | | vis_width, |
2684 | | vis_height, |
2685 | | ); |
2686 | | let current_lrf = |
2687 | | RestorationFilter::Sgrproj { set, xqd: [xqd0, xqd1] }; |
2688 | | if let RestorationFilter::Sgrproj { set, xqd } = current_lrf { |
2689 | | sgrproj_stripe_filter( |
2690 | | set, |
2691 | | xqd, |
2692 | | fi, |
2693 | | &ts.integral_buffer, |
2694 | | SOLVE_IMAGE_STRIDE, |
2695 | | &lrf_in_plane.slice(lrf_po), |
2696 | | &mut lrf_ref.planes[pli].region_mut(Area::Rect { |
2697 | | x: lrf_po.x, |
2698 | | y: lrf_po.y, |
2699 | | width: vis_width, |
2700 | | height: vis_height, |
2701 | | }), |
2702 | | ); |
2703 | | } |
2704 | | let err = rdo_loop_plane_error( |
2705 | | base_sbo, |
2706 | | loop_sbo, |
2707 | | lru_sb_w, |
2708 | | lru_sb_h, |
2709 | | fi, |
2710 | | ts, |
2711 | | &tileblocks_subset.as_const(), |
2712 | | lrf_ref, |
2713 | | &src_subset, |
2714 | | pli, |
2715 | | ); |
2716 | | let rate = cw.fc.count_lrf_switchable( |
2717 | | w, |
2718 | | &ts.restoration.as_const(), |
2719 | | current_lrf, |
2720 | | pli, |
2721 | | ); |
2722 | | let cost = compute_rd_cost(fi, rate, err); |
2723 | | if cost < best_cost { |
2724 | | best_cost = cost; |
2725 | | best_lrf_cost[lru_y * lru_w[pli] + lru_x][pli] = cost; |
2726 | | best_new_lrf = current_lrf; |
2727 | | } |
2728 | | } |
2729 | | |
2730 | | if best_lrf[lru_y * lru_w[pli] + lru_x][pli] |
2731 | | .notequal(best_new_lrf) |
2732 | | { |
2733 | | best_lrf[lru_y * lru_w[pli] + lru_x][pli] = best_new_lrf; |
2734 | | lrf_change = true; |
2735 | | if let Some(ru) = ts.restoration.planes[pli] |
2736 | | .restoration_unit_mut(base_sbo + loop_sbo) |
2737 | | { |
2738 | | ru.filter = best_new_lrf; |
2739 | | } |
2740 | | } |
2741 | | } |
2742 | | } |
2743 | | } |
2744 | | } |
2745 | | } |
2746 | | } |
2747 | | } |
2748 | | |
2749 | | /// Sanity check: `estimate_rate(0, TxSize::TX_4X4, 0)` must equal the |
2750 | | /// `[0][0][0]` entry of `RDO_RATE_TABLE`. |
2751 | | #[test] |
2752 | | fn estimate_rate_test() { |
2753 | | // Evaluate the function under test first, then the reference table entry. |
2754 | | let actual = estimate_rate(0, TxSize::TX_4X4, 0); |
2755 | | let expected = RDO_RATE_TABLE[0][0][0]; |
2756 | | assert_eq!(actual, expected); |
2757 | | } |