/rust/registry/src/index.crates.io-6f17d22bba15001f/rav1e-0.7.1/src/rate.rs
Line | Count | Source |
1 | | // Copyright (c) 2019-2022, The rav1e contributors. All rights reserved |
2 | | // |
3 | | // This source code is subject to the terms of the BSD 2 Clause License and |
4 | | // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
5 | | // was not distributed with this source code in the LICENSE file, you can |
6 | | // obtain it at www.aomedia.org/license/software. If the Alliance for Open |
7 | | // Media Patent License 1.0 was not distributed with this source code in the |
8 | | // PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
9 | | |
10 | | use crate::api::color::ChromaSampling; |
11 | | use crate::api::ContextInner; |
12 | | use crate::encoder::TEMPORAL_DELIMITER; |
13 | | use crate::quantize::{ac_q, dc_q, select_ac_qi, select_dc_qi}; |
14 | | use crate::util::{ |
15 | | bexp64, bexp_q24, blog64, clamp, q24_to_q57, q57, q57_to_q24, Pixel, |
16 | | }; |
17 | | use std::cmp; |
18 | | |
19 | | // The number of frame sub-types for which we track distinct parameters. |
20 | | // This does not include FRAME_SUBTYPE_SEF, because we don't need to do any |
21 | | // parameter tracking for Show Existing Frame frames. |
22 | | pub const FRAME_NSUBTYPES: usize = 4; |
23 | | |
24 | | pub const FRAME_SUBTYPE_I: usize = 0; |
25 | | pub const FRAME_SUBTYPE_P: usize = 1; |
26 | | #[allow(unused)] |
27 | | pub const FRAME_SUBTYPE_B0: usize = 2; |
28 | | #[allow(unused)] |
29 | | pub const FRAME_SUBTYPE_B1: usize = 3; |
30 | | pub const FRAME_SUBTYPE_SEF: usize = 4; |
31 | | |
32 | | const PASS_SINGLE: i32 = 0; |
33 | | const PASS_1: i32 = 1; |
34 | | const PASS_2: i32 = 2; |
35 | | const PASS_2_PLUS_1: i32 = 3; |
36 | | |
37 | | // Magic value at the start of the 2-pass stats file |
38 | | const TWOPASS_MAGIC: i32 = 0x50324156; |
39 | | // Version number for the 2-pass stats file |
40 | | const TWOPASS_VERSION: i32 = 1; |
41 | | // 4 byte magic + 4 byte version + 4 byte TU count + 4 byte SEF frame count |
42 | | // + FRAME_NSUBTYPES*(4 byte frame count + 1 byte exp + 8 byte scale_sum) |
43 | | pub(crate) const TWOPASS_HEADER_SZ: usize = 16 + FRAME_NSUBTYPES * (4 + 1 + 8); |
44 | | // 4 byte frame type (show_frame and fti jointly coded) + 4 byte log_scale_q24 |
45 | | const TWOPASS_PACKET_SZ: usize = 8; |
46 | | |
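As a concrete check of the layout described in the comments above: the summary header is 16 bytes of fixed fields plus 13 bytes per tracked frame subtype, and each per-frame packet is 8 bytes. A standalone sketch (plain Rust, not part of this file) that recomputes those totals:

    // Standalone sketch: recompute the 2-pass sizes described above.
    const FRAME_NSUBTYPES: usize = 4;
    const TWOPASS_HEADER_SZ: usize = 16 + FRAME_NSUBTYPES * (4 + 1 + 8);
    const TWOPASS_PACKET_SZ: usize = 8;

    fn main() {
      // 4B magic + 4B version + 4B TU count + 4B SEF count, then 13B per subtype.
      assert_eq!(TWOPASS_HEADER_SZ, 68);
      // 4B jointly coded show_frame/frame type + 4B log_scale_q24.
      assert_eq!(TWOPASS_PACKET_SZ, 8);
      println!("header = {TWOPASS_HEADER_SZ} bytes, packet = {TWOPASS_PACKET_SZ} bytes");
    }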
47 | | const SEF_BITS: i64 = 24; |
48 | | |
49 | | // The scale of AV1 quantizer tables (relative to the pixel domain), i.e., Q3. |
50 | | pub(crate) const QSCALE: i32 = 3; |
51 | | |
52 | | // We clamp the actual I and B frame delays to a minimum of 10 to work |
53 | | // within the range of values where later incrementing the delay works as |
54 | | // designed. |
55 | | // 10 is not an exact choice, but rather a good working trade-off. |
56 | | const INTER_DELAY_TARGET_MIN: i32 = 10; |
57 | | |
58 | | // The base quantizer for a frame is adjusted based on the frame type using the |
59 | | // formula (log_qp*mqp + dqp), where log_qp is the base-2 logarithm of the |
60 | | // "linear" quantizer (the actual factor by which coefficients are divided). |
61 | | // Because log_qp has an implicit offset built in based on the scale of the |
62 | | // coefficients (which depends on the pixel bit depth and the transform |
63 | | // scale), we normalize the quantizer to the equivalent for 8-bit pixels with |
64 | | // orthonormal transforms for the purposes of rate modeling. |
65 | | const MQP_Q12: &[i32; FRAME_NSUBTYPES] = &[ |
66 | | // TODO: Use a const function once f64 operations in const functions are |
67 | | // stable. |
68 | | (1.0 * (1 << 12) as f64) as i32, |
69 | | (1.0 * (1 << 12) as f64) as i32, |
70 | | (1.0 * (1 << 12) as f64) as i32, |
71 | | (1.0 * (1 << 12) as f64) as i32, |
72 | | ]; |
73 | | |
74 | | // The ratio 33_810_170.0 / 86_043_287.0 was derived by approximating the median |
75 | | // of a change of 15 quantizer steps in the quantizer tables. |
76 | | const DQP_Q57: &[i64; FRAME_NSUBTYPES] = &[ |
77 | | (-(33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64, |
78 | | (0.0 * (1i64 << 57) as f64) as i64, |
79 | | ((33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64, |
80 | | (2.0 * (33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64, |
81 | | ]; |
82 | | |
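Later in this file (calc_flat_quantizer and select_qi) these tables are applied as log_q = ((log_base_q + (1 << 11)) >> 12) * MQP_Q12[fti] + DQP_Q57[fti]. A plain-f64 sketch of that modulation, with the Q57 step converted back to log2 units (illustrative only; the example base quantizer is made up):

    // Floating-point sketch of the frame-type modulation used in this file:
    //   log_q = log_base_q * mqp + dqp   (log_q is log2 of the linear quantizer)
    // with mqp = 1.0 for every subtype and dqp = k * DQP_STEP for k = -1, 0, 1, 2
    // (I, P, B0, B1). DQP_STEP is the Q57 constant above as a plain f64.
    const DQP_STEP: f64 = 33_810_170.0 / 86_043_287.0; // ~0.393 in log2 units

    fn modulate(log_base_q: f64, fti: usize) -> f64 {
      let mqp = [1.0, 1.0, 1.0, 1.0][fti];
      let dqp = [-DQP_STEP, 0.0, DQP_STEP, 2.0 * DQP_STEP][fti];
      log_base_q * mqp + dqp
    }

    fn main() {
      let log_base_q = 5.0; // i.e. a "linear" quantizer of 2^5 = 32
      for (fti, name) in ["I", "P", "B0", "B1"].iter().enumerate() {
        let q = 2f64.powf(modulate(log_base_q, fti));
        println!("{name}: quantizer ~{q:.1}"); // I ~24.4, P 32.0, B0 ~42.0, B1 ~55.2
      }
    }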
83 | | // For 8-bit-depth inter frames, log_q_y is derived from log_target_q with a |
84 | | // linear model: |
85 | | // log_q_y = log_target_q + (log_target_q >> 32) * Q_MODEL_MUL + Q_MODEL_ADD |
86 | | // Derivation of the linear models: |
87 | | // https://github.com/xiph/rav1e/blob/d02bdbd3b0b7b2cb9fc301031cc6a4e67a567a5c/doc/quantizer-weight-analysis.ipynb |
88 | | #[rustfmt::skip] |
89 | | const Q_MODEL_ADD: [i64; 4] = [ |
90 | | // 4:2:0 |
91 | | -0x24_4FE7_ECB3_DD90, |
92 | | // 4:2:2 |
93 | | -0x37_41DA_38AD_0924, |
94 | | // 4:4:4 |
95 | | -0x70_83BD_A626_311C, |
96 | | // 4:0:0 |
97 | | 0, |
98 | | ]; |
99 | | #[rustfmt::skip] |
100 | | const Q_MODEL_MUL: [i64; 4] = [ |
101 | | // 4:2:0 |
102 | | 0x8A0_50DD, |
103 | | // 4:2:2 |
104 | | 0x887_7666, |
105 | | // 4:4:4 |
106 | | 0x8D4_A712, |
107 | | // 4:0:0 |
108 | | 0, |
109 | | ]; |
110 | | |
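Read in floating point, the model above is roughly log_q_y ≈ log_target_q * (1 + Q_MODEL_MUL / 2^32) + Q_MODEL_ADD / 2^57, with both logs in base-2 units of the linear quantizer (the >> 32 makes the multiplier act as a Q32 fraction against the Q57 log). A standalone sketch that derives the slope and offset from the 4:2:0 entries; the example target quantizer is made up:

    // Standalone sketch (not rav1e code): the 8-bit inter luma model in plain f64.
    fn main() {
      // 4:2:0 entries copied from the tables above.
      let q_model_mul_420: i64 = 0x8A0_50DD;
      let q_model_add_420: i64 = -0x24_4FE7_ECB3_DD90;
      let slope = 1.0 + q_model_mul_420 as f64 / (1u64 << 32) as f64;
      let offset = q_model_add_420 as f64 / (1u64 << 57) as f64;
      // Example: a target quantizer of 64 (log2 = 6) for an 8-bit 4:2:0 inter frame.
      let log_target_q = 6.0_f64;
      let log_q_y = log_target_q * slope + offset;
      println!("slope {slope:.4}, offset {offset:.4}, log_q_y {log_q_y:.3}");
    }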
111 | | #[rustfmt::skip] |
112 | | const ROUGH_TAN_LOOKUP: &[u16; 18] = &[ |
113 | | 0, 358, 722, 1098, 1491, 1910, |
114 | | 2365, 2868, 3437, 4096, 4881, 5850, |
115 | | 7094, 8784, 11254, 15286, 23230, 46817 |
116 | | ]; |
117 | | |
118 | | // A digital approximation of a 2nd-order low-pass Bessel follower. |
119 | | // We use this for rate control because it has fast reaction time, but is |
120 | | // critically damped. |
121 | | pub struct IIRBessel2 { |
122 | | c: [i32; 2], |
123 | | g: i32, |
124 | | x: [i32; 2], |
125 | | y: [i32; 2], |
126 | | } |
127 | | |
128 | | // alpha is Q24 in the range [0,0.5). |
129 | | // The return value is 5.12. |
130 | 0 | fn warp_alpha(alpha: i32) -> i32 { |
131 | 0 | let i = ((alpha * 36) >> 24).min(16); |
132 | 0 | let t0 = ROUGH_TAN_LOOKUP[i as usize]; |
133 | 0 | let t1 = ROUGH_TAN_LOOKUP[i as usize + 1]; |
134 | 0 | let d = alpha * 36 - (i << 24); |
135 | 0 | ((((t0 as i64) << 32) + (((t1 - t0) << 8) as i64) * (d as i64)) >> 32) as i32 |
136 | 0 | } |
137 | | |
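The table entries match tan(k * pi/36) in Q12 (entry 9 is 4096, i.e. tan 45 degrees), so warp_alpha() reads as the usual bilinear-transform prewarp of alpha = 1/delay, returning roughly tan(pi * alpha) in Q12. That reading is an interpretation rather than something stated here; the standalone sketch below only prints the floating-point values the fixed-point code approximates:

    // Standalone sketch (an interpretation, not rav1e code): warp_alpha(alpha_q24)
    // should approximate tan(pi * alpha) in Q12, where alpha = 1/delay in [0, 0.5).
    fn warp_alpha_float(alpha: f64) -> f64 {
      (std::f64::consts::PI * alpha).tan()
    }

    fn main() {
      for delay in [4, 10, 30, 240] {
        let alpha = 1.0 / delay as f64;
        println!("delay {delay:>3}: tan(pi*alpha) = {:.4} (~{} in Q12)",
                 warp_alpha_float(alpha),
                 (warp_alpha_float(alpha) * 4096.0).round() as i32);
      }
    }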
138 | | // Compute Bessel filter coefficients with the specified delay. |
139 | | // Return: Filter parameters (c[0], c[1], g). |
140 | 0 | fn iir_bessel2_get_parameters(delay: i32) -> (i32, i32, i32) { |
141 | 0 | // This borrows some code from an unreleased version of Postfish. |
142 | 0 | // See the recipe at http://unicorn.us.com/alex/2polefilters.html for details |
143 | 0 | // on deriving the filter coefficients. |
144 | 0 | // alpha is Q24 |
145 | 0 | let alpha = (1 << 24) / delay; |
146 | 0 | // warp is 7.12 (5.12? the max value is 70386 in Q12). |
147 | 0 | let warp = warp_alpha(alpha).max(1) as i64; |
148 | 0 | // k1 is 9.12 (6.12?) |
149 | 0 | let k1 = 3 * warp; |
150 | 0 | // k2 is 16.24 (11.24?) |
151 | 0 | let k2 = k1 * warp; |
152 | 0 | // d is 16.15 (10.15?) |
153 | 0 | let d = ((((1 << 12) + k1) << 12) + k2 + 256) >> 9; |
154 | 0 | // a is 0.32, since d is larger than both 1.0 and k2 |
155 | 0 | let a = (k2 << 23) / d; |
156 | 0 | // ik2 is 25.24 |
157 | 0 | let ik2 = (1i64 << 48) / k2; |
158 | 0 | // b1 is Q56; in practice, the integer ranges between -2 and 2. |
159 | 0 | let b1 = 2 * a * (ik2 - (1i64 << 24)); |
160 | 0 | // b2 is Q56; in practice, the integer ranges between -2 and 2. |
161 | 0 | let b2 = (1i64 << 56) - ((4 * a) << 24) - b1; |
162 | 0 | // All of the filter parameters are Q24. |
163 | 0 | ( |
164 | 0 | ((b1 + (1i64 << 31)) >> 32) as i32, |
165 | 0 | ((b2 + (1i64 << 31)) >> 32) as i32, |
166 | 0 | ((a + 128) >> 8) as i32, |
167 | 0 | ) |
168 | 0 | } |
169 | | |
170 | | impl IIRBessel2 { |
171 | 0 | pub fn new(delay: i32, value: i32) -> IIRBessel2 { |
172 | 0 | let (c0, c1, g) = iir_bessel2_get_parameters(delay); |
173 | 0 | IIRBessel2 { c: [c0, c1], g, x: [value, value], y: [value, value] } |
174 | 0 | } |
175 | | |
176 | | // Re-initialize Bessel filter coefficients with the specified delay. |
177 | | // This does not alter the x/y state, but changes the reaction time of the |
178 | | // filter. |
179 | | // Altering the time constant of a reactive filter without altering internal |
180 | | // state is something that has to be done carefully, but our design operates |
181 | | // at high enough delays and with small enough time constant changes to make |
182 | | // it safe. |
183 | 0 | pub fn reinit(&mut self, delay: i32) { |
184 | 0 | let (c0, c1, g) = iir_bessel2_get_parameters(delay); |
185 | 0 | self.c[0] = c0; |
186 | 0 | self.c[1] = c1; |
187 | 0 | self.g = g; |
188 | 0 | } |
189 | | |
190 | 0 | pub fn update(&mut self, x: i32) -> i32 { |
191 | 0 | let c0 = self.c[0] as i64; |
192 | 0 | let c1 = self.c[1] as i64; |
193 | 0 | let g = self.g as i64; |
194 | 0 | let x0 = self.x[0] as i64; |
195 | 0 | let x1 = self.x[1] as i64; |
196 | 0 | let y0 = self.y[0] as i64; |
197 | 0 | let y1 = self.y[1] as i64; |
198 | 0 | let ya = |
199 | 0 | ((((x as i64) + x0 * 2 + x1) * g + y0 * c0 + y1 * c1 + (1i64 << 23)) |
200 | 0 | >> 24) as i32; |
201 | 0 | self.x[1] = self.x[0]; |
202 | 0 | self.x[0] = x; |
203 | 0 | self.y[1] = self.y[0]; |
204 | 0 | self.y[0] = ya; |
205 | 0 | ya |
206 | 0 | } |
207 | | } |
208 | | |
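A short usage sketch of the filter as the rest of this file uses it: construct it with an initial delay and value, feed Q24 log-scale samples through update(), and lengthen the delay with reinit() once more history has accumulated. The sample handling below is illustrative only and assumes the IIRBessel2 type above is in scope:

    // Usage sketch (assumes IIRBessel2 from above is in scope; the schedule and
    // sample values are made up, and the slice is assumed non-empty). The rate
    // controller feeds Q24 log-scale estimates through the filter and uses the
    // smoothed output in its rate model.
    fn smooth_log_scales(samples: &[i32]) -> Vec<i32> {
      // Start with a short delay so the filter reacts quickly to the first frames,
      // mirroring how inter_delay starts at INTER_DELAY_TARGET_MIN and grows.
      let mut filt = IIRBessel2::new(4, samples[0]);
      let mut out = Vec::with_capacity(samples.len());
      for (n, &s) in samples.iter().enumerate() {
        if n == 8 {
          // Lengthen the time constant once we have more history.
          filt.reinit(10);
        }
        out.push(filt.update(s));
      }
      out
    }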
209 | | #[derive(Copy, Clone)] |
210 | | struct RCFrameMetrics { |
211 | | // The log base 2 of the scale factor for this frame in Q24 format. |
212 | | log_scale_q24: i32, |
213 | | // The frame type from pass 1 |
214 | | fti: usize, |
215 | | // Whether or not the frame was hidden in pass 1 |
216 | | show_frame: bool, |
217 | | // TODO: The input frame number corresponding to this frame in the input. |
218 | | // input_frameno: u32 |
219 | | // TODO vfr: PTS |
220 | | } |
221 | | |
222 | | impl RCFrameMetrics { |
223 | 0 | const fn new() -> RCFrameMetrics { |
224 | 0 | RCFrameMetrics { log_scale_q24: 0, fti: 0, show_frame: false } |
225 | 0 | } |
226 | | } |
227 | | |
228 | | /// Rate control pass summary |
229 | | /// |
230 | | /// It contains encoding information related to the whole previous |
231 | | /// encoding pass. |
232 | | #[derive(Debug, Default, Clone)] |
233 | | pub struct RCSummary { |
234 | | pub(crate) ntus: i32, |
235 | | nframes: [i32; FRAME_NSUBTYPES + 1], |
236 | | exp: [u8; FRAME_NSUBTYPES], |
237 | | scale_sum: [i64; FRAME_NSUBTYPES], |
238 | | pub(crate) total: i32, |
239 | | } |
240 | | |
241 | | // Backing storage to deserialize Summary and Per-Frame pass data |
242 | | // |
243 | | // Can store up to a full header size since it is the largest of the two |
244 | | // packet kinds. |
245 | | pub(crate) struct RCDeserialize { |
246 | | // The current byte position in the frame metrics buffer. |
247 | | pass2_buffer_pos: usize, |
248 | | // In pass 2, this represents the number of bytes that are available in the |
249 | | // input buffer. |
250 | | pass2_buffer_fill: usize, |
251 | | // Buffer for current frame metrics in pass 2. |
252 | | pass2_buffer: [u8; TWOPASS_HEADER_SZ], |
253 | | } |
254 | | |
255 | | impl Default for RCDeserialize { |
256 | 0 | fn default() -> Self { |
257 | 0 | RCDeserialize { |
258 | 0 | pass2_buffer: [0; TWOPASS_HEADER_SZ], |
259 | 0 | pass2_buffer_pos: 0, |
260 | 0 | pass2_buffer_fill: 0, |
261 | 0 | } |
262 | 0 | } |
263 | | } |
264 | | |
265 | | impl RCDeserialize { |
266 | | // Fill the backing storage by reading enough bytes from the |
267 | | // buf slice until goal bytes are available for parsing. |
268 | | // |
269 | | // goal must be at most TWOPASS_HEADER_SZ. |
270 | 0 | pub(crate) fn buffer_fill( |
271 | 0 | &mut self, buf: &[u8], consumed: usize, goal: usize, |
272 | 0 | ) -> usize { |
273 | 0 | let mut consumed = consumed; |
274 | 0 | while self.pass2_buffer_fill < goal && consumed < buf.len() { |
275 | 0 | self.pass2_buffer[self.pass2_buffer_fill] = buf[consumed]; |
276 | 0 | self.pass2_buffer_fill += 1; |
277 | 0 | consumed += 1; |
278 | 0 | } |
279 | 0 | consumed |
280 | 0 | } |
281 | | |
282 | | // Read the next n bytes as i64. |
283 | | // n must be between 1 and 8 |
284 | 0 | fn unbuffer_val(&mut self, n: usize) -> i64 { |
285 | 0 | let mut bytes = n; |
286 | 0 | let mut ret = 0; |
287 | 0 | let mut shift = 0; |
288 | 0 | while bytes > 0 { |
289 | 0 | bytes -= 1; |
290 | 0 | ret |= (self.pass2_buffer[self.pass2_buffer_pos] as i64) << shift; |
291 | 0 | self.pass2_buffer_pos += 1; |
292 | 0 | shift += 8; |
293 | 0 | } |
294 | 0 | ret |
295 | 0 | } |
296 | | |
297 | | // Read metrics for the next frame. |
298 | 0 | fn parse_metrics(&mut self) -> Result<RCFrameMetrics, String> { |
299 | 0 | debug_assert!(self.pass2_buffer_fill >= TWOPASS_PACKET_SZ); |
300 | 0 | let ft_val = self.unbuffer_val(4); |
301 | 0 | let show_frame = (ft_val >> 31) != 0; |
302 | 0 | let fti = (ft_val & 0x7FFFFFFF) as usize; |
303 | 0 | // Make sure the frame type is valid. |
304 | 0 | if fti > FRAME_NSUBTYPES { |
305 | 0 | return Err("Invalid frame type".to_string()); |
306 | 0 | } |
307 | 0 | let log_scale_q24 = self.unbuffer_val(4) as i32; |
308 | 0 | Ok(RCFrameMetrics { log_scale_q24, fti, show_frame }) |
309 | 0 | } |
310 | | |
311 | | // Read the summary header data. |
312 | 0 | pub(crate) fn parse_summary(&mut self) -> Result<RCSummary, String> { |
313 | 0 | // check the magic value and version number. |
314 | 0 | if self.unbuffer_val(4) != TWOPASS_MAGIC as i64 { |
315 | 0 | return Err("Magic value mismatch".to_string()); |
316 | 0 | } |
317 | 0 | if self.unbuffer_val(4) != TWOPASS_VERSION as i64 { |
318 | 0 | return Err("Version number mismatch".to_string()); |
319 | 0 | } |
320 | 0 | let mut s = |
321 | 0 | RCSummary { ntus: self.unbuffer_val(4) as i32, ..Default::default() }; |
322 | 0 |
323 | 0 | // Make sure the file claims to have at least one TU. |
324 | 0 | // Otherwise we probably got the placeholder data from an aborted |
325 | 0 | // pass 1. |
326 | 0 | if s.ntus < 1 { |
327 | 0 | return Err("No TUs found in first pass summary".to_string()); |
328 | 0 | } |
329 | 0 | let mut total: i32 = 0; |
330 | 0 | for nframes in s.nframes.iter_mut() { |
331 | 0 | let n = self.unbuffer_val(4) as i32; |
332 | 0 | if n < 0 { |
333 | 0 | return Err("Got negative frame count".to_string()); |
334 | 0 | } |
335 | 0 | total = total |
336 | 0 | .checked_add(n) |
337 | 0 | .ok_or_else(|| "Frame count too large".to_string())?; |
338 | | |
339 | 0 | *nframes = n; |
340 | | } |
341 | | |
342 | | // We can't have more TUs than frames. |
343 | 0 | if s.ntus > total { |
344 | 0 | return Err("More TUs than frames".to_string()); |
345 | 0 | } |
346 | 0 |
347 | 0 | s.total = total; |
348 | | |
349 | 0 | for exp in s.exp.iter_mut() { |
350 | 0 | *exp = self.unbuffer_val(1) as u8; |
351 | 0 | } |
352 | | |
353 | 0 | for scale_sum in s.scale_sum.iter_mut() { |
354 | 0 | *scale_sum = self.unbuffer_val(8); |
355 | 0 | if *scale_sum < 0 { |
356 | 0 | return Err("Got negative scale sum".to_string()); |
357 | 0 | } |
358 | | } |
359 | 0 | Ok(s) |
360 | 0 | } |
361 | | } |
362 | | |
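The per-frame packets read back by parse_metrics() are plain little-endian fields: a 4-byte word with show_frame in bit 31 and the frame subtype in the low bits, followed by 4 bytes of log_scale_q24 (unbuffer_val() above, and buffer_val() later in this file, move bytes least-significant first). A standalone round-trip sketch of that layout; the function names are mine:

    // Standalone sketch of the 8-byte per-frame packet parsed above.
    fn pack(fti: u32, show_frame: bool, log_scale_q24: i32) -> [u8; 8] {
      let word = ((show_frame as u32) << 31) | fti;
      let mut out = [0u8; 8];
      out[..4].copy_from_slice(&word.to_le_bytes());
      out[4..].copy_from_slice(&log_scale_q24.to_le_bytes());
      out
    }

    fn unpack(buf: [u8; 8]) -> (usize, bool, i32) {
      let word = u32::from_le_bytes(buf[..4].try_into().unwrap());
      let fti = (word & 0x7FFF_FFFF) as usize;
      let show_frame = (word >> 31) != 0;
      let log_scale_q24 = i32::from_le_bytes(buf[4..].try_into().unwrap());
      (fti, show_frame, log_scale_q24)
    }

    fn main() {
      let buf = pack(1, true, -0x12_3456);
      assert_eq!(unpack(buf), (1, true, -0x12_3456));
    }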
363 | | pub struct RCState { |
364 | | // The target bit-rate in bits per second. |
365 | | target_bitrate: i32, |
366 | | // The number of TUs over which to distribute the reservoir usage. |
367 | | // We use TUs because in our leaky bucket model, we only add bits to the |
368 | | // reservoir on TU boundaries. |
369 | | reservoir_frame_delay: i32, |
370 | | // Whether or not the reservoir_frame_delay was explicitly specified by the |
371 | | // user, or is the default value. |
372 | | reservoir_frame_delay_is_set: bool, |
373 | | // The maximum quantizer index to allow (for the luma AC coefficients, other |
374 | | // quantizers will still be adjusted to match). |
375 | | maybe_ac_qi_max: Option<u8>, |
376 | | // The minimum quantizer index to allow (for the luma AC coefficients). |
377 | | ac_qi_min: u8, |
378 | | // Will we drop frames to meet bitrate requirements? |
379 | | drop_frames: bool, |
380 | | // Do we respect the maximum reservoir fullness? |
381 | | cap_overflow: bool, |
382 | | // Can the reservoir go negative? |
383 | | cap_underflow: bool, |
384 | | // The log of the first-pass base quantizer. |
385 | | pass1_log_base_q: i64, |
386 | | // Two-pass mode state. |
387 | | // PASS_SINGLE => 1-pass encoding. |
388 | | // PASS_1 => 1st pass of 2-pass encoding. |
389 | | // PASS_2 => 2nd pass of 2-pass encoding. |
390 | | // PASS_2_PLUS_1 => 2nd pass of 2-pass encoding, but also emitting pass 1 |
391 | | // data again. |
392 | | twopass_state: i32, |
393 | | // The log of the number of pixels in a frame in Q57 format. |
394 | | log_npixels: i64, |
395 | | // The target average bits per Temporal Unit (input frame). |
396 | | bits_per_tu: i64, |
397 | | // The current bit reservoir fullness (bits available to be used). |
398 | | reservoir_fullness: i64, |
399 | | // The target buffer fullness. |
400 | | // This is where we'd like to be by the last keyframe that appears in the |
401 | | // next reservoir_frame_delay frames. |
402 | | reservoir_target: i64, |
403 | | // The maximum buffer fullness (total size of the buffer). |
404 | | reservoir_max: i64, |
405 | | // The log of estimated scale factor for the rate model in Q57 format. |
406 | | // |
407 | | // TODO: Convert to Q23 or figure out a better way to avoid overflow |
408 | | // once 2-pass mode is introduced, if required. |
409 | | log_scale: [i64; FRAME_NSUBTYPES], |
410 | | // The exponent used in the rate model in Q6 format. |
411 | | exp: [u8; FRAME_NSUBTYPES], |
412 | | // The log of an estimated scale factor used to obtain the real framerate, |
413 | | // for VFR sources or, e.g., 12 fps content doubled to 24 fps, etc. |
414 | | // TODO vfr: log_vfr_scale: i64, |
415 | | // Second-order lowpass filters to track scale and VFR. |
416 | | scalefilter: [IIRBessel2; FRAME_NSUBTYPES], |
417 | | // TODO vfr: vfrfilter: IIRBessel2, |
418 | | // The number of frames of each type we have seen, for filter adaptation |
419 | | // purposes. |
420 | | // These are only 32 bits to guarantee that we can sum the scales over the |
421 | | // whole file without overflow in a 64-bit int. |
422 | | // That limits us to 2.268 years at 60 fps (minus 33% with re-ordering). |
423 | | nframes: [i32; FRAME_NSUBTYPES + 1], |
424 | | inter_delay: [i32; FRAME_NSUBTYPES - 1], |
425 | | inter_delay_target: i32, |
426 | | // The total accumulated estimation bias. |
427 | | rate_bias: i64, |
428 | | // The number of (non-Show Existing Frame) frames that have been encoded. |
429 | | nencoded_frames: i64, |
430 | | // The number of Show Existing Frames that have been emitted. |
431 | | nsef_frames: i64, |
432 | | // Buffer for current frame metrics in pass 1. |
433 | | pass1_buffer: [u8; TWOPASS_HEADER_SZ], |
434 | | // Whether or not the user has retrieved the pass 1 data for the last frame. |
435 | | // For PASS_1 or PASS_2_PLUS_1 encoding, this is set to false after each |
436 | | // frame is encoded, and must be set to true by calling twopass_out() before |
437 | | // the next frame can be encoded. |
438 | | pub pass1_data_retrieved: bool, |
439 | | // Marks whether or not the user has retrieved the summary data at the end of |
440 | | // the encode. |
441 | | pass1_summary_retrieved: bool, |
442 | | // Whether or not the user has provided enough data to encode in the second |
443 | | // pass. |
444 | | // For PASS_2 or PASS_2_PLUS_1 encoding, this is set to false after each |
445 | | // frame, and must be set to true by calling twopass_in() before the next |
446 | | // frame can be encoded. |
447 | | pass2_data_ready: bool, |
448 | | // TODO: Add a way to force the next frame to be a keyframe in 2-pass mode. |
449 | | // Right now we are relying on keyframe detection to detect the same |
450 | | // keyframes. |
451 | | // The metrics for the previous frame. |
452 | | prev_metrics: RCFrameMetrics, |
453 | | // The metrics for the current frame. |
454 | | cur_metrics: RCFrameMetrics, |
455 | | // The buffered metrics for future frames. |
456 | | frame_metrics: Vec<RCFrameMetrics>, |
457 | | // The total number of frames still in use in the circular metric buffer. |
458 | | nframe_metrics: usize, |
459 | | // The index of the current frame in the circular metric buffer. |
460 | | frame_metrics_head: usize, |
461 | | // Data deserialization |
462 | | des: RCDeserialize, |
463 | | // The TU count encoded so far. |
464 | | ntus: i32, |
465 | | // The TU count for the whole file. |
466 | | ntus_total: i32, |
467 | | // The remaining TU count. |
468 | | ntus_left: i32, |
469 | | // The frame count of each frame subtype in the whole file. |
470 | | nframes_total: [i32; FRAME_NSUBTYPES + 1], |
471 | | // The sum of those counts. |
472 | | nframes_total_total: i32, |
473 | | // The number of frames of each subtype yet to be processed. |
474 | | nframes_left: [i32; FRAME_NSUBTYPES + 1], |
475 | | // The sum of the scale values for each frame subtype. |
476 | | scale_sum: [i64; FRAME_NSUBTYPES], |
477 | | // The number of TUs represented by the current scale sums. |
478 | | scale_window_ntus: i32, |
479 | | // The frame count of each frame subtype in the current scale window. |
480 | | scale_window_nframes: [i32; FRAME_NSUBTYPES + 1], |
481 | | // The sum of the scale values for each frame subtype in the current window. |
482 | | scale_window_sum: [i64; FRAME_NSUBTYPES], |
483 | | } |
484 | | |
485 | | // TODO: Separate qi values for each color plane. |
486 | | pub struct QuantizerParameters { |
487 | | // The full-precision, unmodulated log quantizer upon which our modulated |
488 | | // quantizer indices are based. |
489 | | // This is only used to limit sudden quality changes from frame to frame, and |
490 | | // as such is not adjusted when we encounter buffer overrun or underrun. |
491 | | pub log_base_q: i64, |
492 | | // The full-precision log quantizer modulated by the current frame type upon |
493 | | // which our quantizer indices are based (including any adjustments to |
494 | | // prevent buffer overrun or underrun). |
495 | | // This is used when estimating the scale parameter once we know the actual |
496 | | // bit usage of a frame. |
497 | | pub log_target_q: i64, |
498 | | pub dc_qi: [u8; 3], |
499 | | pub ac_qi: [u8; 3], |
500 | | pub lambda: f64, |
501 | | pub dist_scale: [f64; 3], |
502 | | } |
503 | | |
504 | | const Q57_SQUARE_EXP_SCALE: f64 = |
505 | | (2.0 * ::std::f64::consts::LN_2) / ((1i64 << 57) as f64); |
506 | | |
507 | | // Daala style log-offset for chroma quantizers |
508 | | // TODO: Optimal offsets for more configurations than just BT.709 |
509 | 0 | fn chroma_offset( |
510 | 0 | log_target_q: i64, chroma_sampling: ChromaSampling, |
511 | 0 | ) -> (i64, i64) { |
512 | 0 | let x = log_target_q.max(0); |
513 | | // Gradient optimized for CIEDE2000+PSNR on subset3 |
514 | 0 | let y = match chroma_sampling { |
515 | 0 | ChromaSampling::Cs400 => 0, |
516 | 0 | ChromaSampling::Cs420 => (x >> 2) + (x >> 6), // 0.266 |
517 | 0 | ChromaSampling::Cs422 => (x >> 3) + (x >> 4) - (x >> 7), // 0.180 |
518 | 0 | ChromaSampling::Cs444 => (x >> 4) + (x >> 5) + (x >> 8), // 0.098 |
519 | | }; |
520 | | // blog64(7) - blog64(4); blog64(5) - blog64(4) |
521 | 0 | (0x19D_5D9F_D501_0B37 - y, 0xA4_D3C2_5E68_DC58 - y) |
522 | 0 | } |
523 | | |
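In floating point, the offsets returned above are log2(7/4) and log2(5/4), reduced by a subsampling-dependent gradient times the clamped log2 target quantizer; the shift sums work out to the 0.266, 0.180 and 0.098 noted in the comments. A standalone f64 sketch of the same shape, at a made-up operating point:

    // Standalone f64 sketch of chroma_offset() above (not rav1e code).
    fn chroma_offset_float(log_target_q: f64, gradient: f64) -> (f64, f64) {
      let x = log_target_q.max(0.0);
      let y = gradient * x; // 0.266 for 4:2:0, 0.180 for 4:2:2, 0.098 for 4:4:4
      ((7.0f64 / 4.0).log2() - y, (5.0f64 / 4.0).log2() - y)
    }

    fn main() {
      // 4:2:0 at a luma log2 quantizer of 5 (a linear quantizer of 32).
      let (u, v) = chroma_offset_float(5.0, 0.266);
      // Convert the log-domain offsets back to linear quantizer ratios.
      println!("u ratio {:.3}, v ratio {:.3}", 2f64.powf(u), 2f64.powf(v));
    }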
524 | | impl QuantizerParameters { |
525 | 0 | fn new_from_log_q( |
526 | 0 | log_base_q: i64, log_target_q: i64, bit_depth: usize, |
527 | 0 | chroma_sampling: ChromaSampling, is_intra: bool, |
528 | 0 | log_isqrt_mean_scale: i64, |
529 | 0 | ) -> QuantizerParameters { |
530 | 0 | let scale = log_isqrt_mean_scale + q57(QSCALE + bit_depth as i32 - 8); |
531 | 0 |
|
532 | 0 | let mut log_q_y = log_target_q; |
533 | 0 | if !is_intra && bit_depth == 8 { |
534 | 0 | log_q_y = log_target_q |
535 | 0 | + (log_target_q >> 32) * Q_MODEL_MUL[chroma_sampling as usize] |
536 | 0 | + Q_MODEL_ADD[chroma_sampling as usize]; |
537 | 0 | } |
538 | | |
539 | 0 | let quantizer = bexp64(log_q_y + scale); |
540 | 0 | let (offset_u, offset_v) = |
541 | 0 | chroma_offset(log_q_y + log_isqrt_mean_scale, chroma_sampling); |
542 | 0 | let mono = chroma_sampling == ChromaSampling::Cs400; |
543 | 0 | let log_q_u = log_q_y + offset_u; |
544 | 0 | let log_q_v = log_q_y + offset_v; |
545 | 0 | let quantizer_u = bexp64(log_q_u + scale); |
546 | 0 | let quantizer_v = bexp64(log_q_v + scale); |
547 | 0 | let lambda = (::std::f64::consts::LN_2 / 6.0) |
548 | 0 | * (((log_target_q + log_isqrt_mean_scale) as f64) |
549 | 0 | * Q57_SQUARE_EXP_SCALE) |
550 | 0 | .exp(); |
551 | 0 |
552 | 0 | let scale = |q| bexp64((log_target_q - q) * 2 + q57(16)) as f64 / 65536.; |
553 | 0 | let dist_scale = [scale(log_q_y), scale(log_q_u), scale(log_q_v)]; |
554 | 0 |
555 | 0 | let base_q_idx = select_ac_qi(quantizer, bit_depth).max(1); |
556 | 0 |
557 | 0 | // delta_q only gets 6 bits + a sign bit, so it can differ by 63 at most. |
558 | 0 | let min_qi = base_q_idx.saturating_sub(63).max(1); |
559 | 0 | let max_qi = base_q_idx.saturating_add(63).min(255); |
560 | 0 | let clamp_qi = |qi: u8| qi.clamp(min_qi, max_qi); |
561 | | |
562 | | QuantizerParameters { |
563 | 0 | log_base_q, |
564 | 0 | log_target_q, |
565 | 0 | // TODO: Allow lossless mode; i.e. qi == 0. |
566 | 0 | dc_qi: [ |
567 | 0 | clamp_qi(select_dc_qi(quantizer, bit_depth)), |
568 | 0 | if mono { 0 } else { clamp_qi(select_dc_qi(quantizer_u, bit_depth)) }, |
569 | 0 | if mono { 0 } else { clamp_qi(select_dc_qi(quantizer_v, bit_depth)) }, |
570 | | ], |
571 | | ac_qi: [ |
572 | 0 | base_q_idx, |
573 | 0 | if mono { 0 } else { clamp_qi(select_ac_qi(quantizer_u, bit_depth)) }, |
574 | 0 | if mono { 0 } else { clamp_qi(select_ac_qi(quantizer_v, bit_depth)) }, |
575 | | ], |
576 | 0 | lambda, |
577 | 0 | dist_scale, |
578 | 0 | } |
579 | 0 | } |
580 | | } |
581 | | |
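Q57_SQUARE_EXP_SCALE maps a Q57 base-2 log of the quantizer to the natural log of its square, so the lambda expression in new_from_log_q() reduces to (ln 2 / 6) * Q^2 when the mean-scale term is zero. A standalone f64 check of that equivalence; the example quantizer is arbitrary:

    // Standalone check (plain f64, not rav1e code) of lambda = (ln 2 / 6) * Q^2.
    fn main() {
      let q: f64 = 64.0; // an example linear quantizer
      let log_q_q57 = q.log2() * (1i64 << 57) as f64; // Q57 representation
      let q57_square_exp_scale = 2.0 * std::f64::consts::LN_2 / (1i64 << 57) as f64;
      let lambda =
        (std::f64::consts::LN_2 / 6.0) * (log_q_q57 * q57_square_exp_scale).exp();
      let direct = (std::f64::consts::LN_2 / 6.0) * q * q;
      assert!((lambda - direct).abs() < 1e-6 * direct);
      println!("lambda ~= {lambda:.1}");
    }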
582 | | impl RCState { |
583 | 0 | pub fn new( |
584 | 0 | frame_width: i32, frame_height: i32, framerate_num: i64, |
585 | 0 | framerate_den: i64, target_bitrate: i32, maybe_ac_qi_max: Option<u8>, |
586 | 0 | ac_qi_min: u8, max_key_frame_interval: i32, |
587 | 0 | maybe_reservoir_frame_delay: Option<i32>, |
588 | 0 | ) -> RCState { |
589 | 0 | // The default buffer size is set equal to 1.5x the keyframe interval or 240 |
590 | 0 | // frames, whichever is smaller, with a minimum of 12. |
591 | 0 | // For user-set values, we enforce a minimum of 12. |
592 | 0 | // The interval is short enough to allow reaction, but long enough to allow |
593 | 0 | // looking into the next GOP (avoiding the case where the last frames |
594 | 0 | // before an I-frame get starved), in most cases. |
595 | 0 | // The 12 frame minimum gives us some chance to distribute bit estimation |
596 | 0 | // errors in the worst case. |
597 | 0 | let reservoir_frame_delay = maybe_reservoir_frame_delay |
598 | 0 | .unwrap_or_else(|| ((max_key_frame_interval * 3) >> 1).min(240)) |
599 | 0 | .max(12); |
600 | 0 | // TODO: What are the limits on these? |
601 | 0 | let npixels = (frame_width as i64) * (frame_height as i64); |
602 | 0 | // Insane framerates or frame sizes mean insane bitrates. |
603 | 0 | // Let's not get carried away. |
604 | 0 | // We also subtract 16 bits from each temporal unit to account for the |
605 | 0 | // temporal delimiter, whose bits are not included in the frame sizes |
606 | 0 | // reported to update_state(). |
607 | 0 | // TODO: Support constraints imposed by levels. |
608 | 0 | let bits_per_tu = clamp( |
609 | 0 | (target_bitrate as i64) * framerate_den / framerate_num, |
610 | 0 | 40, |
611 | 0 | 0x4000_0000_0000, |
612 | 0 | ) - (TEMPORAL_DELIMITER.len() * 8) as i64; |
613 | 0 | let reservoir_max = bits_per_tu * (reservoir_frame_delay as i64); |
614 | 0 | // Start with a buffer fullness and fullness target of 50%. |
615 | 0 | let reservoir_target = (reservoir_max + 1) >> 1; |
616 | 0 | // Pick exponents and initial scales for quantizer selection. |
617 | 0 | let ibpp = npixels / bits_per_tu; |
618 | | // These have been derived by encoding many clips at every quantizer |
619 | | // and running a piecewise-linear regression in binary log space. |
620 | 0 | let (i_exp, i_log_scale) = if ibpp < 1 { |
621 | 0 | (48u8, blog64(36) - q57(QSCALE)) |
622 | 0 | } else if ibpp < 4 { |
623 | 0 | (61u8, blog64(55) - q57(QSCALE)) |
624 | | } else { |
625 | 0 | (77u8, blog64(129) - q57(QSCALE)) |
626 | | }; |
627 | 0 | let (p_exp, p_log_scale) = if ibpp < 2 { |
628 | 0 | (69u8, blog64(32) - q57(QSCALE)) |
629 | 0 | } else if ibpp < 139 { |
630 | 0 | (104u8, blog64(84) - q57(QSCALE)) |
631 | | } else { |
632 | 0 | (83u8, blog64(19) - q57(QSCALE)) |
633 | | }; |
634 | 0 | let (b0_exp, b0_log_scale) = if ibpp < 2 { |
635 | 0 | (84u8, blog64(30) - q57(QSCALE)) |
636 | 0 | } else if ibpp < 92 { |
637 | 0 | (120u8, blog64(68) - q57(QSCALE)) |
638 | | } else { |
639 | 0 | (68u8, blog64(4) - q57(QSCALE)) |
640 | | }; |
641 | 0 | let (b1_exp, b1_log_scale) = if ibpp < 2 { |
642 | 0 | (87u8, blog64(27) - q57(QSCALE)) |
643 | 0 | } else if ibpp < 126 { |
644 | 0 | (139u8, blog64(84) - q57(QSCALE)) |
645 | | } else { |
646 | 0 | (61u8, blog64(1) - q57(QSCALE)) |
647 | | }; |
648 | | |
649 | | // TODO: Add support for "golden" P frames. |
650 | 0 | RCState { |
651 | 0 | target_bitrate, |
652 | 0 | reservoir_frame_delay, |
653 | 0 | reservoir_frame_delay_is_set: maybe_reservoir_frame_delay.is_some(), |
654 | 0 | maybe_ac_qi_max, |
655 | 0 | ac_qi_min, |
656 | 0 | drop_frames: false, |
657 | 0 | cap_overflow: true, |
658 | 0 | cap_underflow: false, |
659 | 0 | pass1_log_base_q: 0, |
660 | 0 | twopass_state: PASS_SINGLE, |
661 | 0 | log_npixels: blog64(npixels), |
662 | 0 | bits_per_tu, |
663 | 0 | reservoir_fullness: reservoir_target, |
664 | 0 | reservoir_target, |
665 | 0 | reservoir_max, |
666 | 0 | log_scale: [i_log_scale, p_log_scale, b0_log_scale, b1_log_scale], |
667 | 0 | exp: [i_exp, p_exp, b0_exp, b1_exp], |
668 | 0 | scalefilter: [ |
669 | 0 | IIRBessel2::new(4, q57_to_q24(i_log_scale)), |
670 | 0 | IIRBessel2::new(INTER_DELAY_TARGET_MIN, q57_to_q24(p_log_scale)), |
671 | 0 | IIRBessel2::new(INTER_DELAY_TARGET_MIN, q57_to_q24(b0_log_scale)), |
672 | 0 | IIRBessel2::new(INTER_DELAY_TARGET_MIN, q57_to_q24(b1_log_scale)), |
673 | 0 | ], |
674 | 0 | // TODO VFR |
675 | 0 | nframes: [0; FRAME_NSUBTYPES + 1], |
676 | 0 | inter_delay: [INTER_DELAY_TARGET_MIN; FRAME_NSUBTYPES - 1], |
677 | 0 | inter_delay_target: reservoir_frame_delay >> 1, |
678 | 0 | rate_bias: 0, |
679 | 0 | nencoded_frames: 0, |
680 | 0 | nsef_frames: 0, |
681 | 0 | pass1_buffer: [0; TWOPASS_HEADER_SZ], |
682 | 0 | pass1_data_retrieved: true, |
683 | 0 | pass1_summary_retrieved: false, |
684 | 0 | pass2_data_ready: false, |
685 | 0 | prev_metrics: RCFrameMetrics::new(), |
686 | 0 | cur_metrics: RCFrameMetrics::new(), |
687 | 0 | frame_metrics: Vec::new(), |
688 | 0 | nframe_metrics: 0, |
689 | 0 | frame_metrics_head: 0, |
690 | 0 | ntus: 0, |
691 | 0 | ntus_total: 0, |
692 | 0 | ntus_left: 0, |
693 | 0 | nframes_total: [0; FRAME_NSUBTYPES + 1], |
694 | 0 | nframes_total_total: 0, |
695 | 0 | nframes_left: [0; FRAME_NSUBTYPES + 1], |
696 | 0 | scale_sum: [0; FRAME_NSUBTYPES], |
697 | 0 | scale_window_ntus: 0, |
698 | 0 | scale_window_nframes: [0; FRAME_NSUBTYPES + 1], |
699 | 0 | scale_window_sum: [0; FRAME_NSUBTYPES], |
700 | 0 | des: RCDeserialize::default(), |
701 | 0 | } |
702 | 0 | } |
703 | | |
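A worked example of the reservoir sizing done in new(): for a hypothetical 5 Mbps target at 60 fps with a 240-frame maximum keyframe interval, the same arithmetic gives a 240-TU window at roughly 83 kbit per TU. Standalone sketch; all input values are made up:

    // Standalone sketch mirroring the reservoir arithmetic in RCState::new().
    fn main() {
      let target_bitrate: i64 = 5_000_000;
      let (framerate_num, framerate_den): (i64, i64) = (60, 1);
      let max_key_frame_interval: i64 = 240;
      // Default delay: 1.5x the keyframe interval capped at 240 TUs, minimum 12.
      let reservoir_frame_delay = ((max_key_frame_interval * 3) >> 1).min(240).max(12);
      // Bits per temporal unit, minus 16 bits for the temporal delimiter.
      let bits_per_tu = (target_bitrate * framerate_den / framerate_num)
        .clamp(40, 0x4000_0000_0000) - 16;
      let reservoir_max = bits_per_tu * reservoir_frame_delay;
      let reservoir_target = (reservoir_max + 1) >> 1; // start at 50% fullness
      println!(
        "delay {reservoir_frame_delay} TUs, {bits_per_tu} bits/TU, \
         reservoir {reservoir_max} bits, target {reservoir_target} bits"
      );
    }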
704 | 0 | pub(crate) fn select_first_pass_qi( |
705 | 0 | &self, bit_depth: usize, fti: usize, chroma_sampling: ChromaSampling, |
706 | 0 | ) -> QuantizerParameters { |
707 | 0 | // Adjust the quantizer for the frame type, result is Q57: |
708 | 0 | let log_q = ((self.pass1_log_base_q + (1i64 << 11)) >> 12) |
709 | 0 | * (MQP_Q12[fti] as i64) |
710 | 0 | + DQP_Q57[fti]; |
711 | 0 | QuantizerParameters::new_from_log_q( |
712 | 0 | self.pass1_log_base_q, |
713 | 0 | log_q, |
714 | 0 | bit_depth, |
715 | 0 | chroma_sampling, |
716 | 0 | fti == 0, |
717 | 0 | 0, |
718 | 0 | ) |
719 | 0 | } |
720 | | |
721 | | // TODO: Separate quantizers for Cb and Cr. |
722 | 0 | #[profiling::function] |
| | Unexecuted instantiation: <rav1e::rate::RCState>::select_qi::<u8> |
| | Unexecuted instantiation: <rav1e::rate::RCState>::select_qi::<u16> |
723 | | pub(crate) fn select_qi<T: Pixel>( |
724 | | &self, ctx: &ContextInner<T>, output_frameno: u64, fti: usize, |
725 | | maybe_prev_log_base_q: Option<i64>, log_isqrt_mean_scale: i64, |
726 | | ) -> QuantizerParameters { |
727 | | // Is rate control active? |
728 | | if self.target_bitrate <= 0 { |
729 | | // Rate control is not active. |
730 | | // Derive quantizer directly from frame type. |
731 | | let bit_depth = ctx.config.bit_depth; |
732 | | let chroma_sampling = ctx.config.chroma_sampling; |
733 | | let (log_base_q, log_q) = |
734 | | Self::calc_flat_quantizer(ctx.config.quantizer as u8, bit_depth, fti); |
735 | | QuantizerParameters::new_from_log_q( |
736 | | log_base_q, |
737 | | log_q, |
738 | | bit_depth, |
739 | | chroma_sampling, |
740 | | fti == 0, |
741 | | log_isqrt_mean_scale, |
742 | | ) |
743 | | } else { |
744 | | let mut nframes: [i32; FRAME_NSUBTYPES + 1] = [0; FRAME_NSUBTYPES + 1]; |
745 | | let mut log_scale: [i64; FRAME_NSUBTYPES] = self.log_scale; |
746 | | let mut reservoir_tus = self.reservoir_frame_delay.min(self.ntus_left); |
747 | | let mut reservoir_frames = 0; |
748 | | let mut log_cur_scale = (self.scalefilter[fti].y[0] as i64) << 33; |
749 | | match self.twopass_state { |
750 | | // First pass of 2-pass mode: use a fixed base quantizer. |
751 | | PASS_1 => { |
752 | | return self.select_first_pass_qi( |
753 | | ctx.config.bit_depth, |
754 | | fti, |
755 | | ctx.config.chroma_sampling, |
756 | | ); |
757 | | } |
758 | | // Second pass of 2-pass mode: we know exactly how much of each frame |
759 | | // type there is in the current buffer window, and have estimates for |
760 | | // the scales. |
761 | | PASS_2 | PASS_2_PLUS_1 => { |
762 | | let mut scale_window_sum: [i64; FRAME_NSUBTYPES] = |
763 | | self.scale_window_sum; |
764 | | let mut scale_window_nframes: [i32; FRAME_NSUBTYPES + 1] = |
765 | | self.scale_window_nframes; |
766 | | // Intentionally exclude Show Existing Frame frames from this. |
767 | | for ftj in 0..FRAME_NSUBTYPES { |
768 | | reservoir_frames += scale_window_nframes[ftj]; |
769 | | } |
770 | | // If we're approaching the end of the file, add some slack to keep |
771 | | // us from slamming into a rail. |
772 | | // Our rate accuracy goes down, but it keeps the result sensible. |
773 | | // We position the target where the first forced keyframe beyond the |
774 | | // end of the file would be (for consistency with 1-pass mode). |
775 | | // TODO: let mut buf_pad = self.reservoir_frame_delay.min(...); |
776 | | // if buf_delay < buf_pad { |
777 | | // buf_pad -= buf_delay; |
778 | | // } |
779 | | // else ... |
780 | | // Otherwise, search for the last keyframe in the buffer window and |
781 | | // target that. |
782 | | // Currently we only do this when using a finite buffer. |
783 | | // We could save the position of the last keyframe in the stream in |
784 | | // the summary data and do it with a whole-file buffer as well, but |
785 | | // it isn't likely to make a difference. |
786 | | if !self.frame_metrics.is_empty() { |
787 | | let mut fm_tail = self.frame_metrics_head + self.nframe_metrics; |
788 | | if fm_tail >= self.frame_metrics.len() { |
789 | | fm_tail -= self.frame_metrics.len(); |
790 | | } |
791 | | let mut fmi = fm_tail; |
792 | | loop { |
793 | | if fmi == 0 { |
794 | | fmi += self.frame_metrics.len(); |
795 | | } |
796 | | fmi -= 1; |
797 | | // Stop before we remove the first frame. |
798 | | if fmi == self.frame_metrics_head { |
799 | | break; |
800 | | } |
801 | | // If we find a keyframe, remove it and everything past it. |
802 | | if self.frame_metrics[fmi].fti == FRAME_SUBTYPE_I { |
803 | | while fmi != fm_tail { |
804 | | let m = &self.frame_metrics[fmi]; |
805 | | let ftj = m.fti; |
806 | | scale_window_nframes[ftj] -= 1; |
807 | | if ftj < FRAME_NSUBTYPES { |
808 | | scale_window_sum[ftj] -= bexp_q24(m.log_scale_q24); |
809 | | reservoir_frames -= 1; |
810 | | } |
811 | | if m.show_frame { |
812 | | reservoir_tus -= 1; |
813 | | } |
814 | | fmi += 1; |
815 | | if fmi >= self.frame_metrics.len() { |
816 | | fmi = 0; |
817 | | } |
818 | | } |
819 | | // And stop scanning backwards. |
820 | | break; |
821 | | } |
822 | | } |
823 | | } |
824 | | nframes = scale_window_nframes; |
825 | | // If we're not using the same frame type as in pass 1 (because |
826 | | // someone changed some encoding parameters), remove that scale |
827 | | // estimate. |
828 | | // We'll add a replacement for the correct frame type below. |
829 | | if self.cur_metrics.fti != fti { |
830 | | scale_window_nframes[self.cur_metrics.fti] -= 1; |
831 | | if self.cur_metrics.fti != FRAME_SUBTYPE_SEF { |
832 | | scale_window_sum[self.cur_metrics.fti] -= |
833 | | bexp_q24(self.cur_metrics.log_scale_q24); |
834 | | } |
835 | | } else { |
836 | | log_cur_scale = (self.cur_metrics.log_scale_q24 as i64) << 33; |
837 | | } |
838 | | // If we're approaching the end of the file, add some slack to keep |
839 | | // us from slamming into a rail. |
840 | | // Our rate accuracy goes down, but it keeps the result sensible. |
841 | | // We position the target where the first forced keyframe beyond the |
842 | | // end of the file would be (for consistency with 1-pass mode). |
843 | | if reservoir_tus >= self.ntus_left |
844 | | && self.ntus_total as u64 |
845 | | > ctx.gop_input_frameno_start[&output_frameno] |
846 | | { |
847 | | let nfinal_gop_tus = self.ntus_total |
848 | | - (ctx.gop_input_frameno_start[&output_frameno] as i32); |
849 | | if ctx.config.max_key_frame_interval as i32 > nfinal_gop_tus { |
850 | | let reservoir_pad = (ctx.config.max_key_frame_interval as i32 |
851 | | - nfinal_gop_tus) |
852 | | .min(self.reservoir_frame_delay - reservoir_tus); |
853 | | let (guessed_reservoir_frames, guessed_reservoir_tus) = ctx |
854 | | .guess_frame_subtypes( |
855 | | &mut nframes, |
856 | | reservoir_tus + reservoir_pad, |
857 | | ); |
858 | | reservoir_frames = guessed_reservoir_frames; |
859 | | reservoir_tus = guessed_reservoir_tus; |
860 | | } |
861 | | } |
862 | | // Blend in the low-pass filtered scale according to how many |
863 | | // frames of each type we need to add compared to the actual sums in |
864 | | // our window. |
865 | | for ftj in 0..FRAME_NSUBTYPES { |
866 | | let scale = scale_window_sum[ftj] |
867 | | + bexp_q24(self.scalefilter[ftj].y[0]) |
868 | | * (nframes[ftj] - scale_window_nframes[ftj]) as i64; |
869 | | log_scale[ftj] = if nframes[ftj] > 0 { |
870 | | blog64(scale) - blog64(nframes[ftj] as i64) - q57(24) |
871 | | } else { |
872 | | -self.log_npixels |
873 | | }; |
874 | | } |
875 | | } |
876 | | // Single pass. |
877 | | _ => { |
878 | | // Figure out how to re-distribute bits so that we hit our fullness |
879 | | // target before the last keyframe in our current buffer window |
880 | | // (after the current frame), or the end of the buffer window, |
881 | | // whichever comes first. |
882 | | // Count the various types and classes of frames. |
883 | | let (guessed_reservoir_frames, guessed_reservoir_tus) = |
884 | | ctx.guess_frame_subtypes(&mut nframes, self.reservoir_frame_delay); |
885 | | reservoir_frames = guessed_reservoir_frames; |
886 | | reservoir_tus = guessed_reservoir_tus; |
887 | | // TODO: Scale for VFR. |
888 | | } |
889 | | } |
890 | | // If we've been missing our target, add a penalty term. |
891 | | let rate_bias = (self.rate_bias / (self.nencoded_frames + 100)) |
892 | | * (reservoir_frames as i64); |
893 | | // rate_total is the total bits available over the next |
894 | | // reservoir_tus TUs. |
895 | | let rate_total = self.reservoir_fullness - self.reservoir_target |
896 | | + rate_bias |
897 | | + (reservoir_tus as i64) * self.bits_per_tu; |
898 | | // Find a target quantizer that meets our rate target for the |
899 | | // specific mix of frame types we'll have over the next |
900 | | // reservoir_frames frames. |
901 | | // We model the rate<->quantizer relationship as |
902 | | // rate = scale*(quantizer**-exp) |
903 | | // In this case, we have our desired rate, an exponent selected in |
904 | | // setup, and a scale that's been measured over our frame history, |
905 | | // so we're solving for the quantizer. |
906 | | // Exponentiation with arbitrary exponents is expensive, so we work |
907 | | // in the binary log domain (binary exp and log aren't too bad): |
908 | | // rate = exp2(log2(scale) - log2(quantizer)*exp) |
909 | | // There's no easy closed-form solution, so we use a bisection search to find it. |
910 | | let bit_depth = ctx.config.bit_depth; |
911 | | let chroma_sampling = ctx.config.chroma_sampling; |
912 | | // TODO: Proper handling of lossless. |
913 | | let mut log_qlo = blog64(ac_q(self.ac_qi_min, 0, bit_depth).get() as i64) |
914 | | - q57(QSCALE + bit_depth as i32 - 8); |
915 | | // The AC quantizer tables map to values larger than the DC quantizer |
916 | | // tables, so we use that as the upper bound to make sure we can use |
917 | | // the full table if needed. |
918 | | let mut log_qhi = blog64( |
919 | | ac_q(self.maybe_ac_qi_max.unwrap_or(255), 0, bit_depth).get() as i64, |
920 | | ) - q57(QSCALE + bit_depth as i32 - 8); |
921 | | let mut log_base_q = (log_qlo + log_qhi) >> 1; |
922 | | while log_qlo < log_qhi { |
923 | | // Count bits contributed by each frame type using the model. |
924 | | let mut bits = 0i64; |
925 | | for ftj in 0..FRAME_NSUBTYPES { |
926 | | // Modulate base quantizer by frame type. |
927 | | let log_q = ((log_base_q + (1i64 << 11)) >> 12) |
928 | | * (MQP_Q12[ftj] as i64) |
929 | | + DQP_Q57[ftj]; |
930 | | // All the fields here are Q57 except for the exponent, which is |
931 | | // Q6. |
932 | | bits += (nframes[ftj] as i64) |
933 | | * bexp64( |
934 | | log_scale[ftj] + self.log_npixels |
935 | | - ((log_q + 32) >> 6) * (self.exp[ftj] as i64), |
936 | | ); |
937 | | } |
938 | | // The number of bits for Show Existing Frame frames is constant. |
939 | | bits += (nframes[FRAME_SUBTYPE_SEF] as i64) * SEF_BITS; |
940 | | let diff = bits - rate_total; |
941 | | if diff > 0 { |
942 | | log_qlo = log_base_q + 1; |
943 | | } else if diff < 0 { |
944 | | log_qhi = log_base_q - 1; |
945 | | } else { |
946 | | break; |
947 | | } |
948 | | log_base_q = (log_qlo + log_qhi) >> 1; |
949 | | } |
950 | | // If this was not one of the initial frames, limit the change in |
951 | | // base quantizer to within [0.8*Q, 1.2*Q] where Q is the previous |
952 | | // frame's base quantizer. |
953 | | if let Some(prev_log_base_q) = maybe_prev_log_base_q { |
954 | | log_base_q = clamp( |
955 | | log_base_q, |
956 | | prev_log_base_q - 0xA4_D3C2_5E68_DC58, |
957 | | prev_log_base_q + 0xA4_D3C2_5E68_DC58, |
958 | | ); |
959 | | } |
960 | | // Modulate base quantizer by frame type. |
961 | | let mut log_q = ((log_base_q + (1i64 << 11)) >> 12) |
962 | | * (MQP_Q12[fti] as i64) |
963 | | + DQP_Q57[fti]; |
964 | | // The above allocation looks only at the total rate we'll accumulate |
965 | | // in the next reservoir_frame_delay frames. |
966 | | // However, we could overflow the bit reservoir on the very next |
967 | | // frame. |
968 | | // Check for that here if we're not using a soft target. |
969 | | if self.cap_overflow { |
970 | | // Allow 3% of the buffer for prediction error. |
971 | | // This should be plenty, and we don't mind if we go a bit over. |
972 | | // We only want to keep these bits from being completely wasted. |
973 | | let margin = (self.reservoir_max + 31) >> 5; |
974 | | // We want to use at least this many bits next frame. |
975 | | let soft_limit = self.reservoir_fullness + self.bits_per_tu |
976 | | - (self.reservoir_max - margin); |
977 | | if soft_limit > 0 { |
978 | | let log_soft_limit = blog64(soft_limit); |
979 | | // If we're predicting we won't use that many bits... |
980 | | // TODO: When using frame re-ordering, we should include the rate |
981 | | // for all of the frames in the current TU. |
982 | | // When there is more than one frame, there will be no direct |
983 | | // solution for the required adjustment, however. |
984 | | let log_scale_pixels = log_cur_scale + self.log_npixels; |
985 | | let exp = self.exp[fti] as i64; |
986 | | let mut log_q_exp = ((log_q + 32) >> 6) * exp; |
987 | | if log_scale_pixels - log_q_exp < log_soft_limit { |
988 | | // Scale the adjustment based on how far into the margin we are. |
989 | | log_q_exp += ((log_scale_pixels - log_soft_limit - log_q_exp) |
990 | | >> 32) |
991 | | * ((margin.min(soft_limit) << 32) / margin); |
992 | | log_q = ((log_q_exp + (exp >> 1)) / exp) << 6; |
993 | | } |
994 | | } |
995 | | } |
996 | | // We just checked we don't overflow the reservoir next frame, now |
997 | | // check we don't underflow and bust the budget (when not using a |
998 | | // soft target). |
999 | | if self.maybe_ac_qi_max.is_none() { |
1000 | | // Compute the maximum number of bits we can use in the next frame. |
1001 | | // Allow 50% of the rate for a single frame for prediction error. |
1002 | | // This may not be enough for keyframes or sudden changes in |
1003 | | // complexity. |
1004 | | let log_hard_limit = |
1005 | | blog64(self.reservoir_fullness + (self.bits_per_tu >> 1)); |
1006 | | // If we're predicting we'll use more than this... |
1007 | | // TODO: When using frame re-ordering, we should include the rate |
1008 | | // for all of the frames in the current TU. |
1009 | | // When there is more than one frame, there will be no direct |
1010 | | // solution for the required adjustment, however. |
1011 | | let log_scale_pixels = log_cur_scale + self.log_npixels; |
1012 | | let exp = self.exp[fti] as i64; |
1013 | | let mut log_q_exp = ((log_q + 32) >> 6) * exp; |
1014 | | if log_scale_pixels - log_q_exp > log_hard_limit { |
1015 | | // Force the target to hit our limit exactly. |
1016 | | log_q_exp = log_scale_pixels - log_hard_limit; |
1017 | | log_q = ((log_q_exp + (exp >> 1)) / exp) << 6; |
1018 | | // If that target is unreasonable, oh well; we'll have to drop. |
1019 | | } |
1020 | | } |
1021 | | |
1022 | | if let Some(qi_max) = self.maybe_ac_qi_max { |
1023 | | let (max_log_base_q, max_log_q) = |
1024 | | Self::calc_flat_quantizer(qi_max, ctx.config.bit_depth, fti); |
1025 | | log_base_q = cmp::min(log_base_q, max_log_base_q); |
1026 | | log_q = cmp::min(log_q, max_log_q); |
1027 | | } |
1028 | | if self.ac_qi_min > 0 { |
1029 | | let (min_log_base_q, min_log_q) = |
1030 | | Self::calc_flat_quantizer(self.ac_qi_min, ctx.config.bit_depth, fti); |
1031 | | log_base_q = cmp::max(log_base_q, min_log_base_q); |
1032 | | log_q = cmp::max(log_q, min_log_q); |
1033 | | } |
1034 | | QuantizerParameters::new_from_log_q( |
1035 | | log_base_q, |
1036 | | log_q, |
1037 | | bit_depth, |
1038 | | chroma_sampling, |
1039 | | fti == 0, |
1040 | | log_isqrt_mean_scale, |
1041 | | ) |
1042 | | } |
1043 | | } |
1044 | | |
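A simplified f64 sketch of the bisection described in the comments inside select_qi(): model the bits for each frame subtype as 2^(log2(scale) + log2(pixels) - exp * log2(q)), sum them over the window, and search for the log quantizer that meets the rate target. The window contents and model parameters below are hypothetical:

    // Standalone sketch (not rav1e code) of the rate <-> quantizer bisection.
    fn modeled_bits(log_q: f64, frames: &[(i64, f64, f64)]) -> f64 {
      // Each entry is (frame count, log2(scale) + log2(pixels), exponent).
      frames
        .iter()
        .map(|&(n, log_scale_pixels, exp)| {
          n as f64 * (log_scale_pixels - log_q * exp).exp2()
        })
        .sum()
    }

    fn solve_log_q(rate_total: f64, frames: &[(i64, f64, f64)]) -> f64 {
      let (mut lo, mut hi) = (0.0f64, 16.0f64);
      for _ in 0..64 {
        let mid = 0.5 * (lo + hi);
        // Too many modeled bits means the quantizer is too low: raise the floor.
        if modeled_bits(mid, frames) > rate_total {
          lo = mid;
        } else {
          hi = mid;
        }
      }
      0.5 * (lo + hi)
    }

    fn main() {
      // Hypothetical window: 1 keyframe and 59 inter frames.
      let frames = [(1, 21.5, 1.2), (59, 20.0, 1.6)];
      let log_q = solve_log_q(2_000_000.0, &frames);
      println!("log2(quantizer) ~= {log_q:.2}");
    }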
1045 | | // Computes a quantizer directly from the frame type and base quantizer index, |
1046 | | // without consideration for rate control. |
1047 | 0 | fn calc_flat_quantizer( |
1048 | 0 | base_qi: u8, bit_depth: usize, fti: usize, |
1049 | 0 | ) -> (i64, i64) { |
1050 | 0 | // TODO: Rename "quantizer" something that indicates it is a quantizer |
1051 | 0 | // index, and move it somewhere more sensible (or choose a better way to |
1052 | 0 | // parameterize a "quality" configuration parameter). |
1053 | 0 |
1054 | 0 | // We use the AC quantizer as the source quantizer since its quantizer |
1055 | 0 | // tables have unique entries, while the DC tables do not. |
1056 | 0 | let ac_quantizer = ac_q(base_qi, 0, bit_depth).get() as i64; |
1057 | 0 | // Pick the nearest DC entry since an exact match may be unavailable. |
1058 | 0 | let dc_qi = select_dc_qi(ac_quantizer, bit_depth); |
1059 | 0 | let dc_quantizer = dc_q(dc_qi, 0, bit_depth).get() as i64; |
1060 | 0 | // Get the log quantizers as Q57. |
1061 | 0 | let log_ac_q = blog64(ac_quantizer) - q57(QSCALE + bit_depth as i32 - 8); |
1062 | 0 | let log_dc_q = blog64(dc_quantizer) - q57(QSCALE + bit_depth as i32 - 8); |
1063 | 0 | // Target the midpoint of the chosen entries. |
1064 | 0 | let log_base_q = (log_ac_q + log_dc_q + 1) >> 1; |
1065 | 0 | // Adjust the quantizer for the frame type, result is Q57: |
1066 | 0 | let log_q = ((log_base_q + (1i64 << 11)) >> 12) * (MQP_Q12[fti] as i64) |
1067 | 0 | + DQP_Q57[fti]; |
1068 | 0 | (log_base_q, log_q) |
1069 | 0 | } |
1070 | | |
1071 | 0 | #[profiling::function] |
1072 | | pub fn update_state( |
1073 | | &mut self, bits: i64, fti: usize, show_frame: bool, log_target_q: i64, |
1074 | | trial: bool, droppable: bool, |
1075 | | ) -> bool { |
1076 | | if trial { |
1077 | | assert!(self.needs_trial_encode(fti)); |
1078 | | assert!(bits > 0); |
1079 | | } |
1080 | | let mut dropped = false; |
1081 | | // Update rate control only if rate control is active. |
1082 | | if self.target_bitrate > 0 { |
1083 | | let mut estimated_bits = 0; |
1084 | | let mut bits = bits; |
1085 | | let mut droppable = droppable; |
1086 | | let mut log_scale = q57(-64); |
1087 | | // Frame dropping is also disabled for now in the case of infinite-buffer |
1088 | | // two-pass mode. |
1089 | | if !self.drop_frames |
1090 | | || fti == FRAME_SUBTYPE_SEF |
1091 | | || (self.twopass_state == PASS_2 |
1092 | | || self.twopass_state == PASS_2_PLUS_1) |
1093 | | && !self.frame_metrics.is_empty() |
1094 | | { |
1095 | | droppable = false; |
1096 | | } |
1097 | | if fti == FRAME_SUBTYPE_SEF { |
1098 | | debug_assert!(bits == SEF_BITS); |
1099 | | debug_assert!(show_frame); |
1100 | | // Please don't make trial encodes of a SEF. |
1101 | | debug_assert!(!trial); |
1102 | | estimated_bits = SEF_BITS; |
1103 | | self.nsef_frames += 1; |
1104 | | } else { |
1105 | | let log_q_exp = ((log_target_q + 32) >> 6) * (self.exp[fti] as i64); |
1106 | | let prev_log_scale = self.log_scale[fti]; |
1107 | | if bits <= 0 { |
1108 | | // We didn't code any blocks in this frame. |
1109 | | bits = 0; |
1110 | | dropped = true; |
1111 | | // TODO: Adjust VFR rate based on drop count. |
1112 | | } else { |
1113 | | // Compute the estimated scale factor for this frame type. |
1114 | | let log_bits = blog64(bits); |
1115 | | log_scale = (log_bits - self.log_npixels + log_q_exp).min(q57(16)); |
1116 | | estimated_bits = |
1117 | | bexp64(prev_log_scale + self.log_npixels - log_q_exp); |
1118 | | if !trial { |
1119 | | self.nencoded_frames += 1; |
1120 | | } |
1121 | | } |
1122 | | } |
1123 | | let log_scale_q24 = q57_to_q24(log_scale); |
1124 | | // Special two-pass processing. |
1125 | | if self.twopass_state == PASS_2 || self.twopass_state == PASS_2_PLUS_1 { |
1126 | | // Pass 2 mode: |
1127 | | if !trial { |
1128 | | // Move the current metrics back one frame. |
1129 | | self.prev_metrics = self.cur_metrics; |
1130 | | // Back out the last frame's statistics from the sliding window. |
1131 | | let ftj = self.prev_metrics.fti; |
1132 | | self.nframes_left[ftj] -= 1; |
1133 | | self.scale_window_nframes[ftj] -= 1; |
1134 | | if ftj < FRAME_NSUBTYPES { |
1135 | | self.scale_window_sum[ftj] -= |
1136 | | bexp_q24(self.prev_metrics.log_scale_q24); |
1137 | | } |
1138 | | if self.prev_metrics.show_frame { |
1139 | | self.ntus_left -= 1; |
1140 | | self.scale_window_ntus -= 1; |
1141 | | } |
1142 | | // Free the corresponding entry in the circular buffer. |
1143 | | if !self.frame_metrics.is_empty() { |
1144 | | self.nframe_metrics -= 1; |
1145 | | self.frame_metrics_head += 1; |
1146 | | if self.frame_metrics_head >= self.frame_metrics.len() { |
1147 | | self.frame_metrics_head = 0; |
1148 | | } |
1149 | | } |
1150 | | // Mark us ready for the next 2-pass packet. |
1151 | | self.pass2_data_ready = false; |
1152 | | // Update state, so the user doesn't have to keep calling |
1153 | | // twopass_in() after they've fed in all the data when we're using |
1154 | | // a finite buffer. |
1155 | | self.twopass_in(None).unwrap_or(0); |
1156 | | } |
1157 | | } |
1158 | | if self.twopass_state == PASS_1 || self.twopass_state == PASS_2_PLUS_1 { |
1159 | | // Pass 1 mode: save the metrics for this frame. |
1160 | | self.prev_metrics.log_scale_q24 = log_scale_q24; |
1161 | | self.prev_metrics.fti = fti; |
1162 | | self.prev_metrics.show_frame = show_frame; |
1163 | | self.pass1_data_retrieved = false; |
1164 | | } |
1165 | | // Common to all passes: |
1166 | | if fti != FRAME_SUBTYPE_SEF && bits > 0 { |
1167 | | // If this is the first example of the given frame type we've seen, |
1168 | | // we immediately replace the default scale factor guess with the |
1169 | | // estimate we just computed using the first frame. |
1170 | | if trial || self.nframes[fti] <= 0 { |
1171 | | let f = &mut self.scalefilter[fti]; |
1172 | | let x = log_scale_q24; |
1173 | | f.x[0] = x; |
1174 | | f.x[1] = x; |
1175 | | f.y[0] = x; |
1176 | | f.y[1] = x; |
1177 | | self.log_scale[fti] = log_scale; |
1178 | | // TODO: Duplicate regular P frame state for first golden P frame. |
1179 | | } else { |
1180 | | // Lengthen the time constant for the inter filters as we collect |
1181 | | // more frame statistics, until we reach our target. |
1182 | | if fti > 0 |
1183 | | && self.inter_delay[fti - 1] < self.inter_delay_target |
1184 | | && self.nframes[fti] >= self.inter_delay[fti - 1] |
1185 | | { |
1186 | | self.inter_delay[fti - 1] += 1; |
1187 | | self.scalefilter[fti].reinit(self.inter_delay[fti - 1]); |
1188 | | } |
1189 | | // Update the low-pass scale filter for this frame type regardless |
1190 | | // of whether or not we will ultimately drop this frame. |
1191 | | self.log_scale[fti] = |
1192 | | q24_to_q57(self.scalefilter[fti].update(log_scale_q24)); |
1193 | | } |
1194 | | // If this frame busts our budget, it must be dropped. |
1195 | | if droppable && self.reservoir_fullness + self.bits_per_tu < bits { |
1196 | | // TODO: Adjust VFR rate based on drop count. |
1197 | | bits = 0; |
1198 | | dropped = true; |
1199 | | } else { |
1200 | | // TODO: Update a low-pass filter to estimate the "real" frame rate |
1201 | | // taking timestamps and drops into account. |
1202 | | // This is only done if the frame is coded, as it needs the final |
1203 | | // count of dropped frames. |
1204 | | } |
1205 | | } |
1206 | | if !trial { |
1207 | | // Increment the frame count for filter adaptation purposes. |
1208 | | if self.nframes[fti] < ::std::i32::MAX { |
1209 | | self.nframes[fti] += 1; |
1210 | | } |
1211 | | self.reservoir_fullness -= bits; |
1212 | | if show_frame { |
1213 | | self.reservoir_fullness += self.bits_per_tu; |
1214 | | // TODO: Properly account for temporal delimiter bits. |
1215 | | } |
1216 | | // If we're too quick filling the buffer and overflow is capped, that |
1217 | | // rate is lost forever. |
1218 | | if self.cap_overflow { |
1219 | | self.reservoir_fullness = |
1220 | | self.reservoir_fullness.min(self.reservoir_max); |
1221 | | } |
1222 | | // If we're too quick draining the buffer and underflow is capped, |
1223 | | // don't try to make up that rate later. |
1224 | | if self.cap_underflow { |
1225 | | self.reservoir_fullness = self.reservoir_fullness.max(0); |
1226 | | } |
1227 | | // Adjust the bias for the real bits we've used. |
1228 | | self.rate_bias += estimated_bits - bits; |
1229 | | } |
1230 | | } |
1231 | | dropped |
1232 | | } |
1233 | | |
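The reservoir bookkeeping at the end of update_state() is a classic leaky bucket: spend the bits the coded frame used, refill by bits_per_tu for each displayed temporal unit, and optionally clamp at the top and bottom. A minimal standalone sketch of just that part; the struct and field names are mine, not the RCState fields:

    // Minimal leaky-bucket sketch of the final bookkeeping in update_state().
    struct Bucket {
      fullness: i64,
      bits_per_tu: i64,
      max: i64,
      cap_overflow: bool,
      cap_underflow: bool,
    }

    impl Bucket {
      fn update(&mut self, bits_used: i64, show_frame: bool) {
        // Spend the bits this frame actually used.
        self.fullness -= bits_used;
        // Each displayed temporal unit refills the bucket at the target rate.
        if show_frame {
          self.fullness += self.bits_per_tu;
        }
        // Bits beyond a full buffer are lost forever when overflow is capped.
        if self.cap_overflow {
          self.fullness = self.fullness.min(self.max);
        }
        // Optionally refuse to let the buffer go negative.
        if self.cap_underflow {
          self.fullness = self.fullness.max(0);
        }
      }
    }

    fn main() {
      let mut b = Bucket {
        fullness: 500_000,
        bits_per_tu: 83_317,
        max: 1_000_000,
        cap_overflow: true,
        cap_underflow: false,
      };
      b.update(120_000, true); // a shown frame that overspent its per-TU budget
      println!("fullness now {} bits", b.fullness);
    }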
1234 | 0 | pub const fn needs_trial_encode(&self, fti: usize) -> bool { |
1235 | 0 | self.target_bitrate > 0 && self.nframes[fti] == 0 |
1236 | 0 | } |
1237 | | |
1238 | 0 | pub(crate) const fn ready(&self) -> bool { |
1239 | 0 | match self.twopass_state { |
1240 | 0 | PASS_SINGLE => true, |
1241 | 0 | PASS_1 => self.pass1_data_retrieved, |
1242 | 0 | PASS_2 => self.pass2_data_ready, |
1243 | 0 | _ => self.pass1_data_retrieved && self.pass2_data_ready, |
1244 | | } |
1245 | 0 | } |
1246 | | |
1247 | 0 | fn buffer_val(&mut self, val: i64, bytes: usize, cur_pos: usize) -> usize { |
1248 | 0 | let mut val = val; |
1249 | 0 | let mut bytes = bytes; |
1250 | 0 | let mut cur_pos = cur_pos; |
1251 | 0 | while bytes > 0 { |
1252 | 0 | bytes -= 1; |
1253 | 0 | self.pass1_buffer[cur_pos] = val as u8; |
1254 | 0 | cur_pos += 1; |
1255 | 0 | val >>= 8; |
1256 | 0 | } |
1257 | 0 | cur_pos |
1258 | 0 | } |
1259 | | |
1260 | 0 | pub(crate) fn select_pass1_log_base_q<T: Pixel>( |
1261 | 0 | &self, ctx: &ContextInner<T>, output_frameno: u64, |
1262 | 0 | ) -> i64 { |
1263 | 0 | assert_eq!(self.twopass_state, PASS_SINGLE); |
1264 | 0 | self.select_qi(ctx, output_frameno, FRAME_SUBTYPE_I, None, 0).log_base_q |
1265 | 0 | } |
| | Unexecuted instantiation: <rav1e::rate::RCState>::select_pass1_log_base_q::<u8> |
| | Unexecuted instantiation: <rav1e::rate::RCState>::select_pass1_log_base_q::<u16> |
1266 | | |
1267 | | // Initialize the first pass and emit a placeholder summary |
1268 | 0 | pub(crate) fn init_first_pass( |
1269 | 0 | &mut self, maybe_pass1_log_base_q: Option<i64>, |
1270 | 0 | ) { |
1271 | 0 | if let Some(pass1_log_base_q) = maybe_pass1_log_base_q { |
1272 | 0 | assert_eq!(self.twopass_state, PASS_SINGLE); |
1273 | | // Pick first-pass qi for scale calculations. |
1274 | 0 | self.pass1_log_base_q = pass1_log_base_q; |
1275 | | } else { |
1276 | 0 | debug_assert!(self.twopass_state == PASS_2); |
1277 | | } |
1278 | 0 | self.twopass_state += PASS_1; |
1279 | 0 | } |
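// Editor's note: the pass state is additive: PASS_SINGLE (0) plus PASS_1 (1)
// and/or PASS_2 (2), so enabling both passes yields PASS_2_PLUS_1 (3). That
// is why init_first_pass can simply add PASS_1 whether the current state is
// PASS_SINGLE or PASS_2. A tiny worked sketch with the literal values
// (illustrative only, not rav1e code):
fn example_pass_state_arithmetic() {
  let (pass_single, pass_1, pass_2, pass_2_plus_1) = (0i32, 1, 2, 3);
  assert_eq!(pass_single + pass_1 + pass_2, pass_2_plus_1); // both passes enabled
  assert_eq!(pass_2 + pass_1, pass_2_plus_1); // first pass enabled after second
}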
1280 | | |
1281 | | // Prepare a placeholder summary |
1282 | 0 | fn emit_placeholder_summary(&mut self) -> &[u8] { |
1283 | 0 | // Fill in dummy summary values. |
1284 | 0 | let mut cur_pos = 0; |
1285 | 0 | cur_pos = self.buffer_val(TWOPASS_MAGIC as i64, 4, cur_pos); |
1286 | 0 | cur_pos = self.buffer_val(TWOPASS_VERSION as i64, 4, cur_pos); |
1287 | 0 | cur_pos = self.buffer_val(0, TWOPASS_HEADER_SZ - 8, cur_pos); |
1288 | 0 | debug_assert!(cur_pos == TWOPASS_HEADER_SZ); |
1289 | 0 | self.pass1_data_retrieved = true; |
1290 | 0 | &self.pass1_buffer[..cur_pos] |
1291 | 0 | } |
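// Editor's sketch (not rav1e code): the placeholder summary is the 4-byte
// magic and 4-byte version followed by TWOPASS_HEADER_SZ - 8 zero bytes.
// A consumer could sanity-check that prefix as below, where 0x50324156 and 1
// are the TWOPASS_MAGIC and TWOPASS_VERSION values defined above:
fn summary_prefix_is_valid(header: &[u8]) -> bool {
  header.len() >= 8
    && i32::from_le_bytes([header[0], header[1], header[2], header[3]]) == 0x50324156
    && i32::from_le_bytes([header[4], header[5], header[6], header[7]]) == 1
}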
1292 | | |
1293 | | // Frame-specific pass data |
1294 | 0 | pub(crate) fn emit_frame_data(&mut self) -> Option<&[u8]> { |
1295 | 0 | let mut cur_pos = 0; |
1296 | 0 | let fti = self.prev_metrics.fti; |
1297 | 0 | if fti < FRAME_NSUBTYPES { |
1298 | 0 | self.scale_sum[fti] += bexp_q24(self.prev_metrics.log_scale_q24); |
1299 | 0 | } |
1300 | 0 | if self.prev_metrics.show_frame { |
1301 | 0 | self.ntus += 1; |
1302 | 0 | } |
1303 | | // If we have encoded too many frames, prevent us from reaching the |
1304 | | // ready state required to encode more. |
1305 | 0 | if self.nencoded_frames + self.nsef_frames >= std::i32::MAX as i64 { |
1306 | 0 | None? |
1307 | 0 | } |
1308 | 0 | cur_pos = self.buffer_val( |
1309 | 0 | (self.prev_metrics.show_frame as i64) << 31 |
1310 | 0 | | self.prev_metrics.fti as i64, |
1311 | 0 | 4, |
1312 | 0 | cur_pos, |
1313 | 0 | ); |
1314 | 0 | cur_pos = |
1315 | 0 | self.buffer_val(self.prev_metrics.log_scale_q24 as i64, 4, cur_pos); |
1316 | 0 | debug_assert!(cur_pos == TWOPASS_PACKET_SZ); |
1317 | 0 | self.pass1_data_retrieved = true; |
1318 | 0 | Some(&self.pass1_buffer[..cur_pos]) |
1319 | 0 | } |
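// Editor's sketch (not rav1e code): the first 4-byte field written above
// jointly codes show_frame in bit 31 and the frame sub-type in the low bits.
// The corresponding decode step, for a field already read as a
// little-endian u32:
fn split_frame_type_field(word: u32) -> (bool, usize) {
  let show_frame = (word >> 31) != 0;
  let fti = (word & 0x7fff_ffff) as usize;
  (show_frame, fti)
}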
1320 | | |
1321 | | // Summary of the whole encoding process. |
1322 | 0 | pub(crate) fn emit_summary(&mut self) -> &[u8] { |
1323 | 0 | let mut cur_pos = 0; |
1324 | 0 | cur_pos = self.buffer_val(TWOPASS_MAGIC as i64, 4, cur_pos); |
1325 | 0 | cur_pos = self.buffer_val(TWOPASS_VERSION as i64, 4, cur_pos); |
1326 | 0 | cur_pos = self.buffer_val(self.ntus as i64, 4, cur_pos); |
1327 | 0 | for fti in 0..=FRAME_NSUBTYPES { |
1328 | 0 | cur_pos = self.buffer_val(self.nframes[fti] as i64, 4, cur_pos); |
1329 | 0 | } |
1330 | 0 | for fti in 0..FRAME_NSUBTYPES { |
1331 | 0 | cur_pos = self.buffer_val(self.exp[fti] as i64, 1, cur_pos); |
1332 | 0 | } |
1333 | 0 | for fti in 0..FRAME_NSUBTYPES { |
1334 | 0 | cur_pos = self.buffer_val(self.scale_sum[fti], 8, cur_pos); |
1335 | 0 | } |
1336 | 0 | debug_assert!(cur_pos == TWOPASS_HEADER_SZ); |
1337 | 0 | self.pass1_summary_retrieved = true; |
1338 | 0 | &self.pass1_buffer[..cur_pos] |
1339 | 0 | } |
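// Editor's note: the fields above account exactly for TWOPASS_HEADER_SZ.
// Worked out for FRAME_NSUBTYPES == 4: 4 (magic) + 4 (version) + 4 (TU count)
// + 5 * 4 (frame counts, the extra bucket being the SEF count) + 4 * 1 (exp)
// + 4 * 8 (scale_sum) = 68 = 16 + 4 * (4 + 1 + 8). Illustrative constant:
const EXAMPLE_SUMMARY_BYTES: usize = 4 + 4 + 4 + 5 * 4 + 4 * 1 + 4 * 8; // = 68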
1340 | | |
1341 | | // Emit either the summary or frame-specific data, depending on what has already been retrieved
1342 | 0 | pub(crate) fn twopass_out( |
1343 | 0 | &mut self, done_processing: bool, |
1344 | 0 | ) -> Option<&[u8]> { |
1345 | 0 | if !self.pass1_data_retrieved { |
1346 | 0 | if self.twopass_state != PASS_1 && self.twopass_state != PASS_2_PLUS_1 { |
1347 | 0 | Some(self.emit_placeholder_summary()) |
1348 | | } else { |
1349 | 0 | self.emit_frame_data() |
1350 | | } |
1351 | 0 | } else if done_processing && !self.pass1_summary_retrieved { |
1352 | 0 | Some(self.emit_summary()) |
1353 | | } else { |
1354 | | // The data for this frame has already been retrieved. |
1355 | 0 | None |
1356 | | } |
1357 | 0 | } |
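// Editor's sketch (illustrative helper, not rav1e's public two-pass API):
// from the caller's side, twopass_out yields one placeholder summary first,
// then one TWOPASS_PACKET_SZ packet per encoded frame, and finally the real
// summary once done_processing is true. A drain loop for whatever is
// currently available might look like this, with `out` an assumed byte sink:
fn drain_first_pass_output(rc: &mut RCState, done_processing: bool, out: &mut Vec<u8>) {
  while let Some(data) = rc.twopass_out(done_processing) {
    out.extend_from_slice(data);
  }
}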
1358 | | |
1359 | | // Initialize the rate control for second pass encoding |
1360 | 0 | pub(crate) fn init_second_pass(&mut self) { |
1361 | 0 | if self.twopass_state == PASS_SINGLE || self.twopass_state == PASS_1 { |
1362 | | // Initialize the second pass. |
1363 | 0 | self.twopass_state += PASS_2; |
1364 | 0 | // If the user requested a finite buffer, reserve the space required for |
1365 | 0 | // it. |
1366 | 0 | if self.reservoir_frame_delay_is_set { |
1367 | 0 | debug_assert!(self.reservoir_frame_delay > 0); |
1368 | | // reservoir_frame_delay counts in TUs, but RCFrameMetrics are stored |
1369 | | // per frame (including Show Existing Frame frames). |
1370 | | // When re-ordering, we will have more frames than TUs. |
1371 | | // How many more? |
1372 | | // That depends on the re-ordering scheme used. |
1373 | | // Doubling the number of TUs and adding a fixed latency equal to the |
1374 | | // maximum number of reference frames we can store should be |
1375 | | // sufficient for any reasonable scheme, and keeps this code from |
1376 | | // depending too closely on the details of the scheme currently used |
1377 | | // by rav1e. |
1378 | 0 | let nmetrics = (self.reservoir_frame_delay as usize) * 2 + 8; |
1379 | 0 | self.frame_metrics.reserve_exact(nmetrics); |
1380 | 0 | self.frame_metrics.resize(nmetrics, RCFrameMetrics::new()); |
1381 | 0 | } |
1382 | 0 | } |
1383 | 0 | } |
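// Editor's note: a worked example of the sizing above. With an illustrative
// reservoir_frame_delay of 25 TUs, the metrics buffer is resized to
// 25 * 2 + 8 = 58 entries: twice the TU count to absorb re-ordering, plus a
// fixed allowance for the maximum number of stored reference frames.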
1384 | | |
1385 | 0 | pub(crate) fn setup_second_pass(&mut self, s: &RCSummary) { |
1386 | 0 | self.ntus_total = s.ntus; |
1387 | 0 | self.ntus_left = s.ntus; |
1388 | 0 | self.nframes_total = s.nframes; |
1389 | 0 | self.nframes_left = s.nframes; |
1390 | 0 | self.nframes_total_total = s.nframes.iter().sum(); |
1391 | 0 | if self.frame_metrics.is_empty() { |
1392 | 0 | self.reservoir_frame_delay = s.ntus; |
1393 | 0 | self.scale_window_nframes = self.nframes_total; |
1394 | 0 | self.scale_window_sum = s.scale_sum; |
1395 | 0 | self.reservoir_max = |
1396 | 0 | self.bits_per_tu * (self.reservoir_frame_delay as i64); |
1397 | 0 | self.reservoir_target = (self.reservoir_max + 1) >> 1; |
1398 | 0 | self.reservoir_fullness = self.reservoir_target; |
1399 | 0 | } else { |
1400 | 0 | self.reservoir_frame_delay = self.reservoir_frame_delay.min(s.ntus); |
1401 | 0 | } |
1402 | 0 | self.exp = s.exp; |
1403 | 0 | } |
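// Editor's note: a worked example of the whole-file-buffering branch above,
// using purely illustrative numbers. At 5_000_000 bits/s and 25 TUs/s,
// bits_per_tu is 200_000; if the summary reports 250 TUs, then
// reservoir_max = 200_000 * 250 = 50_000_000 bits, the target fullness is
// (50_000_000 + 1) >> 1 = 25_000_000 bits, and the reservoir starts at that
// target.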
1404 | | |
1405 | | // Parse the rate control summary |
1406 | | // |
1407 | | // It returns the number of bytes consumed in the process, or
1408 | | // an error message on parsing failure.
1409 | 0 | fn twopass_parse_summary(&mut self, buf: &[u8]) -> Result<usize, String> { |
1410 | 0 | let consumed = self.des.buffer_fill(buf, 0, TWOPASS_HEADER_SZ); |
1411 | 0 | if self.des.pass2_buffer_fill >= TWOPASS_HEADER_SZ { |
1412 | 0 | self.des.pass2_buffer_pos = 0; |
1413 | | |
1414 | 0 | let s = self.des.parse_summary()?; |
1415 | | |
1416 | 0 | self.setup_second_pass(&s); |
1417 | 0 |
1418 | 0 | // Got a valid header. |
1419 | 0 | // Set up pass 2. |
1420 | 0 | // Clear the header data from the buffer to make room for the |
1421 | 0 | // packet data. |
1422 | 0 | self.des.pass2_buffer_fill = 0; |
1423 | 0 | } |
1424 | | |
1425 | 0 | Ok(consumed) |
1426 | 0 | } |
1427 | | |
1428 | | // Return the size of the first buffer twopass_in expects |
1429 | | // |
1430 | | // It is the summary size (constant) + the number of frame data packets
1431 | | // (variable, depending on the configuration) it needs to start encoding.
1432 | 0 | pub(crate) fn twopass_first_packet_size(&self) -> usize { |
1433 | 0 | let frames_needed = if !self.frame_metrics.is_empty() { |
1434 | | // If we're not using whole-file buffering, we need at least one |
1435 | | // frame per buffer slot. |
1436 | 0 | self.reservoir_frame_delay as usize |
1437 | | } else { |
1438 | | // Otherwise we need just one. |
1439 | 0 | 1 |
1440 | | }; |
1441 | | |
1442 | 0 | TWOPASS_HEADER_SZ + frames_needed * TWOPASS_PACKET_SZ |
1443 | 0 | } |
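// Editor's note: worked sizes for the call above, assuming the 68-byte
// header implied by FRAME_NSUBTYPES == 4. With whole-file buffering the
// first buffer is 68 + 1 * 8 = 76 bytes; with a finite buffer and an
// illustrative reservoir_frame_delay of 25, it is 68 + 25 * 8 = 268 bytes.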
1444 | | |
1445 | | // Return the number of frame data packets to be parsed before |
1446 | | // the encoding process can continue. |
1447 | 0 | pub(crate) fn twopass_in_frames_needed(&self) -> i32 { |
1448 | 0 | if self.target_bitrate <= 0 { |
1449 | 0 | return 0; |
1450 | 0 | } |
1451 | 0 | if self.frame_metrics.is_empty() { |
1452 | 0 | return i32::from(!self.pass2_data_ready); |
1453 | 0 | } |
1454 | 0 | let mut cur_scale_window_nframes = 0; |
1455 | 0 | let mut cur_nframes_left = 0; |
1456 | 0 | for fti in 0..=FRAME_NSUBTYPES { |
1457 | 0 | cur_scale_window_nframes += self.scale_window_nframes[fti]; |
1458 | 0 | cur_nframes_left += self.nframes_left[fti]; |
1459 | 0 | } |
1460 | | |
1461 | 0 | (self.reservoir_frame_delay - self.scale_window_ntus) |
1462 | 0 | .clamp(0, cur_nframes_left - cur_scale_window_nframes) |
1463 | 0 | } |
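// Editor's note: a worked example of the clamp above, with illustrative
// numbers. If reservoir_frame_delay is 25 TUs, the scale window currently
// covers 20 TUs and holds 30 frames, and 40 frames remain unread, then
// (25 - 20).clamp(0, 40 - 30) = 5 more frame packets are needed.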
1464 | | |
1465 | 0 | pub(crate) fn parse_frame_data_packet( |
1466 | 0 | &mut self, buf: &[u8], |
1467 | 0 | ) -> Result<(), String> { |
1468 | 0 | if buf.len() != TWOPASS_PACKET_SZ { |
1469 | 0 | return Err("Incorrect buffer size".to_string()); |
1470 | 0 | } |
1471 | 0 |
1472 | 0 | self.des.buffer_fill(buf, 0, TWOPASS_PACKET_SZ); |
1473 | 0 | self.des.pass2_buffer_pos = 0; |
1474 | 0 | let m = self.des.parse_metrics()?; |
1475 | 0 | self.des.pass2_buffer_fill = 0; |
1476 | 0 |
1477 | 0 | if self.frame_metrics.is_empty() { |
1478 | 0 | // We're using a whole-file buffer. |
1479 | 0 | self.cur_metrics = m; |
1480 | 0 | self.pass2_data_ready = true; |
1481 | 0 | } else { |
1482 | | // Safety check |
1483 | 0 | let frames_needed = self.twopass_in_frames_needed(); |
1484 | 0 |
1485 | 0 | if frames_needed > 0 { |
1486 | 0 | if self.nframe_metrics >= self.frame_metrics.len() { |
1487 | 0 | return Err( |
1488 | 0 | "Read too many frames without finding enough TUs".to_string(), |
1489 | 0 | ); |
1490 | 0 | } |
1491 | 0 |
1492 | 0 | let mut fmi = self.frame_metrics_head + self.nframe_metrics; |
1493 | 0 | if fmi >= self.frame_metrics.len() { |
1494 | 0 | fmi -= self.frame_metrics.len(); |
1495 | 0 | } |
1496 | 0 | self.nframe_metrics += 1; |
1497 | 0 | self.frame_metrics[fmi] = m; |
1498 | 0 | // And accumulate the statistics over the window. |
1499 | 0 | self.scale_window_nframes[m.fti] += 1; |
1500 | 0 | if m.fti < FRAME_NSUBTYPES { |
1501 | 0 | self.scale_window_sum[m.fti] += bexp_q24(m.log_scale_q24); |
1502 | 0 | } |
1503 | 0 | if m.show_frame { |
1504 | 0 | self.scale_window_ntus += 1; |
1505 | 0 | } |
1506 | 0 | if frames_needed == 1 { |
1507 | 0 | self.pass2_data_ready = true; |
1508 | 0 | self.cur_metrics = self.frame_metrics[self.frame_metrics_head]; |
1509 | 0 | } |
1510 | | } else { |
1511 | 0 | return Err("No frames needed".to_string()); |
1512 | | } |
1513 | | } |
1514 | | |
1515 | 0 | Ok(()) |
1516 | 0 | } |
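// Editor's sketch (not rav1e code): the fmi computation above is a
// ring-buffer index. Because nframe_metrics is known to be less than the
// buffer length at that point, one conditional subtraction is equivalent to
// a modulo:
fn ring_index(head: usize, offset: usize, len: usize) -> usize {
  debug_assert!(head < len && offset < len);
  let i = head + offset;
  if i >= len { i - len } else { i }
}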
1517 | | |
1518 | | // Parse the rate control per-frame data |
1519 | | // |
1520 | | // If no buffer is passed, it returns the amount of data it expects
1521 | | // to consume next.
1522 | | //
1523 | | // If a properly sized buffer is passed, it returns the amount of data
1524 | | // consumed in the process, or an error message on parsing failure.
1525 | 0 | fn twopass_parse_frame_data( |
1526 | 0 | &mut self, maybe_buf: Option<&[u8]>, mut consumed: usize, |
1527 | 0 | ) -> Result<usize, String> { |
1528 | 0 | { |
1529 | 0 | if self.frame_metrics.is_empty() { |
1530 | | // We're using a whole-file buffer. |
1531 | 0 | if let Some(buf) = maybe_buf { |
1532 | 0 | consumed = self.des.buffer_fill(buf, consumed, TWOPASS_PACKET_SZ); |
1533 | 0 | if self.des.pass2_buffer_fill >= TWOPASS_PACKET_SZ { |
1534 | 0 | self.des.pass2_buffer_pos = 0; |
1535 | 0 | // Read metrics for the next frame. |
1536 | 0 | self.cur_metrics = self.des.parse_metrics()?; |
1537 | | // Clear the buffer for the next frame. |
1538 | 0 | self.des.pass2_buffer_fill = 0; |
1539 | 0 | self.pass2_data_ready = true; |
1540 | 0 | } |
1541 | | } else { |
1542 | 0 | return Ok(TWOPASS_PACKET_SZ - self.des.pass2_buffer_fill); |
1543 | | } |
1544 | | } else { |
1545 | | // We're using a finite buffer. |
1546 | 0 | let mut cur_scale_window_nframes = 0; |
1547 | 0 | let mut cur_nframes_left = 0; |
1548 | | |
1549 | 0 | for fti in 0..=FRAME_NSUBTYPES { |
1550 | 0 | cur_scale_window_nframes += self.scale_window_nframes[fti]; |
1551 | 0 | cur_nframes_left += self.nframes_left[fti]; |
1552 | 0 | } |
1553 | | |
1554 | 0 | let mut frames_needed = self.twopass_in_frames_needed(); |
1555 | 0 | while frames_needed > 0 { |
1556 | 0 | if let Some(buf) = maybe_buf { |
1557 | 0 | consumed = self.des.buffer_fill(buf, consumed, TWOPASS_PACKET_SZ); |
1558 | 0 | if self.des.pass2_buffer_fill >= TWOPASS_PACKET_SZ { |
1559 | 0 | self.des.pass2_buffer_pos = 0; |
1560 | | // Read the metrics for the next frame. |
1561 | 0 | let m = self.des.parse_metrics()?; |
1562 | | // Add them to the circular buffer. |
1563 | 0 | if self.nframe_metrics >= self.frame_metrics.len() { |
1564 | 0 | return Err( |
1565 | 0 | "Read too many frames without finding enough TUs" |
1566 | 0 | .to_string(), |
1567 | 0 | ); |
1568 | 0 | } |
1569 | 0 | let mut fmi = self.frame_metrics_head + self.nframe_metrics; |
1570 | 0 | if fmi >= self.frame_metrics.len() { |
1571 | 0 | fmi -= self.frame_metrics.len(); |
1572 | 0 | } |
1573 | 0 | self.nframe_metrics += 1; |
1574 | 0 | self.frame_metrics[fmi] = m; |
1575 | 0 | // And accumulate the statistics over the window. |
1576 | 0 | self.scale_window_nframes[m.fti] += 1; |
1577 | 0 | cur_scale_window_nframes += 1; |
1578 | 0 | if m.fti < FRAME_NSUBTYPES { |
1579 | 0 | self.scale_window_sum[m.fti] += bexp_q24(m.log_scale_q24); |
1580 | 0 | } |
1581 | 0 | if m.show_frame { |
1582 | 0 | self.scale_window_ntus += 1; |
1583 | 0 | } |
1584 | 0 | frames_needed = (self.reservoir_frame_delay |
1585 | 0 | - self.scale_window_ntus) |
1586 | 0 | .clamp(0, cur_nframes_left - cur_scale_window_nframes); |
1587 | 0 | // Clear the buffer for the next frame. |
1588 | 0 | self.des.pass2_buffer_fill = 0; |
1589 | | } else { |
1590 | | // Go back for more data. |
1591 | 0 | break; |
1592 | | } |
1593 | | } else { |
1594 | 0 | return Ok( |
1595 | 0 | TWOPASS_PACKET_SZ * (frames_needed as usize) |
1596 | 0 | - self.des.pass2_buffer_fill, |
1597 | 0 | ); |
1598 | | } |
1599 | | } |
1600 | | // If we've got all the frames we need, fill in the current metrics. |
1601 | | // We're ready to go. |
1602 | 0 | if frames_needed <= 0 { |
1603 | 0 | self.cur_metrics = self.frame_metrics[self.frame_metrics_head]; |
1604 | 0 | // Mark us ready for the next frame. |
1605 | 0 | self.pass2_data_ready = true; |
1606 | 0 | } |
1607 | | } |
1608 | | } |
1609 | | |
1610 | 0 | Ok(consumed) |
1611 | 0 | } |
1612 | | |
1613 | | // If called without a buffer it will return the size of the next |
1614 | | // buffer it expects. |
1615 | | // |
1616 | | // If called with a buffer it will consume it fully. |
1617 | | // It returns Ok with the number of bytes consumed once the buffer has been
1618 | | // parsed, or an error message if the data was insufficient or parsing failed.
1619 | 0 | pub(crate) fn twopass_in( |
1620 | 0 | &mut self, maybe_buf: Option<&[u8]>, |
1621 | 0 | ) -> Result<usize, String> { |
1622 | 0 | let mut consumed = 0; |
1623 | 0 | self.init_second_pass(); |
1624 | 0 | // If we haven't got a valid summary header yet, try to parse one. |
1625 | 0 | if self.nframes_total[FRAME_SUBTYPE_I] == 0 { |
1626 | 0 | self.pass2_data_ready = false; |
1627 | 0 | if let Some(buf) = maybe_buf { |
1628 | 0 | consumed = self.twopass_parse_summary(buf)? |
1629 | | } else { |
1630 | 0 | return Ok(self.twopass_first_packet_size()); |
1631 | | } |
1632 | 0 | } |
1633 | 0 | if self.nframes_total[FRAME_SUBTYPE_I] > 0 { |
1634 | 0 | if self.nencoded_frames + self.nsef_frames |
1635 | 0 | >= self.nframes_total_total as i64 |
1636 | 0 | { |
1637 | 0 | // We don't want any more data after the last frame, and we don't want |
1638 | 0 | // to allow any more frames to be encoded. |
1639 | 0 | self.pass2_data_ready = false; |
1640 | 0 | } else if !self.pass2_data_ready { |
1641 | 0 | return self.twopass_parse_frame_data(maybe_buf, consumed); |
1642 | 0 | } |
1643 | 0 | } |
1644 | 0 | Ok(consumed) |
1645 | 0 | } |
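// Editor's sketch (illustrative helper, not rav1e's public two-pass API):
// a second-pass caller can first ask twopass_in how many bytes it wants by
// passing None, then feed exactly that many bytes of first-pass stats.
// `stats` here is assumed to hold the remaining pass-1 output.
fn feed_second_pass(rc: &mut RCState, stats: &mut &[u8]) -> Result<(), String> {
  loop {
    let needed = rc.twopass_in(None)?;
    if needed == 0 || stats.len() < needed {
      // Nothing is wanted right now, or we have to wait for more data.
      return Ok(());
    }
    let (chunk, rest) = stats.split_at(needed);
    rc.twopass_in(Some(chunk))?;
    *stats = rest;
  }
}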
1646 | | } |