/src/x264/encoder/ratecontrol.c
Line | Count | Source |
1 | | /***************************************************************************** |
2 | | * ratecontrol.c: ratecontrol |
3 | | ***************************************************************************** |
4 | | * Copyright (C) 2005-2025 x264 project |
5 | | * |
6 | | * Authors: Loren Merritt <lorenm@u.washington.edu> |
7 | | * Michael Niedermayer <michaelni@gmx.at> |
8 | | * Gabriel Bouvigne <gabriel.bouvigne@joost.com> |
9 | | * Fiona Glaser <fiona@x264.com> |
10 | | * Måns Rullgård <mru@mru.ath.cx> |
11 | | * |
12 | | * This program is free software; you can redistribute it and/or modify |
13 | | * it under the terms of the GNU General Public License as published by |
14 | | * the Free Software Foundation; either version 2 of the License, or |
15 | | * (at your option) any later version. |
16 | | * |
17 | | * This program is distributed in the hope that it will be useful, |
18 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
20 | | * GNU General Public License for more details. |
21 | | * |
22 | | * You should have received a copy of the GNU General Public License |
23 | | * along with this program; if not, write to the Free Software |
24 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
25 | | * |
26 | | * This program is also available under a commercial proprietary license. |
27 | | * For more information, contact us at licensing@x264.com. |
28 | | *****************************************************************************/ |
29 | | |
30 | | #undef NDEBUG // always check asserts, the speed effect is far too small to disable them |
31 | | |
32 | | #include "common/common.h" |
33 | | #include "ratecontrol.h" |
34 | | #include "me.h" |
35 | | |
36 | | typedef struct |
37 | | { /* Per-frame rate-control record; x264_ratecontrol_t.entry points at an array of these, which x264_macroblock_tree_read() indexes by frame number. */ |
38 | | int pict_type; /* compared against the frame-type byte stored in the MB-tree stats file */ |
39 | | int frame_type; |
40 | | int kept_as_ref; /* nonzero: x264_macroblock_tree_read() loads MB-tree stats for this frame; zero: it falls back to x264_adaptive_quant_frame() */ |
41 | | double qscale; /* reference qscale from which qscale2bits() rescales the bit counts below */ |
42 | | int mv_bits; /* mv_bits, tex_bits and misc_bits are the three terms combined by qscale2bits() */ |
43 | | int tex_bits; |
44 | | int misc_bits; |
45 | | double expected_bits; /* total expected bits up to the current frame (current one excluded) */ |
46 | | double expected_vbv; |
47 | | double new_qscale; |
48 | | float new_qp; |
49 | | int i_count; |
50 | | int p_count; |
51 | | int s_count; |
52 | | float blurred_complexity; |
53 | | char direct_mode; |
54 | | int16_t weight[3][2]; |
55 | | int16_t i_weight_denom[2]; |
56 | | int refcount[16]; /* per-reference counts; x264_reference_build_list_optimal() orders list 0 by these, highest first */ |
57 | | int refs; /* must equal h->i_ref[0] or x264_reference_build_list_optimal() returns -1 */ |
58 | | int64_t i_duration; |
59 | | int64_t i_cpb_duration; |
60 | | int out_num; |
61 | | } ratecontrol_entry_t; |
62 | | |
63 | | typedef struct |
64 | | { /* State of one size predictor; passed to predict_size() and update_predictor(), whose definitions are outside this excerpt. */ |
65 | | float coeff_min; |
66 | | float coeff; |
67 | | float count; |
68 | | float decay; |
69 | | float offset; |
70 | | } predictor_t; |
71 | | |
72 | | struct x264_ratecontrol_t |
73 | | { /* Rate-control state; x264_ratecontrol_new() allocates h->param.i_threads zeroed instances of it for h->rc. */ |
74 | | /* constants */ |
75 | | int b_abr; |
76 | | int b_2pass; |
77 | | int b_vbv; /* set to 1 by x264_ratecontrol_init_reconfigurable() at init when VBV max bitrate and buffer size are both positive */ |
78 | | int b_vbv_min_rate; /* set at init: not 2-pass, ABR method, and vbv_max_bitrate <= bitrate */ |
79 | | double fps; |
80 | | double bitrate; |
81 | | double rate_tolerance; |
82 | | double qcompress; |
83 | | int nmb; /* number of macroblocks in a frame */ |
84 | | int qp_constant[3]; |
85 | | |
86 | | /* current frame */ |
87 | | ratecontrol_entry_t *rce; |
88 | | float qpm; /* qp for current macroblock: precise float for AQ */ |
89 | | float qpa_rc; /* average of macroblocks' qp before aq */ |
90 | | float qpa_rc_prev; |
91 | | int qpa_aq; /* average of macroblocks' qp after aq */ |
92 | | int qpa_aq_prev; |
93 | | float qp_novbv; /* QP for the current frame if 1-pass VBV was disabled. */ |
94 | | |
95 | | /* VBV stuff */ |
96 | | double buffer_size; |
97 | | int64_t buffer_fill_final; /* initialised as buffer_size * f_vbv_buffer_init * sps->vui.i_time_scale, i.e. scaled by the time scale */ |
98 | | int64_t buffer_fill_final_min; |
99 | | double buffer_fill; /* planned buffer, if all in-progress frames hit their bit budget */ |
100 | | double buffer_rate; /* # of bits added to buffer_fill after each frame */ |
101 | | double vbv_max_rate; /* # of bits added to buffer_fill per second */ |
102 | | predictor_t *pred; /* predict frame size from satd */ |
103 | | int single_frame_vbv; /* buffer_rate * 1.1 > buffer_size */ |
104 | | float rate_factor_max_increment; /* Don't allow RF above (CRF + this value). */ |
105 | | |
106 | | /* ABR stuff */ |
107 | | int last_satd; |
108 | | double last_rceq; |
109 | | double cplxr_sum; /* sum of bits*qscale/rceq */ |
110 | | double expected_bits_sum; /* sum of qscale2bits after rceq, ratefactor, and overflow, only includes finished frames */ |
111 | | int64_t filler_bits_sum; /* sum in bits of finished frames' filler data */ |
112 | | double wanted_bits_window; /* target bitrate * window */ |
113 | | double cbr_decay; |
114 | | double short_term_cplxsum; |
115 | | double short_term_cplxcount; |
116 | | double rate_factor_constant; /* computed for CRF in x264_ratecontrol_init_reconfigurable() */ |
117 | | double ip_offset; |
118 | | double pb_offset; |
119 | | |
120 | | /* 2pass stuff */ |
121 | | FILE *p_stat_file_out; |
122 | | char *psz_stat_file_tmpname; |
123 | | FILE *p_mbtree_stat_file_out; |
124 | | char *psz_mbtree_stat_file_tmpname; |
125 | | char *psz_mbtree_stat_file_name; |
126 | | FILE *p_mbtree_stat_file_in; /* read by x264_macroblock_tree_read() */ |
127 | | |
128 | | int num_entries; /* number of ratecontrol_entry_ts */ |
129 | | ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */ |
130 | | ratecontrol_entry_t **entry_out; |
131 | | double last_qscale; |
132 | | double last_qscale_for[3]; /* last qscale for a specific pict type, used for max_diff & ipb factor stuff */ |
133 | | int last_non_b_pict_type; |
134 | | double accum_p_qp; /* for determining I-frame quant */ |
135 | | double accum_p_norm; |
136 | | double last_accum_p_norm; |
137 | | double lmin[3]; /* min qscale by frame type */ |
138 | | double lmax[3]; |
139 | | double lstep; /* max change (multiply) in qscale per frame */ |
140 | | struct |
141 | | { |
142 | | uint16_t *qp_buffer[2]; /* Global buffers for converting MB-tree quantizer data. */ |
143 | | int qpbuf_pos; /* In order to handle pyramid reordering, QP buffer acts as a stack. |
144 | | * This value is the current position (0 or 1); it is -1 while the stack is empty. */ |
145 | | int src_mb_count; /* number of uint16_t entries per frame in the MB-tree stats file */ |
146 | | |
147 | | /* For rescaling */ |
148 | | int rescale_enabled; /* set when the source MB dimensions differ from the current ones */ |
149 | | float *scale_buffer[2]; /* Intermediate buffers */ |
150 | | int filtersize[2]; /* filter size (H/V) */ |
151 | | float *coeffs[2]; |
152 | | int *pos[2]; |
153 | | int srcdim[2]; /* Source dimensions (W/H) */ |
154 | | } mbtree; |
155 | | |
156 | | /* MBRC stuff */ |
157 | | volatile float frame_size_estimated; /* Access to this variable must be atomic: double is |
158 | | * not atomic on all arches we care about */ |
159 | | volatile float bits_so_far; |
160 | | double frame_size_maximum; /* Maximum frame size due to MinCR */ |
161 | | double frame_size_planned; |
162 | | double slice_size_planned; |
163 | | predictor_t *row_pred; |
164 | | predictor_t row_preds[3][2]; |
165 | | predictor_t *pred_b_from_p; /* predict B-frame size from P-frame satd */ |
166 | | int bframes; /* # consecutive B-frames before this P-frame */ |
167 | | int bframe_bits; /* total cost of those frames */ |
168 | | |
169 | | int i_zones; |
170 | | x264_zone_t *zones; |
171 | | x264_zone_t *prev_zone; |
172 | | |
173 | | /* hrd stuff */ |
174 | | int initial_cpb_removal_delay; |
175 | | int initial_cpb_removal_delay_offset; |
176 | | double nrt_first_access_unit; /* nominal removal time */ |
177 | | double previous_cpb_final_arrival_time; |
178 | | uint64_t hrd_multiply_denom; |
179 | | }; |
180 | | |
181 | | |
182 | | static int parse_zones( x264_t *h ); /* forward declarations of file-local helpers; their definitions are outside this excerpt */ |
183 | | static int init_pass2(x264_t *); |
184 | | static float rate_estimate_qscale( x264_t *h ); |
185 | | static int update_vbv( x264_t *h, int bits ); |
186 | | static void update_vbv_plan( x264_t *h, int overhead ); |
187 | | static float predict_size( predictor_t *p, float q, float var ); |
188 | | static void update_predictor( predictor_t *p, float q, float var, float bits ); |
189 | | |
190 | 0 | #define CMP_OPT_FIRST_PASS( opt, param_val )\ |
191 | 0 | {\ |
192 | 0 | if( ( p = strstr( opts, opt "=" ) ) && sscanf( p, opt "=%d" , &i ) && param_val != i )\ |
193 | 0 | {\ |
194 | 0 | x264_log( h, X264_LOG_ERROR, "different " opt " setting than first pass (%d vs %d)\n", param_val, i );\ |
195 | 0 | return -1;\ |
196 | 0 | }\ |
197 | 0 | } |
198 | | |
199 | | /* Terminology: |
200 | | * qp = h.264's quantizer |
201 | | * qscale = linearized quantizer = Lagrange multiplier |
202 | | */ |
203 | | static inline float qp2qscale( float qp ) |
204 | 0 | { /* Converts an H.264 QP to the linearized qscale: 0.85 * 2^((qp - (12 + QP_BD_OFFSET)) / 6). Inverse of qscale2qp(). */ |
205 | 0 | return 0.85f * powf( 2.0f, ( qp - (12.0f + QP_BD_OFFSET) ) / 6.0f ); |
206 | 0 | } |
207 | | static inline float qscale2qp( float qscale ) |
208 | 0 | { /* Converts a linearized qscale back to an H.264 QP: (12 + QP_BD_OFFSET) + 6 * log2(qscale / 0.85). Inverse of qp2qscale(). */ |
209 | 0 | return (12.0f + QP_BD_OFFSET) + 6.0f * log2f( qscale/0.85f ); |
210 | 0 | } |
211 | | |
212 | | /* Texture bitrate is not quite inversely proportional to qscale, |
213 | | * probably due to the changing number of SKIP blocks. |
214 | | * MV bits level off at about qp<=12, because the lambda used |
215 | | * for motion estimation is constant there. */ |
216 | | static inline double qscale2bits( ratecontrol_entry_t *rce, double qscale ) |
217 | 0 | { /* Estimates the bits the frame described by rce would take at `qscale`, by rescaling its recorded texture and MV bits from rce->qscale; misc bits are added unchanged. */ |
218 | 0 | if( qscale<0.1 ) /* clamp the target qscale to at least 0.1 */ |
219 | 0 | qscale = 0.1; |
220 | 0 | return (rce->tex_bits + .1) * pow( rce->qscale / qscale, 1.1 ) /* texture term: exponent 1.1 on the qscale ratio */ |
221 | 0 | + rce->mv_bits * pow( X264_MAX(rce->qscale, 1) / X264_MAX(qscale, 1), 0.5 ) /* MV term: square root of the ratio, with both qscales floored at 1 */ |
222 | 0 | + rce->misc_bits; |
223 | 0 | } |
224 | | |
225 | | static ALWAYS_INLINE uint32_t ac_energy_var( uint64_t sum_ssd, int shift, x264_frame_t *frame, int i, int b_store ) |
226 | 0 | { /* Splits a packed value (low 32 bits: pixel sum, high 32 bits: SSD) and returns ssd - (sum*sum >> shift). When b_store is nonzero the sum and SSD are also accumulated into frame->i_pixel_sum[i] and frame->i_pixel_ssd[i]. */ |
227 | 0 | uint32_t sum = sum_ssd; /* truncation keeps the low 32 bits */ |
228 | 0 | uint32_t ssd = sum_ssd >> 32; |
229 | 0 | if( b_store ) |
230 | 0 | { |
231 | 0 | frame->i_pixel_sum[i] += sum; |
232 | 0 | frame->i_pixel_ssd[i] += ssd; |
233 | 0 | } |
234 | 0 | return ssd - ((uint64_t)sum * sum >> shift); /* square in 64 bits before shifting */ |
235 | 0 | } |
236 | | |
237 | | static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame, int i, int b_chroma, int b_field, int b_store ) |
238 | 0 | { /* Returns the AC energy of macroblock (mb_x, mb_y) for plane i. With b_chroma set, plane[1] is deinterleaved and the two chroma results are summed (stats stored under indices 1 and 2). b_field selects field addressing; b_store is forwarded to ac_energy_var(). */ |
239 | 0 | int height = b_chroma ? 16>>CHROMA_V_SHIFT : 16; |
240 | 0 | int stride = frame->i_stride[i]; |
241 | 0 | int offset = b_field |
242 | 0 | ? 16 * mb_x + height * (mb_y&~1) * stride + (mb_y&1) * stride /* field: start at the MB pair's first row, plus one row when mb_y is odd */ |
243 | 0 | : 16 * mb_x + height * mb_y * stride; |
244 | 0 | stride <<= b_field; /* field mode doubles the stride */ |
245 | 0 | if( b_chroma ) |
246 | 0 | { |
247 | 0 | ALIGNED_ARRAY_64( pixel, pix,[FENC_STRIDE*16] ); |
248 | 0 | int chromapix = h->luma2chroma_pixel[PIXEL_16x16]; |
249 | 0 | int shift = 7 - CHROMA_V_SHIFT; |
250 |

251 | 0 | h->mc.load_deinterleave_chroma_fenc( pix, frame->plane[1] + offset, stride, height ); |
252 | 0 | return ac_energy_var( h->pixf.var[chromapix]( pix, FENC_STRIDE ), shift, frame, 1, b_store ) |
253 | 0 | + ac_energy_var( h->pixf.var[chromapix]( pix+FENC_STRIDE/2, FENC_STRIDE ), shift, frame, 2, b_store ); /* second chroma component starts FENC_STRIDE/2 into each row of pix */ |
254 | 0 | } |
255 | 0 | else |
256 | 0 | return ac_energy_var( h->pixf.var[PIXEL_16x16]( frame->plane[i] + offset, stride ), 8, frame, i, b_store ); |
257 | 0 | } |
258 | | |
259 | | // Find the total AC energy of the block in all planes. |
260 | | static NOINLINE uint32_t ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame ) |
261 | 0 | { /* Returns the AC energy of macroblock (mb_x, mb_y) summed over luma and, depending on the chroma format, the chroma planes. Calls made with b_store=1 also accumulate into frame->i_pixel_sum / i_pixel_ssd. */ |
262 | | /* This function contains annoying hacks because GCC has a habit of reordering emms |
263 | | * and putting it after floating point ops. As a result, we put the emms at the end of the |
264 | | * function and make sure that it's always called before the float math. Noinline makes |
265 | | * sure no reordering goes on. */ |
266 | 0 | uint32_t var; |
267 | 0 | x264_prefetch_fenc( h, frame, mb_x, mb_y ); |
268 | 0 | if( h->mb.b_adaptive_mbaff ) |
269 | 0 | { |
270 | | /* We don't know the super-MB mode we're going to pick yet, so |
271 | | * simply try both and pick the lower of the two. */ |
272 | 0 | uint32_t var_interlaced, var_progressive; |
273 | 0 | var_interlaced = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, 1, 1 ); /* only the interlaced passes use b_store=1, so frame stats are accumulated once */ |
274 | 0 | var_progressive = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, 0, 0 ); |
275 | 0 | if( CHROMA444 ) |
276 | 0 | { |
277 | 0 | var_interlaced += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, 1, 1 ); |
278 | 0 | var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, 0, 0 ); |
279 | 0 | var_interlaced += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, 1, 1 ); |
280 | 0 | var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, 0, 0 ); |
281 | 0 | } |
282 | 0 | else if( CHROMA_FORMAT ) |
283 | 0 | { |
284 | 0 | var_interlaced += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 1, 1 ); |
285 | 0 | var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 0, 0 ); |
286 | 0 | } |
287 | 0 | var = X264_MIN( var_interlaced, var_progressive ); |
288 | 0 | } |
289 | 0 | else |
290 | 0 | { |
291 | 0 | var = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, PARAM_INTERLACED, 1 ); |
292 | 0 | if( CHROMA444 ) |
293 | 0 | { |
294 | 0 | var += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, PARAM_INTERLACED, 1 ); |
295 | 0 | var += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, PARAM_INTERLACED, 1 ); |
296 | 0 | } |
297 | 0 | else if( CHROMA_FORMAT ) |
298 | 0 | var += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, PARAM_INTERLACED, 1 ); |
299 | 0 | } |
300 | 0 | x264_emms(); |
301 | 0 | return var; |
302 | 0 | } |
303 | | |
304 | | void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_offsets ) |
305 | 0 | { /* Fills frame->f_qp_offset and frame->f_qp_offset_aq with per-macroblock QP offsets (and frame->i_inv_qscale_factor when h->frames.b_have_lowres), and gathers per-plane pixel sum/SSD statistics. quant_offsets may be NULL; when given, its per-MB values are added to the offsets. */ |
306 | | /* Initialize frame stats */ |
307 | 0 | for( int i = 0; i < 3; i++ ) |
308 | 0 | { |
309 | 0 | frame->i_pixel_sum[i] = 0; |
310 | 0 | frame->i_pixel_ssd[i] = 0; |
311 | 0 | } |
312 | | |
313 | | /* Degenerate cases */ |
314 | 0 | if( h->param.rc.i_aq_mode == X264_AQ_NONE || h->param.rc.f_aq_strength == 0 ) |
315 | 0 | { |
316 | | /* Need to init it anyways for MB tree */ |
317 | 0 | if( h->param.rc.i_aq_mode && h->param.rc.f_aq_strength == 0 ) /* an AQ mode is selected but strength is 0: offsets are just quant_offsets, or zero */ |
318 | 0 | { |
319 | 0 | if( quant_offsets ) |
320 | 0 | { |
321 | 0 | for( int mb_xy = 0; mb_xy < h->mb.i_mb_count; mb_xy++ ) |
322 | 0 | frame->f_qp_offset[mb_xy] = frame->f_qp_offset_aq[mb_xy] = quant_offsets[mb_xy]; |
323 | 0 | if( h->frames.b_have_lowres ) |
324 | 0 | for( int mb_xy = 0; mb_xy < h->mb.i_mb_count; mb_xy++ ) |
325 | 0 | frame->i_inv_qscale_factor[mb_xy] = x264_exp2fix8( frame->f_qp_offset[mb_xy] ); |
326 | 0 | } |
327 | 0 | else |
328 | 0 | { |
329 | 0 | memset( frame->f_qp_offset, 0, h->mb.i_mb_count * sizeof(float) ); |
330 | 0 | memset( frame->f_qp_offset_aq, 0, h->mb.i_mb_count * sizeof(float) ); |
331 | 0 | if( h->frames.b_have_lowres ) |
332 | 0 | for( int mb_xy = 0; mb_xy < h->mb.i_mb_count; mb_xy++ ) |
333 | 0 | frame->i_inv_qscale_factor[mb_xy] = 256; /* the value used for a zero offset */ |
334 | 0 | } |
335 | 0 | } |
336 | | /* Need variance data for weighted prediction */ |
337 | 0 | if( h->param.analyse.i_weighted_pred ) |
338 | 0 | { |
339 | 0 | for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ ) |
340 | 0 | for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ ) |
341 | 0 | ac_energy_mb( h, mb_x, mb_y, frame ); /* return value unused; called for the frame stats it accumulates */ |
342 | 0 | } |
343 | 0 | else |
344 | 0 | return; /* no stats were gathered, so the mean-removal step at the end is skipped */ |
345 | 0 | } |
346 | | /* Actual adaptive quantization */ |
347 | 0 | else |
348 | 0 | { |
349 | | /* constants chosen to result in approximately the same overall bitrate as without AQ. |
350 | | * FIXME: while they're written in 5 significant digits, they're only tuned to 2. */ |
351 | 0 | float strength; |
352 | 0 | float avg_adj = 0.f; |
353 | 0 | float bias_strength = 0.f; |
354 |

355 | 0 | if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE || h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE_BIASED ) |
356 | 0 | { /* first pass over the frame: store (energy*correction + 1)^(1/8) per MB and accumulate its mean and mean square */ |
357 | 0 | float bit_depth_correction = 1.f / (1 << (2*(BIT_DEPTH-8))); |
358 | 0 | float avg_adj_pow2 = 0.f; |
359 | 0 | for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ ) |
360 | 0 | for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ ) |
361 | 0 | { |
362 | 0 | uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame ); |
363 | 0 | float qp_adj = powf( energy * bit_depth_correction + 1, 0.125f ); |
364 | 0 | frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj; /* temporary storage; overwritten with the final offset in the loop below */ |
365 | 0 | avg_adj += qp_adj; |
366 | 0 | avg_adj_pow2 += qp_adj * qp_adj; |
367 | 0 | } |
368 | 0 | avg_adj /= h->mb.i_mb_count; |
369 | 0 | avg_adj_pow2 /= h->mb.i_mb_count; |
370 | 0 | strength = h->param.rc.f_aq_strength * avg_adj; |
371 | 0 | avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - 14.f) / avg_adj; |
372 | 0 | bias_strength = h->param.rc.f_aq_strength; |
373 | 0 | } |
374 | 0 | else |
375 | 0 | strength = h->param.rc.f_aq_strength * 1.0397f; |
376 |

377 | 0 | for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ ) |
378 | 0 | for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ ) |
379 | 0 | { |
380 | 0 | float qp_adj; |
381 | 0 | int mb_xy = mb_x + mb_y*h->mb.i_mb_stride; |
382 | 0 | if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE_BIASED ) |
383 | 0 | { |
384 | 0 | qp_adj = frame->f_qp_offset[mb_xy]; |
385 | 0 | qp_adj = strength * (qp_adj - avg_adj) + bias_strength * (1.f - 14.f / (qp_adj * qp_adj)); |
386 | 0 | } |
387 | 0 | else if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE ) |
388 | 0 | { |
389 | 0 | qp_adj = frame->f_qp_offset[mb_xy]; |
390 | 0 | qp_adj = strength * (qp_adj - avg_adj); |
391 | 0 | } |
392 | 0 | else |
393 | 0 | { |
394 | 0 | uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame ); |
395 | 0 | qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - (14.427f + 2*(BIT_DEPTH-8))); /* energy floored at 1 before the log */ |
396 | 0 | } |
397 | 0 | if( quant_offsets ) |
398 | 0 | qp_adj += quant_offsets[mb_xy]; |
399 | 0 | frame->f_qp_offset[mb_xy] = |
400 | 0 | frame->f_qp_offset_aq[mb_xy] = qp_adj; |
401 | 0 | if( h->frames.b_have_lowres ) |
402 | 0 | frame->i_inv_qscale_factor[mb_xy] = x264_exp2fix8(qp_adj); |
403 | 0 | } |
404 | 0 | } |
405 | | |
406 | | /* Remove mean from SSD calculation */ |
407 | 0 | for( int i = 0; i < 3; i++ ) |
408 | 0 | { |
409 | 0 | uint64_t ssd = frame->i_pixel_ssd[i]; |
410 | 0 | uint64_t sum = frame->i_pixel_sum[i]; |
411 | 0 | int width = 16*h->mb.i_mb_width >> (i && CHROMA_H_SHIFT); /* planes 1 and 2 use the chroma-subsampled size */ |
412 | 0 | int height = 16*h->mb.i_mb_height >> (i && CHROMA_V_SHIFT); |
413 | 0 | frame->i_pixel_ssd[i] = ssd - (sum * sum + width * height / 2) / (width * height); /* ssd minus sum^2/(width*height), rounded to nearest */ |
414 | 0 | } |
415 | 0 | } Unexecuted instantiation: x264_8_adaptive_quant_frame Unexecuted instantiation: x264_10_adaptive_quant_frame |
416 | | |
417 | | static int macroblock_tree_rescale_init( x264_t *h, x264_ratecontrol_t *rc ) |
418 | 0 | { /* Allocates the MB-tree QP buffer(s) and, when the source MB grid (from rc->mbtree.srcdim, in pixels on entry) differs from the current one, the scratch buffers and per-axis filter tables used by macroblock_tree_rescale(). Returns 0 on success and -1 through the fail label. NOTE(review): CHECKED_MALLOC is defined elsewhere; it presumably jumps to `fail` on allocation failure - confirm. */ |
419 | | /* Use fractional QP array dimensions to compensate for edge padding */ |
420 | 0 | float srcdim[2] = {rc->mbtree.srcdim[0] / 16.f, rc->mbtree.srcdim[1] / 16.f}; |
421 | 0 | float dstdim[2] = { h->param.i_width / 16.f, h->param.i_height / 16.f}; |
422 | 0 | int srcdimi[2] = {ceil(srcdim[0]), ceil(srcdim[1])}; |
423 | 0 | int dstdimi[2] = {ceil(dstdim[0]), ceil(dstdim[1])}; |
424 | 0 | if( h->param.b_interlaced || h->param.b_fake_interlaced ) |
425 | 0 | { /* round the MB heights up to an even number */ |
426 | 0 | srcdimi[1] = (srcdimi[1]+1)&~1; |
427 | 0 | dstdimi[1] = (dstdimi[1]+1)&~1; |
428 | 0 | } |
429 |

430 | 0 | rc->mbtree.src_mb_count = srcdimi[0] * srcdimi[1]; |
431 |

432 | 0 | CHECKED_MALLOC( rc->mbtree.qp_buffer[0], rc->mbtree.src_mb_count * sizeof(uint16_t) ); |
433 | 0 | if( h->param.i_bframe_pyramid && h->param.rc.b_stat_read ) /* the second stack slot is only allocated in this case */ |
434 | 0 | CHECKED_MALLOC( rc->mbtree.qp_buffer[1], rc->mbtree.src_mb_count * sizeof(uint16_t) ); |
435 | 0 | rc->mbtree.qpbuf_pos = -1; /* stack starts empty */ |
436 | | |
437 | | /* No rescaling to do */ |
438 | 0 | if( srcdimi[0] == dstdimi[0] && srcdimi[1] == dstdimi[1] ) |
439 | 0 | return 0; /* rescale_enabled and rc->mbtree.srcdim are left as they were */ |
440 | | |
441 | 0 | rc->mbtree.rescale_enabled = 1; |
442 | | |
443 | | /* Allocate intermediate scaling buffers */ |
444 | 0 | CHECKED_MALLOC( rc->mbtree.scale_buffer[0], srcdimi[0] * srcdimi[1] * sizeof(float) ); /* source-sized input */ |
445 | 0 | CHECKED_MALLOC( rc->mbtree.scale_buffer[1], dstdimi[0] * srcdimi[1] * sizeof(float) ); /* destination width x source height */ |
446 | | |
447 | | /* Allocate and calculate resize filter parameters and coefficients */ |
448 | 0 | for( int i = 0; i < 2; i++ ) |
449 | 0 | { |
450 | 0 | if( srcdim[i] > dstdim[i] ) // downscale |
451 | 0 | rc->mbtree.filtersize[i] = 1 + (2 * srcdimi[i] + dstdimi[i] - 1) / dstdimi[i]; |
452 | 0 | else // upscale |
453 | 0 | rc->mbtree.filtersize[i] = 3; |
454 |

455 | 0 | CHECKED_MALLOC( rc->mbtree.coeffs[i], rc->mbtree.filtersize[i] * dstdimi[i] * sizeof(float) ); |
456 | 0 | CHECKED_MALLOC( rc->mbtree.pos[i], dstdimi[i] * sizeof(int) ); |
457 | | |
458 | | /* Initialize filter coefficients */ |
459 | 0 | float inc = srcdim[i] / dstdim[i]; |
460 | 0 | float dmul = inc > 1.f ? dstdim[i] / srcdim[i] : 1.f; |
461 | 0 | float dstinsrc = 0.5f * inc - 0.5f; |
462 | 0 | int filtersize = rc->mbtree.filtersize[i]; |
463 | 0 | for( int j = 0; j < dstdimi[i]; j++ ) |
464 | 0 | { |
465 | 0 | int pos = dstinsrc - (filtersize - 2.f) * 0.5f; /* first tap index; may be negative, tapfilter() clips indices when reading */ |
466 | 0 | float sum = 0.0; |
467 | 0 | rc->mbtree.pos[i][j] = pos; |
468 | 0 | for( int k = 0; k < filtersize; k++ ) |
469 | 0 | { |
470 | 0 | float d = fabs( pos + k - dstinsrc ) * dmul; |
471 | 0 | float coeff = X264_MAX( 1.f - d, 0 ); /* weight falls off linearly with distance, floored at 0 */ |
472 | 0 | rc->mbtree.coeffs[i][j * filtersize + k] = coeff; |
473 | 0 | sum += coeff; |
474 | 0 | } |
475 | 0 | sum = 1.0f / sum; |
476 | 0 | for( int k = 0; k < filtersize; k++ ) |
477 | 0 | rc->mbtree.coeffs[i][j * filtersize + k] *= sum; /* normalise so the taps for output j sum to 1 */ |
478 | 0 | dstinsrc += inc; |
479 | 0 | } |
480 | 0 | } |
481 | | |
482 | | /* Write back actual qp array dimensions */ |
483 | 0 | rc->mbtree.srcdim[0] = srcdimi[0]; /* srcdim now holds macroblock counts, not pixels */ |
484 | 0 | rc->mbtree.srcdim[1] = srcdimi[1]; |
485 | 0 | return 0; |
486 | 0 | fail: |
487 | 0 | return -1; /* nothing is freed here; macroblock_tree_rescale_destroy() releases these buffers */ |
488 | 0 | } |
489 | | |
490 | | static void macroblock_tree_rescale_destroy( x264_ratecontrol_t *rc ) |
491 | 0 | { /* Frees every buffer that macroblock_tree_rescale_init() may have allocated, for both slots/axes. NOTE(review): unallocated slots are passed to x264_free() as-is, so this relies on them being NULL and on x264_free() accepting NULL - confirm. */ |
492 | 0 | for( int i = 0; i < 2; i++ ) |
493 | 0 | { |
494 | 0 | x264_free( rc->mbtree.qp_buffer[i] ); |
495 | 0 | x264_free( rc->mbtree.scale_buffer[i] ); |
496 | 0 | x264_free( rc->mbtree.coeffs[i] ); |
497 | 0 | x264_free( rc->mbtree.pos[i] ); |
498 | 0 | } |
499 | 0 | } |
500 | | |
501 | | static ALWAYS_INLINE float tapfilter( float *src, int pos, int max, int stride, float *coeff, int filtersize ) |
502 | 0 | { /* Returns the sum of filtersize samples of src, taken at consecutive indices starting from pos and spaced `stride` floats apart, each multiplied by the matching coeff[]. Indices go through x264_clip3( pos, 0, max-1 ); NOTE(review): presumably a clamp to [0, max-1] - helper not visible here. */ |
503 | 0 | float sum = 0.f; |
504 | 0 | for( int i = 0; i < filtersize; i++, pos++ ) |
505 | 0 | sum += src[x264_clip3( pos, 0, max-1 )*stride] * coeff[i]; |
506 | 0 | return sum; |
507 | 0 | } |
508 | | |
509 | | static void macroblock_tree_rescale( x264_t *h, x264_ratecontrol_t *rc, float *dst ) |
510 | 0 | { /* Resamples the source-sized float array in rc->mbtree.scale_buffer[0] to h->mb.i_mb_width x h->mb.i_mb_height and writes it to dst, in two separable passes that use the tables built by macroblock_tree_rescale_init(). */ |
511 | 0 | float *input, *output; |
512 | 0 | int filtersize, stride, height; |
513 | | |
514 | | /* H scale first */ |
515 | 0 | input = rc->mbtree.scale_buffer[0]; |
516 | 0 | output = rc->mbtree.scale_buffer[1]; |
517 | 0 | filtersize = rc->mbtree.filtersize[0]; |
518 | 0 | stride = rc->mbtree.srcdim[0]; |
519 | 0 | height = rc->mbtree.srcdim[1]; |
520 | 0 | for( int y = 0; y < height; y++, input += stride, output += h->mb.i_mb_width ) /* one source row in, one destination-width row out */ |
521 | 0 | { |
522 | 0 | float *coeff = rc->mbtree.coeffs[0]; |
523 | 0 | for( int x = 0; x < h->mb.i_mb_width; x++, coeff+=filtersize ) |
524 | 0 | output[x] = tapfilter( input, rc->mbtree.pos[0][x], stride, 1, coeff, filtersize ); /* here `stride` is the source row width, passed as the index limit */ |
525 | 0 | } |
526 | | |
527 | | /* V scale next */ |
528 | 0 | input = rc->mbtree.scale_buffer[1]; |
529 | 0 | output = dst; |
530 | 0 | filtersize = rc->mbtree.filtersize[1]; |
531 | 0 | stride = h->mb.i_mb_width; |
532 | 0 | height = rc->mbtree.srcdim[1]; |
533 | 0 | for( int x = 0; x < h->mb.i_mb_width; x++, input++, output++ ) /* walk column by column */ |
534 | 0 | { |
535 | 0 | float *coeff = rc->mbtree.coeffs[1]; |
536 | 0 | for( int y = 0; y < h->mb.i_mb_height; y++, coeff+=filtersize ) |
537 | 0 | output[y*stride] = tapfilter( input, rc->mbtree.pos[1][y], height, stride, coeff, filtersize ); |
538 | 0 | } |
539 | 0 | } |
540 | | |
541 | | int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame, float *quant_offsets ) |
542 | 0 | { /* Loads this frame's QP offsets. If the frame's stats entry has kept_as_ref set, the offsets come from the MB-tree stats file (rescaled when rc->mbtree.rescale_enabled); otherwise x264_adaptive_quant_frame() computes them. Returns 0 on success, -1 after logging an error for a short read or a frame-type mismatch. */ |
543 | 0 | x264_ratecontrol_t *rc = h->rc; |
544 | 0 | uint8_t i_type_actual = rc->entry[frame->i_frame].pict_type; |
545 |

546 | 0 | if( rc->entry[frame->i_frame].kept_as_ref ) |
547 | 0 | { |
548 | 0 | uint8_t i_type; |
549 | 0 | if( rc->mbtree.qpbuf_pos < 0 ) /* stack empty: read records until one matches this frame's type */ |
550 | 0 | { |
551 | 0 | do |
552 | 0 | { |
553 | 0 | rc->mbtree.qpbuf_pos++; |
554 |

555 | 0 | if( !fread( &i_type, 1, 1, rc->p_mbtree_stat_file_in ) ) /* each record is a one-byte frame type followed by src_mb_count uint16_t values */ |
556 | 0 | goto fail; |
557 | 0 | if( fread( rc->mbtree.qp_buffer[rc->mbtree.qpbuf_pos], sizeof(uint16_t), rc->mbtree.src_mb_count, rc->p_mbtree_stat_file_in ) != (unsigned)rc->mbtree.src_mb_count ) |
558 | 0 | goto fail; |
559 | | |
560 | 0 | if( i_type != i_type_actual && (rc->mbtree.qpbuf_pos == 1 || !rc->mbtree.qp_buffer[1]) ) /* give up when the stack is full OR has no second slot: qp_buffer[1] is only allocated for b-pyramid, and looping again would fread() into NULL */ |
561 | 0 | { |
562 | 0 | x264_log( h, X264_LOG_ERROR, "MB-tree frametype %d doesn't match actual frametype %d.\n", i_type, i_type_actual ); |
563 | 0 | return -1; |
564 | 0 | } |
565 | 0 | } while( i_type != i_type_actual ); |
566 | 0 | } |
567 | | |
568 | 0 | float *dst = rc->mbtree.rescale_enabled ? rc->mbtree.scale_buffer[0] : frame->f_qp_offset; /* unpack into the rescaler's input buffer when rescaling */ |
569 | 0 | h->mc.mbtree_fix8_unpack( dst, rc->mbtree.qp_buffer[rc->mbtree.qpbuf_pos], rc->mbtree.src_mb_count ); |
570 | 0 | if( rc->mbtree.rescale_enabled ) |
571 | 0 | macroblock_tree_rescale( h, rc, frame->f_qp_offset ); |
572 | 0 | if( h->frames.b_have_lowres ) |
573 | 0 | for( int i = 0; i < h->mb.i_mb_count; i++ ) |
574 | 0 | frame->i_inv_qscale_factor[i] = x264_exp2fix8( frame->f_qp_offset[i] ); |
575 | 0 | rc->mbtree.qpbuf_pos--; /* pop the record just consumed */ |
576 | 0 | } |
577 | 0 | else |
578 | 0 | x264_adaptive_quant_frame( h, frame, quant_offsets ); |
579 | 0 | return 0; |
580 | 0 | fail: |
581 | 0 | x264_log( h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n" ); |
582 | 0 | return -1; |
583 | 0 | } Unexecuted instantiation: x264_8_macroblock_tree_read Unexecuted instantiation: x264_10_macroblock_tree_read |
584 | | |
585 | | int x264_reference_build_list_optimal( x264_t *h ) |
586 | 0 | { /* Reorders list-0 references 1 .. h->i_ref[0]-1 using the per-reference counts in h->rc->rce->refcount, moving each reference's weights in h->fenc->weight with it; reference 0 is left in place. Returns -1 without changes when rce->refs != h->i_ref[0], otherwise 0. */ |
587 | 0 | ratecontrol_entry_t *rce = h->rc->rce; |
588 | 0 | x264_frame_t *frames[16]; |
589 | 0 | x264_weight_t weights[16][3]; |
590 | 0 | int refcount[16]; |
591 |

592 | 0 | if( rce->refs != h->i_ref[0] ) |
593 | 0 | return -1; |
594 | | |
595 | 0 | memcpy( frames, h->fref[0], sizeof(frames) ); /* work from local copies so the originals can be overwritten in place */ |
596 | 0 | memcpy( refcount, rce->refcount, sizeof(refcount) ); |
597 | 0 | memcpy( weights, h->fenc->weight, sizeof(weights) ); |
598 | 0 | memset( &h->fenc->weight[1][0], 0, sizeof(x264_weight_t[15][3]) ); /* clear the weights of references 1..15; reference 0 keeps its own */ |
599 | | |
600 | | /* For now don't reorder ref 0; it seems to lower quality |
601 | | in most cases due to skips. */ |
602 | 0 | for( int ref = 1; ref < h->i_ref[0]; ref++ ) |
603 | 0 | { |
604 | 0 | int max = -1; |
605 | 0 | int bestref = 1; |
606 |

607 | 0 | for( int i = 1; i < h->i_ref[0]; i++ ) |
608 | | /* Favor lower POC as a tiebreaker. */ |
609 | 0 | COPY2_IF_GT( max, refcount[i], bestref, i ); /* NOTE(review): macro defined elsewhere; presumably sets max/bestref when refcount[i] > max - confirm */ |
610 | | |
611 | | /* FIXME: If there are duplicates from frames other than ref0 then it is possible |
612 | | * that the optimal ordering doesn't place every duplicate. */ |
613 |

614 | 0 | refcount[bestref] = -1; /* mark as placed so it is not picked again */ |
615 | 0 | h->fref[0][ref] = frames[bestref]; |
616 | 0 | memcpy( h->fenc->weight[ref], weights[bestref], sizeof(weights[bestref]) ); |
617 | 0 | } |
618 |

619 | 0 | return 0; |
620 | 0 | } Unexecuted instantiation: x264_8_reference_build_list_optimal Unexecuted instantiation: x264_10_reference_build_list_optimal |
621 | | |
622 | | static char *strcat_filename( char *input, char *suffix ) |
623 | 0 | { /* Returns a newly x264_malloc()ed, NUL-terminated string holding `input` followed by `suffix`, or NULL if the allocation fails. Neither argument is modified; the caller is responsible for releasing the result. */ |
624 | 0 | char *output = x264_malloc( strlen( input ) + strlen( suffix ) + 1 ); /* +1 for the terminator */ |
625 | 0 | if( !output ) |
626 | 0 | return NULL; |
627 | 0 | strcpy( output, input ); |
628 | 0 | strcat( output, suffix ); |
629 | 0 | return output; |
630 | 0 | } |
631 | | |
632 | | void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init ) |
633 | 0 | { /* (Re)derives the rate-control settings that depend on h->param: the CRF rate factor and, when both VBV max bitrate and VBV buffer size are positive, the VBV state and (at init only) the HRD fields of h->sps. b_init is nonzero for the initial call; when it is zero, nothing is done in 2-pass mode and VBV changes are refused while NAL HRD is in use. */ |
634 | 0 | x264_ratecontrol_t *rc = h->rc; |
635 | 0 | if( !b_init && rc->b_2pass ) |
636 | 0 | return; |
637 | | |
638 | 0 | if( h->param.rc.i_rc_method == X264_RC_CRF ) |
639 | 0 | { |
640 | | /* Arbitrary rescaling to make CRF somewhat similar to QP. |
641 | | * Try to compensate for MB-tree's effects as well. */ |
642 | 0 | double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80); |
643 | 0 | double mbtree_offset = h->param.rc.b_mb_tree ? (1.0-h->param.rc.f_qcompress)*13.5 : 0; |
644 | 0 | rc->rate_factor_constant = pow( base_cplx, 1 - rc->qcompress ) |
645 | 0 | / qp2qscale( h->param.rc.f_rf_constant + mbtree_offset + QP_BD_OFFSET ); |
646 | 0 | } |
647 |

648 | 0 | if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 ) |
649 | 0 | { |
650 | | /* We don't support changing the ABR bitrate right now, |
651 | | so if the stream starts as CBR, keep it CBR. */ |
652 | 0 | if( rc->b_vbv_min_rate ) |
653 | 0 | h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate; |
654 |

655 | 0 | if( h->param.rc.i_vbv_buffer_size < (int)(h->param.rc.i_vbv_max_bitrate / rc->fps) ) |
656 | 0 | { |
657 | 0 | h->param.rc.i_vbv_buffer_size = h->param.rc.i_vbv_max_bitrate / rc->fps; /* raise the buffer to one frame's worth of bits at the max rate */ |
658 | 0 | x264_log( h, X264_LOG_WARNING, "VBV buffer size cannot be smaller than one frame, using %d kbit\n", |
659 | 0 | h->param.rc.i_vbv_buffer_size ); |
660 | 0 | } |
661 |

662 | 0 | int kilobit_size = h->param.i_avcintra_class ? 1024 : 1000; /* a kilobit is 1024 bits when an AVC-Intra class is set, 1000 otherwise */ |
663 | 0 | int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * kilobit_size; |
664 | 0 | int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * kilobit_size; |
665 | | |
666 | | /* Init HRD */ |
667 | 0 | if( h->param.i_nal_hrd && b_init ) |
668 | 0 | { |
669 | 0 | h->sps->vui.hrd.i_cpb_cnt = 1; |
670 | 0 | h->sps->vui.hrd.b_cbr_hrd = h->param.i_nal_hrd == X264_NAL_HRD_CBR; |
671 | 0 | h->sps->vui.hrd.i_time_offset_length = 0; |
672 |

673 | 0 | #define BR_SHIFT 6 |
674 | 0 | #define CPB_SHIFT 4 |
675 | | |
676 | | // normalize HRD size and rate to the value / scale notation |
677 | 0 | h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( vbv_max_bitrate ) - BR_SHIFT, 0, 15 ); |
678 | 0 | h->sps->vui.hrd.i_bit_rate_value = vbv_max_bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT ); |
679 | 0 | h->sps->vui.hrd.i_bit_rate_unscaled = h->sps->vui.hrd.i_bit_rate_value << ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT ); /* the rate as representable in value/scale form; low bits may be dropped */ |
680 | 0 | h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( vbv_buffer_size ) - CPB_SHIFT, 0, 15 ); |
681 | 0 | h->sps->vui.hrd.i_cpb_size_value = vbv_buffer_size >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT ); |
682 | 0 | h->sps->vui.hrd.i_cpb_size_unscaled = h->sps->vui.hrd.i_cpb_size_value << ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT ); |
683 |

684 | 0 | #undef CPB_SHIFT |
685 | 0 | #undef BR_SHIFT |
686 | | |
687 | | // arbitrary |
688 | 0 | #define MAX_DURATION 0.5 |
689 |

690 | 0 | int max_cpb_output_delay = X264_MIN( h->param.i_keyint_max * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick, INT_MAX ); |
691 | 0 | int max_dpb_output_delay = h->sps->vui.i_max_dec_frame_buffering * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick; |
692 | 0 | int max_delay = (int)(90000.0 * (double)h->sps->vui.hrd.i_cpb_size_unscaled / h->sps->vui.hrd.i_bit_rate_unscaled + 0.5); /* buffer size / bit rate, times 90000, rounded */ |
693 |

694 | 0 | h->sps->vui.hrd.i_initial_cpb_removal_delay_length = 2 + x264_clip3( 32 - x264_clz( max_delay ), 4, 22 ); /* field widths derived from the bit length of the maxima above */ |
695 | 0 | h->sps->vui.hrd.i_cpb_removal_delay_length = x264_clip3( 32 - x264_clz( max_cpb_output_delay ), 4, 31 ); |
696 | 0 | h->sps->vui.hrd.i_dpb_output_delay_length = x264_clip3( 32 - x264_clz( max_dpb_output_delay ), 4, 31 ); |
697 |

698 | 0 | #undef MAX_DURATION |
699 |

700 | 0 | vbv_buffer_size = h->sps->vui.hrd.i_cpb_size_unscaled; /* continue with the values the HRD fields can actually express */ |
701 | 0 | vbv_max_bitrate = h->sps->vui.hrd.i_bit_rate_unscaled; |
702 | 0 | } |
703 | 0 | else if( h->param.i_nal_hrd && !b_init ) |
704 | 0 | { |
705 | 0 | x264_log( h, X264_LOG_WARNING, "VBV parameters cannot be changed when NAL HRD is in use\n" ); |
706 | 0 | return; |
707 | 0 | } |
708 | 0 | h->sps->vui.hrd.i_bit_rate_unscaled = vbv_max_bitrate; |
709 | 0 | h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size; |
710 |

711 | 0 | if( rc->b_vbv_min_rate ) |
712 | 0 | rc->bitrate = (double)h->param.rc.i_bitrate * kilobit_size; |
713 | 0 | rc->buffer_rate = vbv_max_bitrate / rc->fps; |
714 | 0 | rc->vbv_max_rate = vbv_max_bitrate; |
715 | 0 | rc->buffer_size = vbv_buffer_size; |
716 | 0 | rc->single_frame_vbv = rc->buffer_rate * 1.1 > rc->buffer_size; |
717 | 0 | if( rc->b_abr && h->param.rc.i_rc_method == X264_RC_ABR ) |
718 | 0 | rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size |
719 | 0 | * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate); |
720 | 0 | if( h->param.rc.i_rc_method == X264_RC_CRF && h->param.rc.f_rf_constant_max ) |
721 | 0 | { |
722 | 0 | rc->rate_factor_max_increment = h->param.rc.f_rf_constant_max - h->param.rc.f_rf_constant; |
723 | 0 | if( rc->rate_factor_max_increment <= 0 ) |
724 | 0 | { |
725 | 0 | x264_log( h, X264_LOG_WARNING, "CRF max must be greater than CRF\n" ); |
726 | 0 | rc->rate_factor_max_increment = 0; |
727 | 0 | } |
728 | 0 | } |
729 | 0 | if( b_init ) |
730 | 0 | { |
731 | 0 | if( h->param.rc.f_vbv_buffer_init > 1. ) /* values above 1 are divided by the buffer size (in kbit) to get a fraction */ |
732 | 0 | h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 ); |
733 | 0 | h->param.rc.f_vbv_buffer_init = x264_clip3f( X264_MAX( h->param.rc.f_vbv_buffer_init, rc->buffer_rate / rc->buffer_size ), 0, 1); /* at least one frame's share of the buffer */ |
734 | 0 | rc->buffer_fill_final = |
735 | 0 | rc->buffer_fill_final_min = rc->buffer_size * h->param.rc.f_vbv_buffer_init * h->sps->vui.i_time_scale; |
736 | 0 | rc->b_vbv = 1; |
737 | 0 | rc->b_vbv_min_rate = !rc->b_2pass |
738 | 0 | && h->param.rc.i_rc_method == X264_RC_ABR |
739 | 0 | && h->param.rc.i_vbv_max_bitrate <= h->param.rc.i_bitrate; |
740 | 0 | } |
741 | 0 | } |
742 | 0 | } Unexecuted instantiation: x264_8_ratecontrol_init_reconfigurable Unexecuted instantiation: x264_10_ratecontrol_init_reconfigurable |
743 | | |
744 | | int x264_ratecontrol_new( x264_t *h ) |
745 | 0 | { |
746 | 0 | x264_ratecontrol_t *rc; |
747 | |
|
748 | 0 | x264_emms(); |
749 | |
|
750 | 0 | CHECKED_MALLOCZERO( h->rc, h->param.i_threads * sizeof(x264_ratecontrol_t) ); |
751 | 0 | rc = h->rc; |
752 | |
|
753 | 0 | rc->b_abr = h->param.rc.i_rc_method != X264_RC_CQP && !h->param.rc.b_stat_read; |
754 | 0 | rc->b_2pass = h->param.rc.i_rc_method == X264_RC_ABR && h->param.rc.b_stat_read; |
755 | | |
756 | | /* FIXME: use integers */ |
757 | 0 | if( h->param.i_fps_num > 0 && h->param.i_fps_den > 0 ) |
758 | 0 | rc->fps = (float) h->param.i_fps_num / h->param.i_fps_den; |
759 | 0 | else |
760 | 0 | rc->fps = 25.0; |
761 | |
|
762 | 0 | if( h->param.rc.b_mb_tree ) |
763 | 0 | { |
764 | 0 | h->param.rc.f_pb_factor = 1; |
765 | 0 | rc->qcompress = 1; |
766 | 0 | } |
767 | 0 | else |
768 | 0 | rc->qcompress = h->param.rc.f_qcompress; |
769 | |
|
770 | 0 | rc->bitrate = h->param.rc.i_bitrate * (h->param.i_avcintra_class ? 1024. : 1000.); |
771 | 0 | rc->rate_tolerance = h->param.rc.f_rate_tolerance; |
772 | 0 | rc->nmb = h->mb.i_mb_count; |
773 | 0 | rc->last_non_b_pict_type = -1; |
774 | 0 | rc->cbr_decay = 1.0; |
775 | |
|
776 | 0 | if( h->param.rc.i_rc_method != X264_RC_ABR && h->param.rc.b_stat_read ) |
777 | 0 | { |
778 | 0 | x264_log( h, X264_LOG_ERROR, "CRF/CQP is incompatible with 2pass.\n" ); |
779 | 0 | return -1; |
780 | 0 | } |
781 | | |
782 | 0 | x264_ratecontrol_init_reconfigurable( h, 1 ); |
783 | |
|
784 | 0 | if( h->param.i_nal_hrd ) |
785 | 0 | { |
786 | 0 | uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale; |
787 | 0 | uint64_t num = 90000; |
788 | 0 | x264_reduce_fraction64( &num, &denom ); |
789 | 0 | rc->hrd_multiply_denom = 90000 / num; |
790 | |
|
791 | 0 | double bits_required = log2( num ) |
792 | 0 | + log2( h->sps->vui.i_time_scale ) |
793 | 0 | + log2( h->sps->vui.hrd.i_cpb_size_unscaled ); |
794 | 0 | if( bits_required >= 63 ) |
795 | 0 | { |
796 | 0 | x264_log( h, X264_LOG_ERROR, "HRD with very large timescale and bufsize not supported\n" ); |
797 | 0 | return -1; |
798 | 0 | } |
799 | 0 | } |
800 | | |
801 | 0 | if( rc->rate_tolerance < 0.01 ) |
802 | 0 | { |
803 | 0 | x264_log( h, X264_LOG_WARNING, "bitrate tolerance too small, using .01\n" ); |
804 | 0 | rc->rate_tolerance = 0.01; |
805 | 0 | } |
806 | |
|
807 | 0 | h->mb.b_variable_qp = rc->b_vbv || h->param.rc.i_aq_mode; |
808 | |
|
809 | 0 | if( rc->b_abr ) |
810 | 0 | { |
811 | | /* FIXME ABR_INIT_QP is actually used only in CRF */ |
812 | 0 | #define ABR_INIT_QP (( h->param.rc.i_rc_method == X264_RC_CRF ? h->param.rc.f_rf_constant : 24 ) + QP_BD_OFFSET) |
813 | 0 | rc->accum_p_norm = .01; |
814 | 0 | rc->accum_p_qp = ABR_INIT_QP * rc->accum_p_norm; |
815 | | /* estimated ratio that produces a reasonable QP for the first I-frame */ |
816 | 0 | rc->cplxr_sum = .01 * pow( 7.0e5, rc->qcompress ) * pow( h->mb.i_mb_count, 0.5 ); |
817 | 0 | rc->wanted_bits_window = 1.0 * rc->bitrate / rc->fps; |
818 | 0 | rc->last_non_b_pict_type = SLICE_TYPE_I; |
819 | 0 | } |
820 | |
|
821 | 0 | rc->ip_offset = 6.0 * log2f( h->param.rc.f_ip_factor ); |
822 | 0 | rc->pb_offset = 6.0 * log2f( h->param.rc.f_pb_factor ); |
823 | 0 | rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant; |
824 | 0 | rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, QP_MAX ); |
825 | 0 | rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, QP_MAX ); |
826 | 0 | h->mb.ip_offset = rc->ip_offset + 0.5; |
827 | |
|
828 | 0 | rc->lstep = pow( 2, h->param.rc.i_qp_step / 6.0 ); |
829 | 0 | rc->last_qscale = qp2qscale( 26 + QP_BD_OFFSET ); |
830 | 0 | int num_preds = h->param.b_sliced_threads * h->param.i_threads + 1; |
831 | 0 | CHECKED_MALLOC( rc->pred, 5 * sizeof(predictor_t) * num_preds ); |
832 | 0 | CHECKED_MALLOC( rc->pred_b_from_p, sizeof(predictor_t) ); |
833 | 0 | static const float pred_coeff_table[3] = { 1.0, 1.0, 1.5 }; |
834 | 0 | for( int i = 0; i < 3; i++ ) |
835 | 0 | { |
836 | 0 | rc->last_qscale_for[i] = qp2qscale( ABR_INIT_QP ); |
837 | 0 | rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min ); |
838 | 0 | rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max ); |
839 | 0 | for( int j = 0; j < num_preds; j++ ) |
840 | 0 | { |
841 | 0 | rc->pred[i+j*5].coeff_min = pred_coeff_table[i] / 2; |
842 | 0 | rc->pred[i+j*5].coeff = pred_coeff_table[i]; |
843 | 0 | rc->pred[i+j*5].count = 1.0; |
844 | 0 | rc->pred[i+j*5].decay = 0.5; |
845 | 0 | rc->pred[i+j*5].offset = 0.0; |
846 | 0 | } |
847 | 0 | for( int j = 0; j < 2; j++ ) |
848 | 0 | { |
849 | 0 | rc->row_preds[i][j].coeff_min = .25 / 4; |
850 | 0 | rc->row_preds[i][j].coeff = .25; |
851 | 0 | rc->row_preds[i][j].count = 1.0; |
852 | 0 | rc->row_preds[i][j].decay = 0.5; |
853 | 0 | rc->row_preds[i][j].offset = 0.0; |
854 | 0 | } |
855 | 0 | } |
856 | 0 | rc->pred_b_from_p->coeff_min = 0.5 / 2; |
857 | 0 | rc->pred_b_from_p->coeff = 0.5; |
858 | 0 | rc->pred_b_from_p->count = 1.0; |
859 | 0 | rc->pred_b_from_p->decay = 0.5; |
860 | 0 | rc->pred_b_from_p->offset = 0.0; |
861 | |
|
862 | 0 | if( parse_zones( h ) < 0 ) |
863 | 0 | { |
864 | 0 | x264_log( h, X264_LOG_ERROR, "failed to parse zones\n" ); |
865 | 0 | return -1; |
866 | 0 | } |
867 | | |
868 | | /* Load stat file and init 2pass algo */ |
869 | 0 | if( h->param.rc.b_stat_read ) |
870 | 0 | { |
871 | 0 | char *p, *stats_in, *stats_buf; |
872 | | |
873 | | /* read 1st pass stats */ |
874 | 0 | assert( h->param.rc.psz_stat_in ); |
875 | 0 | stats_buf = stats_in = x264_slurp_file( h->param.rc.psz_stat_in ); |
876 | 0 | if( !stats_buf ) |
877 | 0 | { |
878 | 0 | x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n" ); |
879 | 0 | return -1; |
880 | 0 | } |
881 | 0 | if( h->param.rc.b_mb_tree ) |
882 | 0 | { |
883 | 0 | char *mbtree_stats_in = strcat_filename( h->param.rc.psz_stat_in, ".mbtree" ); |
884 | 0 | if( !mbtree_stats_in ) |
885 | 0 | return -1; |
886 | 0 | rc->p_mbtree_stat_file_in = x264_fopen( mbtree_stats_in, "rb" ); |
887 | 0 | x264_free( mbtree_stats_in ); |
888 | 0 | if( !rc->p_mbtree_stat_file_in ) |
889 | 0 | { |
890 | 0 | x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n" ); |
891 | 0 | return -1; |
892 | 0 | } |
893 | 0 | } |
894 | | |
895 | | /* check whether 1st pass options were compatible with current options */ |
896 | 0 | if( strncmp( stats_buf, "#options:", 9 ) ) |
897 | 0 | { |
898 | 0 | x264_log( h, X264_LOG_ERROR, "options list in stats file not valid\n" ); |
899 | 0 | return -1; |
900 | 0 | } |
901 | | |
902 | 0 | float res_factor, res_factor_bits; |
903 | 0 | { |
904 | 0 | int i, j; |
905 | 0 | uint32_t k, l; |
906 | 0 | char *opts = stats_buf; |
907 | 0 | stats_in = strchr( stats_buf, '\n' ); |
908 | 0 | if( !stats_in ) |
909 | 0 | return -1; |
910 | 0 | *stats_in = '\0'; |
911 | 0 | stats_in++; |
912 | 0 | if( sscanf( opts, "#options: %dx%d", &i, &j ) != 2 ) |
913 | 0 | { |
914 | 0 | x264_log( h, X264_LOG_ERROR, "resolution specified in stats file not valid\n" ); |
915 | 0 | return -1; |
916 | 0 | } |
917 | 0 | else if( h->param.rc.b_mb_tree ) |
918 | 0 | { |
919 | 0 | rc->mbtree.srcdim[0] = i; |
920 | 0 | rc->mbtree.srcdim[1] = j; |
921 | 0 | } |
922 | 0 | res_factor = (float)h->param.i_width * h->param.i_height / (i*j); |
923 | | /* Change in bits relative to resolution isn't quite linear on typical sources, |
924 | | * so we'll at least try to roughly approximate this effect. */ |
925 | 0 | res_factor_bits = powf( res_factor, 0.7 ); |
926 | |
|
927 | 0 | if( !( p = strstr( opts, "timebase=" ) ) || sscanf( p, "timebase=%u/%u", &k, &l ) != 2 ) |
928 | 0 | { |
929 | 0 | x264_log( h, X264_LOG_ERROR, "timebase specified in stats file not valid\n" ); |
930 | 0 | return -1; |
931 | 0 | } |
932 | 0 | if( k != h->param.i_timebase_num || l != h->param.i_timebase_den ) |
933 | 0 | { |
934 | 0 | x264_log( h, X264_LOG_ERROR, "timebase mismatch with 1st pass (%u/%u vs %u/%u)\n", |
935 | 0 | h->param.i_timebase_num, h->param.i_timebase_den, k, l ); |
936 | 0 | return -1; |
937 | 0 | } |
938 | | |
939 | 0 | CMP_OPT_FIRST_PASS( "bitdepth", BIT_DEPTH ); |
940 | 0 | CMP_OPT_FIRST_PASS( "weightp", X264_MAX( 0, h->param.analyse.i_weighted_pred ) ); |
941 | 0 | CMP_OPT_FIRST_PASS( "bframes", h->param.i_bframe ); |
942 | 0 | CMP_OPT_FIRST_PASS( "b_pyramid", h->param.i_bframe_pyramid ); |
943 | 0 | CMP_OPT_FIRST_PASS( "intra_refresh", h->param.b_intra_refresh ); |
944 | 0 | CMP_OPT_FIRST_PASS( "open_gop", h->param.b_open_gop ); |
945 | 0 | CMP_OPT_FIRST_PASS( "bluray_compat", h->param.b_bluray_compat ); |
946 | 0 | CMP_OPT_FIRST_PASS( "mbtree", h->param.rc.b_mb_tree ); |
947 | |
|
948 | 0 | if( (p = strstr( opts, "interlaced=" )) ) |
949 | 0 | { |
950 | 0 | char *current = h->param.b_interlaced ? h->param.b_tff ? "tff" : "bff" : h->param.b_fake_interlaced ? "fake" : "0"; |
951 | 0 | char buf[5]; |
952 | 0 | sscanf( p, "interlaced=%4s", buf ); |
953 | 0 | if( strcmp( current, buf ) ) |
954 | 0 | { |
955 | 0 | x264_log( h, X264_LOG_ERROR, "different interlaced setting than first pass (%s vs %s)\n", current, buf ); |
956 | 0 | return -1; |
957 | 0 | } |
958 | 0 | } |
959 | | |
960 | 0 | if( (p = strstr( opts, "keyint=" )) ) |
961 | 0 | { |
962 | 0 | p += 7; |
963 | 0 | char buf[13] = "infinite "; |
964 | 0 | if( h->param.i_keyint_max != X264_KEYINT_MAX_INFINITE ) |
965 | 0 | sprintf( buf, "%d ", h->param.i_keyint_max ); |
966 | 0 | if( strncmp( p, buf, strlen(buf) ) ) |
967 | 0 | { |
968 | 0 | x264_log( h, X264_LOG_ERROR, "different keyint setting than first pass (%.*s vs %.*s)\n", |
969 | 0 | strlen(buf)-1, buf, strcspn(p, " "), p ); |
970 | 0 | return -1; |
971 | 0 | } |
972 | 0 | } |
973 | | |
974 | 0 | if( strstr( opts, "qp=0" ) && h->param.rc.i_rc_method == X264_RC_ABR ) |
975 | 0 | x264_log( h, X264_LOG_WARNING, "1st pass was lossless, bitrate prediction will be inaccurate\n" ); |
976 | |
|
977 | 0 | if( !strstr( opts, "direct=3" ) && h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO ) |
978 | 0 | { |
979 | 0 | x264_log( h, X264_LOG_WARNING, "direct=auto not used on the first pass\n" ); |
980 | 0 | h->mb.b_direct_auto_write = 1; |
981 | 0 | } |
982 | |
|
983 | 0 | if( ( p = strstr( opts, "b_adapt=" ) ) && sscanf( p, "b_adapt=%d", &i ) && i >= X264_B_ADAPT_NONE && i <= X264_B_ADAPT_TRELLIS ) |
984 | 0 | h->param.i_bframe_adaptive = i; |
985 | 0 | else if( h->param.i_bframe ) |
986 | 0 | { |
987 | 0 | x264_log( h, X264_LOG_ERROR, "b_adapt method specified in stats file not valid\n" ); |
988 | 0 | return -1; |
989 | 0 | } |
990 | | |
991 | 0 | if( (h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size) && ( p = strstr( opts, "rc_lookahead=" ) ) && sscanf( p, "rc_lookahead=%d", &i ) ) |
992 | 0 | h->param.rc.i_lookahead = i; |
993 | 0 | } |
994 | | |
995 | | /* find number of pics */ |
996 | 0 | p = stats_in; |
997 | 0 | int num_entries; |
998 | 0 | for( num_entries = -1; p; num_entries++ ) |
999 | 0 | p = strchr( p + 1, ';' ); |
1000 | 0 | if( !num_entries ) |
1001 | 0 | { |
1002 | 0 | x264_log( h, X264_LOG_ERROR, "empty stats file\n" ); |
1003 | 0 | return -1; |
1004 | 0 | } |
1005 | 0 | rc->num_entries = num_entries; |
1006 | |
|
1007 | 0 | if( h->param.i_frame_total < rc->num_entries && h->param.i_frame_total > 0 ) |
1008 | 0 | { |
1009 | 0 | x264_log( h, X264_LOG_WARNING, "2nd pass has fewer frames than 1st pass (%d vs %d)\n", |
1010 | 0 | h->param.i_frame_total, rc->num_entries ); |
1011 | 0 | } |
1012 | 0 | if( h->param.i_frame_total > rc->num_entries ) |
1013 | 0 | { |
1014 | 0 | x264_log( h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d vs %d)\n", |
1015 | 0 | h->param.i_frame_total, rc->num_entries ); |
1016 | 0 | return -1; |
1017 | 0 | } |
1018 | | |
1019 | 0 | CHECKED_MALLOCZERO( rc->entry, rc->num_entries * sizeof(ratecontrol_entry_t) ); |
1020 | 0 | CHECKED_MALLOC( rc->entry_out, rc->num_entries * sizeof(ratecontrol_entry_t*) ); |
1021 | | |
1022 | | /* init all to skipped p frames */ |
1023 | 0 | for( int i = 0; i < rc->num_entries; i++ ) |
1024 | 0 | { |
1025 | 0 | ratecontrol_entry_t *rce = &rc->entry[i]; |
1026 | 0 | rce->pict_type = SLICE_TYPE_P; |
1027 | 0 | rce->qscale = rce->new_qscale = qp2qscale( 20 + QP_BD_OFFSET ); |
1028 | 0 | rce->misc_bits = rc->nmb + 10; |
1029 | 0 | rce->new_qp = 0; |
1030 | 0 | rc->entry_out[i] = rce; |
1031 | 0 | } |
1032 | | |
1033 | | /* read stats */ |
1034 | 0 | p = stats_in; |
1035 | 0 | double total_qp_aq = 0; |
1036 | 0 | for( int i = 0; i < rc->num_entries; i++ ) |
1037 | 0 | { |
1038 | 0 | ratecontrol_entry_t *rce; |
1039 | 0 | int frame_number = 0; |
1040 | 0 | int frame_out_number = 0; |
1041 | 0 | char pict_type = 0; |
1042 | 0 | int e; |
1043 | 0 | char *next; |
1044 | 0 | float qp_rc, qp_aq; |
1045 | 0 | int ref; |
1046 | |
|
1047 | 0 | next= strchr(p, ';'); |
1048 | 0 | if( next ) |
1049 | 0 | *next++ = 0; //sscanf is unbelievably slow on long strings |
1050 | 0 | e = sscanf( p, " in:%d out:%d ", &frame_number, &frame_out_number ); |
1051 | |
|
1052 | 0 | if( frame_number < 0 || frame_number >= rc->num_entries ) |
1053 | 0 | { |
1054 | 0 | x264_log( h, X264_LOG_ERROR, "bad frame number (%d) at stats line %d\n", frame_number, i ); |
1055 | 0 | return -1; |
1056 | 0 | } |
1057 | 0 | if( frame_out_number < 0 || frame_out_number >= rc->num_entries ) |
1058 | 0 | { |
1059 | 0 | x264_log( h, X264_LOG_ERROR, "bad frame output number (%d) at stats line %d\n", frame_out_number, i ); |
1060 | 0 | return -1; |
1061 | 0 | } |
1062 | 0 | rce = &rc->entry[frame_number]; |
1063 | 0 | rc->entry_out[frame_out_number] = rce; |
1064 | 0 | rce->direct_mode = 0; |
1065 | |
|
1066 | 0 | e += sscanf( p, " in:%*d out:%*d type:%c dur:%"SCNd64" cpbdur:%"SCNd64" q:%f aq:%f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c", |
1067 | 0 | &pict_type, &rce->i_duration, &rce->i_cpb_duration, &qp_rc, &qp_aq, &rce->tex_bits, |
1068 | 0 | &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count, |
1069 | 0 | &rce->s_count, &rce->direct_mode ); |
1070 | 0 | rce->tex_bits *= res_factor_bits; |
1071 | 0 | rce->mv_bits *= res_factor_bits; |
1072 | 0 | rce->misc_bits *= res_factor_bits; |
1073 | 0 | rce->i_count *= res_factor; |
1074 | 0 | rce->p_count *= res_factor; |
1075 | 0 | rce->s_count *= res_factor; |
1076 | |
|
1077 | 0 | p = strstr( p, "ref:" ); |
1078 | 0 | if( !p ) |
1079 | 0 | goto parse_error; |
1080 | 0 | p += 4; |
1081 | 0 | for( ref = 0; ref < 16; ref++ ) |
1082 | 0 | { |
1083 | 0 | if( sscanf( p, " %d", &rce->refcount[ref] ) != 1 ) |
1084 | 0 | break; |
1085 | 0 | p = strchr( p+1, ' ' ); |
1086 | 0 | if( !p ) |
1087 | 0 | goto parse_error; |
1088 | 0 | } |
1089 | 0 | rce->refs = ref; |
1090 | | |
1091 | | /* find weights */ |
1092 | 0 | rce->i_weight_denom[0] = rce->i_weight_denom[1] = -1; |
1093 | 0 | char *w = strchr( p, 'w' ); |
1094 | 0 | if( w ) |
1095 | 0 | { |
1096 | 0 | int count = sscanf( w, "w:%hd,%hd,%hd,%hd,%hd,%hd,%hd,%hd", |
1097 | 0 | &rce->i_weight_denom[0], &rce->weight[0][0], &rce->weight[0][1], |
1098 | 0 | &rce->i_weight_denom[1], &rce->weight[1][0], &rce->weight[1][1], |
1099 | 0 | &rce->weight[2][0], &rce->weight[2][1] ); |
1100 | 0 | if( count == 3 ) |
1101 | 0 | rce->i_weight_denom[1] = -1; |
1102 | 0 | else if( count != 8 ) |
1103 | 0 | rce->i_weight_denom[0] = rce->i_weight_denom[1] = -1; |
1104 | 0 | } |
1105 | |
|
1106 | 0 | if( pict_type != 'b' ) |
1107 | 0 | rce->kept_as_ref = 1; |
1108 | 0 | switch( pict_type ) |
1109 | 0 | { |
1110 | 0 | case 'I': |
1111 | 0 | rce->frame_type = X264_TYPE_IDR; |
1112 | 0 | rce->pict_type = SLICE_TYPE_I; |
1113 | 0 | break; |
1114 | 0 | case 'i': |
1115 | 0 | rce->frame_type = X264_TYPE_I; |
1116 | 0 | rce->pict_type = SLICE_TYPE_I; |
1117 | 0 | break; |
1118 | 0 | case 'P': |
1119 | 0 | rce->frame_type = X264_TYPE_P; |
1120 | 0 | rce->pict_type = SLICE_TYPE_P; |
1121 | 0 | break; |
1122 | 0 | case 'B': |
1123 | 0 | rce->frame_type = X264_TYPE_BREF; |
1124 | 0 | rce->pict_type = SLICE_TYPE_B; |
1125 | 0 | break; |
1126 | 0 | case 'b': |
1127 | 0 | rce->frame_type = X264_TYPE_B; |
1128 | 0 | rce->pict_type = SLICE_TYPE_B; |
1129 | 0 | break; |
1130 | 0 | default: e = -1; break; |
1131 | 0 | } |
1132 | 0 | if( e < 14 ) |
1133 | 0 | { |
1134 | 0 | parse_error: |
1135 | 0 | x264_log( h, X264_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e ); |
1136 | 0 | return -1; |
1137 | 0 | } |
1138 | 0 | rce->qscale = qp2qscale( qp_rc ); |
1139 | 0 | total_qp_aq += qp_aq; |
1140 | 0 | p = next; |
1141 | 0 | } |
1142 | 0 | if( !h->param.b_stitchable ) |
1143 | 0 | h->pps->i_pic_init_qp = SPEC_QP( (int)(total_qp_aq / rc->num_entries + 0.5) ); |
1144 | |
|
1145 | 0 | x264_free( stats_buf ); |
1146 | |
|
1147 | 0 | if( h->param.rc.i_rc_method == X264_RC_ABR ) |
1148 | 0 | { |
1149 | 0 | if( init_pass2( h ) < 0 ) |
1150 | 0 | return -1; |
1151 | 0 | } /* else we're using constant quant, so no need to run the bitrate allocation */ |
1152 | 0 | } |
1153 | | |
1154 | | /* Open output file */ |
1155 | | /* If input and output files are the same, output to a temp file |
1156 | | * and move it to the real name only when it's complete */ |
1157 | 0 | if( h->param.rc.b_stat_write ) |
1158 | 0 | { |
1159 | 0 | char *p; |
1160 | 0 | rc->psz_stat_file_tmpname = strcat_filename( h->param.rc.psz_stat_out, ".temp" ); |
1161 | 0 | if( !rc->psz_stat_file_tmpname ) |
1162 | 0 | return -1; |
1163 | | |
1164 | 0 | rc->p_stat_file_out = x264_fopen( rc->psz_stat_file_tmpname, "wb" ); |
1165 | 0 | if( rc->p_stat_file_out == NULL ) |
1166 | 0 | { |
1167 | 0 | x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n" ); |
1168 | 0 | return -1; |
1169 | 0 | } |
1170 | | |
1171 | 0 | p = x264_param2string( &h->param, 1 ); |
1172 | 0 | if( p ) |
1173 | 0 | fprintf( rc->p_stat_file_out, "#options: %s\n", p ); |
1174 | 0 | x264_free( p ); |
1175 | 0 | if( h->param.rc.b_mb_tree && !h->param.rc.b_stat_read ) |
1176 | 0 | { |
1177 | 0 | rc->psz_mbtree_stat_file_tmpname = strcat_filename( h->param.rc.psz_stat_out, ".mbtree.temp" ); |
1178 | 0 | rc->psz_mbtree_stat_file_name = strcat_filename( h->param.rc.psz_stat_out, ".mbtree" ); |
1179 | 0 | if( !rc->psz_mbtree_stat_file_tmpname || !rc->psz_mbtree_stat_file_name ) |
1180 | 0 | return -1; |
1181 | | |
1182 | 0 | rc->p_mbtree_stat_file_out = x264_fopen( rc->psz_mbtree_stat_file_tmpname, "wb" ); |
1183 | 0 | if( rc->p_mbtree_stat_file_out == NULL ) |
1184 | 0 | { |
1185 | 0 | x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n" ); |
1186 | 0 | return -1; |
1187 | 0 | } |
1188 | 0 | } |
1189 | 0 | } |
1190 | | |
1191 | 0 | if( h->param.rc.b_mb_tree && (h->param.rc.b_stat_read || h->param.rc.b_stat_write) ) |
1192 | 0 | { |
1193 | 0 | if( !h->param.rc.b_stat_read ) |
1194 | 0 | { |
1195 | 0 | rc->mbtree.srcdim[0] = h->param.i_width; |
1196 | 0 | rc->mbtree.srcdim[1] = h->param.i_height; |
1197 | 0 | } |
1198 | 0 | if( macroblock_tree_rescale_init( h, rc ) < 0 ) |
1199 | 0 | return -1; |
1200 | 0 | } |
1201 | | |
1202 | 0 | for( int i = 0; i<h->param.i_threads; i++ ) |
1203 | 0 | { |
1204 | 0 | h->thread[i]->rc = rc+i; |
1205 | 0 | if( i ) |
1206 | 0 | { |
1207 | 0 | rc[i] = rc[0]; |
1208 | 0 | h->thread[i]->param = h->param; |
1209 | 0 | h->thread[i]->mb.b_variable_qp = h->mb.b_variable_qp; |
1210 | 0 | h->thread[i]->mb.ip_offset = h->mb.ip_offset; |
1211 | 0 | } |
1212 | 0 | } |
1213 | |
|
1214 | 0 | return 0; |
1215 | 0 | fail: |
1216 | 0 | return -1; |
1217 | 0 | } Unexecuted instantiation: x264_8_ratecontrol_new Unexecuted instantiation: x264_10_ratecontrol_new |
1218 | | |
1219 | | static int parse_zone( x264_t *h, x264_zone_t *z, char *p ) |
1220 | 0 | { |
1221 | 0 | int len = 0; |
1222 | 0 | char *tok, UNUSED *saveptr=NULL; |
1223 | 0 | z->param = NULL; |
1224 | 0 | z->f_bitrate_factor = 1; |
1225 | 0 | if( 3 <= sscanf(p, "%d,%d,q=%d%n", &z->i_start, &z->i_end, &z->i_qp, &len) ) |
1226 | 0 | z->b_force_qp = 1; |
1227 | 0 | else if( 3 <= sscanf(p, "%d,%d,b=%f%n", &z->i_start, &z->i_end, &z->f_bitrate_factor, &len) ) |
1228 | 0 | z->b_force_qp = 0; |
1229 | 0 | else if( 2 <= sscanf(p, "%d,%d%n", &z->i_start, &z->i_end, &len) ) |
1230 | 0 | z->b_force_qp = 0; |
1231 | 0 | else |
1232 | 0 | { |
1233 | 0 | x264_log( h, X264_LOG_ERROR, "invalid zone: \"%s\"\n", p ); |
1234 | 0 | return -1; |
1235 | 0 | } |
1236 | 0 | p += len; |
1237 | 0 | if( !*p ) |
1238 | 0 | return 0; |
1239 | 0 | CHECKED_MALLOC( z->param, sizeof(x264_param_t) ); |
1240 | 0 | memcpy( z->param, &h->param, sizeof(x264_param_t) ); |
1241 | 0 | z->param->opaque = NULL; |
1242 | 0 | z->param->param_free = x264_free; |
1243 | 0 | while( (tok = strtok_r( p, ",", &saveptr )) ) |
1244 | 0 | { |
1245 | 0 | char *val = strchr( tok, '=' ); |
1246 | 0 | if( val ) |
1247 | 0 | { |
1248 | 0 | *val = '\0'; |
1249 | 0 | val++; |
1250 | 0 | } |
1251 | 0 | if( x264_param_parse( z->param, tok, val ) ) |
1252 | 0 | { |
1253 | 0 | x264_log( h, X264_LOG_ERROR, "invalid zone param: %s = %s\n", tok, val ); |
1254 | 0 | return -1; |
1255 | 0 | } |
1256 | 0 | p = NULL; |
1257 | 0 | } |
1258 | 0 | return 0; |
1259 | 0 | fail: |
1260 | 0 | return -1; |
1261 | 0 | } |
1262 | | |
1263 | | static int parse_zones( x264_t *h ) |
1264 | 0 | { |
1265 | 0 | x264_ratecontrol_t *rc = h->rc; |
1266 | 0 | if( h->param.rc.psz_zones && !h->param.rc.i_zones ) |
1267 | 0 | { |
1268 | 0 | char *psz_zones, *p; |
1269 | 0 | CHECKED_MALLOC( psz_zones, strlen( h->param.rc.psz_zones )+1 ); |
1270 | 0 | strcpy( psz_zones, h->param.rc.psz_zones ); |
1271 | 0 | h->param.rc.i_zones = 1; |
1272 | 0 | for( p = psz_zones; *p; p++ ) |
1273 | 0 | h->param.rc.i_zones += (*p == '/'); |
1274 | 0 | CHECKED_MALLOC( h->param.rc.zones, h->param.rc.i_zones * sizeof(x264_zone_t) ); |
1275 | 0 | p = psz_zones; |
1276 | 0 | for( int i = 0; i < h->param.rc.i_zones; i++ ) |
1277 | 0 | { |
1278 | 0 | int i_tok = strcspn( p, "/" ); |
1279 | 0 | p[i_tok] = 0; |
1280 | 0 | if( parse_zone( h, &h->param.rc.zones[i], p ) ) |
1281 | 0 | { |
1282 | 0 | x264_free( psz_zones ); |
1283 | 0 | return -1; |
1284 | 0 | } |
1285 | 0 | p += i_tok + 1; |
1286 | 0 | } |
1287 | 0 | x264_free( psz_zones ); |
1288 | 0 | } |
1289 | | |
1290 | 0 | if( h->param.rc.i_zones > 0 ) |
1291 | 0 | { |
1292 | 0 | for( int i = 0; i < h->param.rc.i_zones; i++ ) |
1293 | 0 | { |
1294 | 0 | x264_zone_t z = h->param.rc.zones[i]; |
1295 | 0 | if( z.i_start < 0 || z.i_start > z.i_end ) |
1296 | 0 | { |
1297 | 0 | x264_log( h, X264_LOG_ERROR, "invalid zone: start=%d end=%d\n", |
1298 | 0 | z.i_start, z.i_end ); |
1299 | 0 | return -1; |
1300 | 0 | } |
1301 | 0 | else if( !z.b_force_qp && z.f_bitrate_factor <= 0 ) |
1302 | 0 | { |
1303 | 0 | x264_log( h, X264_LOG_ERROR, "invalid zone: bitrate_factor=%f\n", |
1304 | 0 | z.f_bitrate_factor ); |
1305 | 0 | return -1; |
1306 | 0 | } |
1307 | 0 | } |
1308 | | |
1309 | 0 | rc->i_zones = h->param.rc.i_zones + 1; |
1310 | 0 | CHECKED_MALLOC( rc->zones, rc->i_zones * sizeof(x264_zone_t) ); |
1311 | 0 | memcpy( rc->zones+1, h->param.rc.zones, (rc->i_zones-1) * sizeof(x264_zone_t) ); |
1312 | | |
1313 | | // default zone to fall back to if none of the others match |
1314 | 0 | rc->zones[0].i_start = 0; |
1315 | 0 | rc->zones[0].i_end = INT_MAX; |
1316 | 0 | rc->zones[0].b_force_qp = 0; |
1317 | 0 | rc->zones[0].f_bitrate_factor = 1; |
1318 | 0 | CHECKED_MALLOC( rc->zones[0].param, sizeof(x264_param_t) ); |
1319 | 0 | memcpy( rc->zones[0].param, &h->param, sizeof(x264_param_t) ); |
1320 | 0 | rc->zones[0].param->opaque = NULL; |
1321 | 0 | for( int i = 1; i < rc->i_zones; i++ ) |
1322 | 0 | { |
1323 | 0 | if( !rc->zones[i].param ) |
1324 | 0 | rc->zones[i].param = rc->zones[0].param; |
1325 | 0 | } |
1326 | 0 | } |
1327 | | |
1328 | 0 | return 0; |
1329 | 0 | fail: |
1330 | 0 | return -1; |
1331 | 0 | } |
1332 | | |
1333 | | static x264_zone_t *get_zone( x264_t *h, int frame_num ) |
1334 | 0 | { |
1335 | 0 | x264_ratecontrol_t *rc = h->rc; |
1336 | 0 | for( int i = rc->i_zones - 1; i >= 0; i-- ) |
1337 | 0 | { |
1338 | 0 | x264_zone_t *z = &rc->zones[i]; |
1339 | 0 | if( frame_num >= z->i_start && frame_num <= z->i_end ) |
1340 | 0 | return z; |
1341 | 0 | } |
1342 | 0 | return NULL; |
1343 | 0 | } |
1344 | | |
1345 | | void x264_ratecontrol_summary( x264_t *h ) |
1346 | 0 | { |
1347 | 0 | x264_ratecontrol_t *rc = h->rc; |
1348 | 0 | if( rc->b_abr && h->param.rc.i_rc_method == X264_RC_ABR && rc->cbr_decay > .9999 ) |
1349 | 0 | { |
1350 | 0 | double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80); |
1351 | 0 | double mbtree_offset = h->param.rc.b_mb_tree ? (1.0-h->param.rc.f_qcompress)*13.5 : 0; |
1352 | 0 | x264_log( h, X264_LOG_INFO, "final ratefactor: %.2f\n", |
1353 | 0 | qscale2qp( pow( base_cplx, 1 - rc->qcompress ) |
1354 | 0 | * rc->cplxr_sum / rc->wanted_bits_window ) - mbtree_offset - QP_BD_OFFSET ); |
1355 | 0 | } |
1356 | 0 | } Unexecuted instantiation: x264_8_ratecontrol_summary Unexecuted instantiation: x264_10_ratecontrol_summary |
1357 | | |
1358 | | void x264_ratecontrol_delete( x264_t *h ) |
1359 | 0 | { |
1360 | 0 | x264_ratecontrol_t *rc = h->rc; |
1361 | 0 | int b_regular_file; |
1362 | |
|
1363 | 0 | if( rc->p_stat_file_out ) |
1364 | 0 | { |
1365 | 0 | b_regular_file = x264_is_regular_file( rc->p_stat_file_out ); |
1366 | 0 | fclose( rc->p_stat_file_out ); |
1367 | 0 | if( h->i_frame >= rc->num_entries && b_regular_file ) |
1368 | 0 | if( x264_rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 ) |
1369 | 0 | { |
1370 | 0 | x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n", |
1371 | 0 | rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ); |
1372 | 0 | } |
1373 | 0 | x264_free( rc->psz_stat_file_tmpname ); |
1374 | 0 | } |
1375 | 0 | if( rc->p_mbtree_stat_file_out ) |
1376 | 0 | { |
1377 | 0 | b_regular_file = x264_is_regular_file( rc->p_mbtree_stat_file_out ); |
1378 | 0 | fclose( rc->p_mbtree_stat_file_out ); |
1379 | 0 | if( h->i_frame >= rc->num_entries && b_regular_file ) |
1380 | 0 | if( x264_rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 ) |
1381 | 0 | { |
1382 | 0 | x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n", |
1383 | 0 | rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ); |
1384 | 0 | } |
1385 | 0 | x264_free( rc->psz_mbtree_stat_file_tmpname ); |
1386 | 0 | x264_free( rc->psz_mbtree_stat_file_name ); |
1387 | 0 | } |
1388 | 0 | if( rc->p_mbtree_stat_file_in ) |
1389 | 0 | fclose( rc->p_mbtree_stat_file_in ); |
1390 | 0 | x264_free( rc->pred ); |
1391 | 0 | x264_free( rc->pred_b_from_p ); |
1392 | 0 | x264_free( rc->entry ); |
1393 | 0 | x264_free( rc->entry_out ); |
1394 | 0 | macroblock_tree_rescale_destroy( rc ); |
1395 | 0 | if( rc->zones ) |
1396 | 0 | { |
1397 | 0 | x264_param_cleanup( rc->zones[0].param ); |
1398 | 0 | x264_free( rc->zones[0].param ); |
1399 | 0 | for( int i = 1; i < rc->i_zones; i++ ) |
1400 | 0 | if( rc->zones[i].param != rc->zones[0].param && rc->zones[i].param->param_free ) |
1401 | 0 | { |
1402 | 0 | x264_param_cleanup( rc->zones[i].param ); |
1403 | 0 | rc->zones[i].param->param_free( rc->zones[i].param ); |
1404 | 0 | } |
1405 | 0 | x264_free( rc->zones ); |
1406 | 0 | } |
1407 | 0 | x264_free( rc ); |
1408 | 0 | } Unexecuted instantiation: x264_8_ratecontrol_delete Unexecuted instantiation: x264_10_ratecontrol_delete |
1409 | | |
1410 | | static void accum_p_qp_update( x264_t *h, float qp ) |
1411 | 0 | { |
1412 | 0 | x264_ratecontrol_t *rc = h->rc; |
1413 | 0 | rc->accum_p_qp *= .95; |
1414 | 0 | rc->accum_p_norm *= .95; |
1415 | 0 | rc->accum_p_norm += 1; |
1416 | 0 | if( h->sh.i_type == SLICE_TYPE_I ) |
1417 | 0 | rc->accum_p_qp += qp + rc->ip_offset; |
1418 | 0 | else |
1419 | 0 | rc->accum_p_qp += qp; |
1420 | 0 | } |
1421 | | |
1422 | | void x264_ratecontrol_zone_init( x264_t *h ) |
1423 | 0 | { |
1424 | 0 | x264_ratecontrol_t *rc = h->rc; |
1425 | 0 | x264_zone_t *zone = get_zone( h, h->fenc->i_frame ); |
1426 | 0 | if( zone && (!rc->prev_zone || zone->param != rc->prev_zone->param) ) |
1427 | 0 | x264_encoder_reconfig_apply( h, zone->param ); |
1428 | 0 | rc->prev_zone = zone; |
1429 | 0 | } Unexecuted instantiation: x264_8_ratecontrol_zone_init Unexecuted instantiation: x264_10_ratecontrol_zone_init |
1430 | | |
1431 | | /* Before encoding a frame, choose a QP for it */ |
1432 | | void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead ) |
1433 | 0 | { |
1434 | 0 | x264_ratecontrol_t *rc = h->rc; |
1435 | 0 | ratecontrol_entry_t *rce = NULL; |
1436 | 0 | x264_zone_t *zone = get_zone( h, h->fenc->i_frame ); |
1437 | 0 | float q; |
1438 | |
|
1439 | 0 | x264_emms(); |
1440 | |
|
1441 | 0 | if( h->param.rc.b_stat_read ) |
1442 | 0 | { |
1443 | 0 | int frame = h->fenc->i_frame; |
1444 | 0 | assert( frame >= 0 && frame < rc->num_entries ); |
1445 | 0 | rce = rc->rce = &rc->entry[frame]; |
1446 | |
|
1447 | 0 | if( h->sh.i_type == SLICE_TYPE_B |
1448 | 0 | && h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO ) |
1449 | 0 | { |
1450 | 0 | h->sh.b_direct_spatial_mv_pred = ( rce->direct_mode == 's' ); |
1451 | 0 | h->mb.b_direct_auto_read = ( rce->direct_mode == 's' || rce->direct_mode == 't' ); |
1452 | 0 | } |
1453 | 0 | } |
1454 | |
|
1455 | 0 | if( rc->b_vbv ) |
1456 | 0 | { |
1457 | 0 | memset( h->fdec->i_row_bits, 0, h->mb.i_mb_height * sizeof(int) ); |
1458 | 0 | memset( h->fdec->f_row_qp, 0, h->mb.i_mb_height * sizeof(float) ); |
1459 | 0 | memset( h->fdec->f_row_qscale, 0, h->mb.i_mb_height * sizeof(float) ); |
1460 | 0 | rc->row_pred = rc->row_preds[h->sh.i_type]; |
1461 | 0 | rc->buffer_rate = h->fenc->i_cpb_duration * rc->vbv_max_rate * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale; |
1462 | 0 | update_vbv_plan( h, overhead ); |
1463 | |
|
1464 | 0 | const x264_level_t *l = x264_levels; |
1465 | 0 | while( l->level_idc != 0 && l->level_idc != h->param.i_level_idc ) |
1466 | 0 | l++; |
1467 | |
|
1468 | 0 | int mincr = l->mincr; |
1469 | |
|
1470 | 0 | if( h->param.b_bluray_compat ) |
1471 | 0 | mincr = 4; |
1472 | | |
1473 | | /* Profiles above High don't require minCR, so just set the maximum to a large value. */ |
1474 | 0 | if( h->sps->i_profile_idc > PROFILE_HIGH ) |
1475 | 0 | rc->frame_size_maximum = 1e9; |
1476 | 0 | else |
1477 | 0 | { |
1478 | | /* The spec has a bizarre special case for the first frame. */ |
1479 | 0 | if( h->i_frame == 0 ) |
1480 | 0 | { |
1481 | | //384 * ( Max( PicSizeInMbs, fR * MaxMBPS ) + MaxMBPS * ( tr( 0 ) - tr,n( 0 ) ) ) / MinCR |
1482 | 0 | double fr = 1. / (h->param.i_level_idc >= 60 ? 300 : 172); |
1483 | 0 | int pic_size_in_mbs = h->mb.i_mb_width * h->mb.i_mb_height; |
1484 | 0 | rc->frame_size_maximum = 384 * BIT_DEPTH * X264_MAX( pic_size_in_mbs, fr*l->mbps ) / mincr; |
1485 | 0 | } |
1486 | 0 | else |
1487 | 0 | { |
1488 | | //384 * MaxMBPS * ( tr( n ) - tr( n - 1 ) ) / MinCR |
1489 | 0 | rc->frame_size_maximum = 384 * BIT_DEPTH * ((double)h->fenc->i_cpb_duration * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale) * l->mbps / mincr; |
1490 | 0 | } |
1491 | 0 | } |
1492 | 0 | } |
1493 | |
|
1494 | 0 | if( h->sh.i_type != SLICE_TYPE_B ) |
1495 | 0 | rc->bframes = h->fenc->i_bframes; |
1496 | |
|
1497 | 0 | if( rc->b_abr ) |
1498 | 0 | { |
1499 | 0 | q = qscale2qp( rate_estimate_qscale( h ) ); |
1500 | 0 | } |
1501 | 0 | else if( rc->b_2pass ) |
1502 | 0 | { |
1503 | 0 | rce->new_qscale = rate_estimate_qscale( h ); |
1504 | 0 | q = qscale2qp( rce->new_qscale ); |
1505 | 0 | } |
1506 | 0 | else /* CQP */ |
1507 | 0 | { |
1508 | 0 | if( h->sh.i_type == SLICE_TYPE_B && h->fdec->b_kept_as_ref ) |
1509 | 0 | q = ( rc->qp_constant[ SLICE_TYPE_B ] + rc->qp_constant[ SLICE_TYPE_P ] ) / 2; |
1510 | 0 | else |
1511 | 0 | q = rc->qp_constant[ h->sh.i_type ]; |
1512 | |
|
1513 | 0 | if( zone ) |
1514 | 0 | { |
1515 | 0 | if( zone->b_force_qp ) |
1516 | 0 | q += zone->i_qp - rc->qp_constant[SLICE_TYPE_P]; |
1517 | 0 | else |
1518 | 0 | q -= 6*log2f( zone->f_bitrate_factor ); |
1519 | 0 | } |
1520 | 0 | } |
1521 | 0 | if( i_force_qp != X264_QP_AUTO ) |
1522 | 0 | q = i_force_qp - 1; |
1523 | |
|
1524 | 0 | q = x264_clip3f( q, h->param.rc.i_qp_min, h->param.rc.i_qp_max ); |
1525 | |
|
1526 | 0 | rc->qpa_rc = rc->qpa_rc_prev = |
1527 | 0 | rc->qpa_aq = rc->qpa_aq_prev = 0; |
1528 | 0 | h->fdec->f_qp_avg_rc = |
1529 | 0 | h->fdec->f_qp_avg_aq = |
1530 | 0 | rc->qpm = q; |
1531 | 0 | if( rce ) |
1532 | 0 | rce->new_qp = q; |
1533 | |
|
1534 | 0 | accum_p_qp_update( h, rc->qpm ); |
1535 | |
|
1536 | 0 | if( h->sh.i_type != SLICE_TYPE_B ) |
1537 | 0 | rc->last_non_b_pict_type = h->sh.i_type; |
1538 | 0 | } Unexecuted instantiation: x264_8_ratecontrol_start Unexecuted instantiation: x264_10_ratecontrol_start |
1539 | | |
1540 | | static float predict_row_size( x264_t *h, int y, float qscale ) |
1541 | 0 | { |
1542 | | /* average between two predictors: |
1543 | | * absolute SATD, and scaled bit cost of the colocated row in the previous frame */ |
1544 | 0 | x264_ratecontrol_t *rc = h->rc; |
1545 | 0 | float pred_s = predict_size( &rc->row_pred[0], qscale, h->fdec->i_row_satd[y] ); |
1546 | 0 | if( h->sh.i_type == SLICE_TYPE_I || qscale >= h->fref[0][0]->f_row_qscale[y] ) |
1547 | 0 | { |
1548 | 0 | if( h->sh.i_type == SLICE_TYPE_P |
1549 | 0 | && h->fref[0][0]->i_type == h->fdec->i_type |
1550 | 0 | && h->fref[0][0]->f_row_qscale[y] > 0 |
1551 | 0 | && h->fref[0][0]->i_row_satd[y] > 0 |
1552 | 0 | && (abs(h->fref[0][0]->i_row_satd[y] - h->fdec->i_row_satd[y]) < h->fdec->i_row_satd[y]/2)) |
1553 | 0 | { |
1554 | 0 | float pred_t = h->fref[0][0]->i_row_bits[y] * h->fdec->i_row_satd[y] / h->fref[0][0]->i_row_satd[y] |
1555 | 0 | * h->fref[0][0]->f_row_qscale[y] / qscale; |
1556 | 0 | return (pred_s + pred_t) * 0.5f; |
1557 | 0 | } |
1558 | 0 | return pred_s; |
1559 | 0 | } |
1560 | | /* Our QP is lower than the reference! */ |
1561 | 0 | else |
1562 | 0 | { |
1563 | 0 | float pred_intra = predict_size( &rc->row_pred[1], qscale, h->fdec->i_row_satds[0][0][y] ); |
1564 | | /* Sum: better to overestimate than underestimate by using only one of the two predictors. */ |
1565 | 0 | return pred_intra + pred_s; |
1566 | 0 | } |
1567 | 0 | } |
1568 | | |
1569 | | static int row_bits_so_far( x264_t *h, int y ) |
1570 | 0 | { |
1571 | 0 | int bits = 0; |
1572 | 0 | for( int i = h->i_threadslice_start; i <= y; i++ ) |
1573 | 0 | bits += h->fdec->i_row_bits[i]; |
1574 | 0 | return bits; |
1575 | 0 | } |
1576 | | |
1577 | | static float predict_row_size_to_end( x264_t *h, int y, float qp ) |
1578 | 0 | { |
1579 | 0 | float qscale = qp2qscale( qp ); |
1580 | 0 | float bits = 0; |
1581 | 0 | for( int i = y+1; i < h->i_threadslice_end; i++ ) |
1582 | 0 | bits += predict_row_size( h, i, qscale ); |
1583 | 0 | return bits; |
1584 | 0 | } |
1585 | | |
/* TODO:
 *  eliminate all use of qp in row ratecontrol: make it entirely qscale-based.
 *  make this function stop being needlessly O(N^2)
 *  update more often than once per row? */

/* Row-level ratecontrol, called once per encoded macroblock.
 *
 * bits is added to the bit count of the current row (h->mb.i_mb_y) and the
 * macroblock's QP to rc->qpa_aq. Everything else happens only on the last
 * macroblock of a row, and the QP adaptation only when rc->b_vbv is set:
 * the row predictors are updated and rc->qpm is re-tuned against the
 * predicted size of the remaining rows.
 *
 * Returns 0 normally. Returns -1 after changing rc->qpm, restoring the QP
 * accumulators to their previous snapshot and zeroing the row's bit
 * counters, i.e. when the row should be encoded again. */
int x264_ratecontrol_mb( x264_t *h, int bits )
{
    x264_ratecontrol_t *rc = h->rc;
    const int y = h->mb.i_mb_y;

    /* Per-macroblock accounting. */
    h->fdec->i_row_bits[y] += bits;
    rc->qpa_aq += h->mb.i_qp;

    /* The rest runs once per row, after its last macroblock. */
    if( h->mb.i_mb_x != h->mb.i_mb_width - 1 )
        return 0;

    x264_emms();
    rc->qpa_rc += rc->qpm * h->mb.i_mb_width;

    if( !rc->b_vbv )
        return 0;

    /* Record the QP/qscale this row was coded with. */
    float qscale = qp2qscale( rc->qpm );
    h->fdec->f_row_qp[y] = rc->qpm;
    h->fdec->f_row_qscale[y] = qscale;

    /* Feed the row's actual size back into the row predictors. The second
     * predictor is only trained on non-I rows coded at a lower QP than the
     * same row of the first list-0 reference. */
    update_predictor( &rc->row_pred[0], qscale, h->fdec->i_row_satd[y], h->fdec->i_row_bits[y] );
    if( h->sh.i_type != SLICE_TYPE_I && rc->qpm < h->fref[0][0]->f_row_qp[y] )
        update_predictor( &rc->row_pred[1], qscale, h->fdec->i_row_satds[0][0][y], h->fdec->i_row_bits[y] );

    /* update ratecontrol per-mbpair in MBAFF */
    if( SLICE_MBAFF && !(y&1) )
        return 0;

    /* FIXME: We don't currently support the case where there's a slice
     * boundary in between. */
    int can_reencode_row = h->sh.i_first_mb <= ((h->mb.i_mb_y - SLICE_MBAFF) * h->mb.i_mb_stride);

    /* tweak quality based on difference from predicted size */
    float prev_row_qp = h->fdec->f_row_qp[y];
    float qp_absolute_max = h->param.rc.i_qp_max;
    if( rc->rate_factor_max_increment )
        qp_absolute_max = X264_MIN( qp_absolute_max, rc->qp_novbv + rc->rate_factor_max_increment );
    /* The normal search range is one i_qp_step either side of this row's QP. */
    float qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, qp_absolute_max );
    float qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min );
    float step_size = 0.5f;
    float slice_size_planned = h->param.b_sliced_threads ? rc->slice_size_planned : rc->frame_size_planned;
    float bits_so_far = row_bits_so_far( h, y );
    rc->bits_so_far = bits_so_far;
    /* Safety margin: 1/mb_height clipped to [0.05, 0.25], applied to both the
     * maximum frame size and the VBV buffer limit. */
    float max_frame_error = x264_clip3f( 1.0 / h->mb.i_mb_height, 0.05, 0.25 );
    float max_frame_size = rc->frame_size_maximum - rc->frame_size_maximum * max_frame_error;
    max_frame_size = X264_MIN( max_frame_size, rc->buffer_fill - rc->buffer_rate * max_frame_error );
    float size_of_other_slices = 0;
    if( h->param.b_sliced_threads )
    {
        /* Estimate the size of the slices handled by the other threads:
         * blend their own estimates with a planned size, weighted by how much
         * of the estimated frame has already been produced. */
        float bits_so_far_of_other_slices = 0;
        for( int i = 0; i < h->param.i_threads; i++ )
            if( h != h->thread[i] )
            {
                size_of_other_slices += h->thread[i]->rc->frame_size_estimated;
                bits_so_far_of_other_slices += h->thread[i]->rc->bits_so_far;
            }
        float weight = x264_clip3f( (bits_so_far_of_other_slices + rc->frame_size_estimated) / (size_of_other_slices + rc->frame_size_estimated), 0.0, 1.0 );
        float frame_size_planned = rc->frame_size_planned - rc->frame_size_planned * max_frame_error;
        float size_of_other_slices_planned = X264_MIN( frame_size_planned, max_frame_size ) - rc->slice_size_planned;
        size_of_other_slices_planned = X264_MAX( size_of_other_slices_planned, bits_so_far_of_other_slices );
        size_of_other_slices = (size_of_other_slices - size_of_other_slices_planned) * weight + size_of_other_slices_planned;
    }
    if( y < h->i_threadslice_end-1 )
    {
        /* Not the last row of the slice: choose the QP for the next row. */

        /* B-frames shouldn't use lower QP than their reference frames. */
        if( h->sh.i_type == SLICE_TYPE_B )
        {
            qp_min = X264_MAX( qp_min, X264_MAX( h->fref[0][0]->f_row_qp[y+1], h->fref[1][0]->f_row_qp[y+1] ) );
            rc->qpm = X264_MAX( rc->qpm, qp_min );
        }

        float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
        buffer_left_planned = X264_MAX( buffer_left_planned, 0.f );
        /* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */
        float rc_tol = buffer_left_planned / h->param.i_threads * rc->rate_tolerance;
        /* b1: predicted total size at the current rc->qpm. */
        float b1 = bits_so_far + predict_row_size_to_end( h, y, rc->qpm ) + size_of_other_slices;
        /* Fraction of the planned slice size already spent, clipped to [0,1]. */
        float trust_coeff = x264_clip3f( bits_so_far / slice_size_planned, 0.0, 1.0 );

        /* Don't increase the row QPs until a sufficient amount of the bits of the frame have been processed, in case a flat */
        /* area at the top of the frame was measured inaccurately. */
        if( trust_coeff < 0.05f )
            qp_max = qp_absolute_max = prev_row_qp;

        /* Non-I slices get half the tolerance. */
        if( h->sh.i_type != SLICE_TYPE_I )
            rc_tol *= 0.5f;

        if( !rc->b_vbv_min_rate )
            qp_min = X264_MAX( qp_min, rc->qp_novbv );

        /* Raise the QP in half steps (up to qp_max) while the prediction
         * exceeds the plan plus tolerance, exceeds the plan while below
         * qp_novbv, or would eat more than half of the planned buffer slack. */
        while( rc->qpm < qp_max
               && ((b1 > rc->frame_size_planned + rc_tol) ||
                   (b1 > rc->frame_size_planned && rc->qpm < rc->qp_novbv) ||
                   (b1 > rc->buffer_fill - buffer_left_planned * 0.5f)) )
        {
            rc->qpm += step_size;
            b1 = bits_so_far + predict_row_size_to_end( h, y, rc->qpm ) + size_of_other_slices;
        }

        /* Then probe downwards: b2 is the prediction one half step below the
         * QP that produced b1. The probing step is undone after the loop. */
        float b_max = b1 + ((rc->buffer_fill - rc->buffer_size + rc->buffer_rate) * 0.90f - b1) * trust_coeff;
        rc->qpm -= step_size;
        float b2 = bits_so_far + predict_row_size_to_end( h, y, rc->qpm ) + size_of_other_slices;
        while( rc->qpm > qp_min && rc->qpm < prev_row_qp
               && (rc->qpm > h->fdec->f_row_qp[0] || rc->single_frame_vbv)
               && (b2 < max_frame_size)
               && ((b2 < rc->frame_size_planned * 0.8f) || (b2 < b_max)) )
        {
            b1 = b2;
            rc->qpm -= step_size;
            b2 = bits_so_far + predict_row_size_to_end( h, y, rc->qpm ) + size_of_other_slices;
        }
        rc->qpm += step_size;

        /* avoid VBV underflow or MinCR violation */
        while( rc->qpm < qp_absolute_max && (b1 > max_frame_size) )
        {
            rc->qpm += step_size;
            b1 = bits_so_far + predict_row_size_to_end( h, y, rc->qpm ) + size_of_other_slices;
        }

        rc->frame_size_estimated = b1 - size_of_other_slices;

        /* If the current row was large enough to cause a large QP jump, try re-encoding it. */
        if( rc->qpm > qp_max && prev_row_qp < qp_max && can_reencode_row )
        {
            /* Bump QP to halfway in between... close enough. */
            rc->qpm = x264_clip3f( (prev_row_qp + rc->qpm)*0.5f, prev_row_qp + 1.0f, qp_max );
            /* Roll the accumulators back to the snapshot saved at the end of
             * this function and discard the bits counted for this row (and
             * its MBAFF partner row). */
            rc->qpa_rc = rc->qpa_rc_prev;
            rc->qpa_aq = rc->qpa_aq_prev;
            h->fdec->i_row_bits[y] = 0;
            h->fdec->i_row_bits[y-SLICE_MBAFF] = 0;
            return -1;
        }
    }
    else
    {
        /* Last row of the slice: nothing is left to predict. */
        rc->frame_size_estimated = bits_so_far;

        /* Last-ditch attempt: if the last row of the frame underflowed the VBV,
         * try again. */
        if( rc->qpm < qp_max && can_reencode_row
            && (bits_so_far + size_of_other_slices > X264_MIN( rc->frame_size_maximum, rc->buffer_fill )) )
        {
            rc->qpm = qp_max;
            rc->qpa_rc = rc->qpa_rc_prev;
            rc->qpa_aq = rc->qpa_aq_prev;
            h->fdec->i_row_bits[y] = 0;
            h->fdec->i_row_bits[y-SLICE_MBAFF] = 0;
            return -1;
        }
    }

    /* Row accepted: snapshot the accumulators for a possible later rollback. */
    rc->qpa_rc_prev = rc->qpa_rc;
    rc->qpa_aq_prev = rc->qpa_aq;

    return 0;
}
1747 | | |
1748 | | int x264_ratecontrol_qp( x264_t *h ) |
1749 | 0 | { |
1750 | 0 | x264_emms(); |
1751 | 0 | return x264_clip3( h->rc->qpm + 0.5f, h->param.rc.i_qp_min, h->param.rc.i_qp_max ); |
1752 | 0 | } Unexecuted instantiation: x264_8_ratecontrol_qp Unexecuted instantiation: x264_10_ratecontrol_qp |
1753 | | |
1754 | | int x264_ratecontrol_mb_qp( x264_t *h ) |
1755 | 0 | { |
1756 | 0 | x264_emms(); |
1757 | 0 | float qp = h->rc->qpm; |
1758 | 0 | if( h->param.rc.i_aq_mode ) |
1759 | 0 | { |
1760 | | /* MB-tree currently doesn't adjust quantizers in unreferenced frames. */ |
1761 | 0 | float qp_offset = h->fdec->b_kept_as_ref ? h->fenc->f_qp_offset[h->mb.i_mb_xy] : h->fenc->f_qp_offset_aq[h->mb.i_mb_xy]; |
1762 | | /* Scale AQ's effect towards zero in emergency mode. */ |
1763 | 0 | if( qp > QP_MAX_SPEC ) |
1764 | 0 | qp_offset *= (QP_MAX - qp) / (QP_MAX - QP_MAX_SPEC); |
1765 | 0 | qp += qp_offset; |
1766 | 0 | } |
1767 | 0 | return x264_clip3( qp + 0.5f, h->param.rc.i_qp_min, h->param.rc.i_qp_max ); |
1768 | 0 | } Unexecuted instantiation: x264_8_ratecontrol_mb_qp Unexecuted instantiation: x264_10_ratecontrol_mb_qp |
1769 | | |
1770 | | /* In 2pass, force the same frame types as in the 1st pass */ |
1771 | | int x264_ratecontrol_slice_type( x264_t *h, int frame_num ) |
1772 | 0 | { |
1773 | 0 | x264_ratecontrol_t *rc = h->rc; |
1774 | 0 | if( h->param.rc.b_stat_read ) |
1775 | 0 | { |
1776 | 0 | if( frame_num >= rc->num_entries ) |
1777 | 0 | { |
1778 | | /* We could try to initialize everything required for ABR and |
1779 | | * adaptive B-frames, but that would be complicated. |
1780 | | * So just calculate the average QP used so far. */ |
1781 | 0 | h->param.rc.i_qp_constant = (h->stat.i_frame_count[SLICE_TYPE_P] == 0) ? 24 + QP_BD_OFFSET |
1782 | 0 | : 1 + h->stat.f_frame_qp[SLICE_TYPE_P] / h->stat.i_frame_count[SLICE_TYPE_P]; |
1783 | 0 | rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX ); |
1784 | 0 | rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / h->param.rc.f_ip_factor ) + 0.5 ), 0, QP_MAX ); |
1785 | 0 | rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * h->param.rc.f_pb_factor ) + 0.5 ), 0, QP_MAX ); |
1786 | |
|
1787 | 0 | x264_log( h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries ); |
1788 | 0 | x264_log( h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant ); |
1789 | 0 | if( h->param.i_bframe_adaptive ) |
1790 | 0 | x264_log( h, X264_LOG_ERROR, "disabling adaptive B-frames\n" ); |
1791 | |
|
1792 | 0 | for( int i = 0; i < h->param.i_threads; i++ ) |
1793 | 0 | { |
1794 | 0 | h->thread[i]->rc->b_abr = 0; |
1795 | 0 | h->thread[i]->rc->b_2pass = 0; |
1796 | 0 | h->thread[i]->param.rc.i_rc_method = X264_RC_CQP; |
1797 | 0 | h->thread[i]->param.rc.b_stat_read = 0; |
1798 | 0 | h->thread[i]->param.i_bframe_adaptive = 0; |
1799 | 0 | h->thread[i]->param.i_scenecut_threshold = 0; |
1800 | 0 | h->thread[i]->param.rc.b_mb_tree = 0; |
1801 | 0 | if( h->thread[i]->param.i_bframe > 1 ) |
1802 | 0 | h->thread[i]->param.i_bframe = 1; |
1803 | 0 | } |
1804 | 0 | return X264_TYPE_AUTO; |
1805 | 0 | } |
1806 | 0 | return rc->entry[frame_num].frame_type; |
1807 | 0 | } |
1808 | 0 | else |
1809 | 0 | return X264_TYPE_AUTO; |
1810 | 0 | } Unexecuted instantiation: x264_8_ratecontrol_slice_type Unexecuted instantiation: x264_10_ratecontrol_slice_type |
1811 | | |
1812 | | void x264_ratecontrol_set_weights( x264_t *h, x264_frame_t *frm ) |
1813 | 0 | { |
1814 | 0 | ratecontrol_entry_t *rce = &h->rc->entry[frm->i_frame]; |
1815 | 0 | if( h->param.analyse.i_weighted_pred <= 0 ) |
1816 | 0 | return; |
1817 | | |
1818 | 0 | if( rce->i_weight_denom[0] >= 0 ) |
1819 | 0 | SET_WEIGHT( frm->weight[0][0], 1, rce->weight[0][0], rce->i_weight_denom[0], rce->weight[0][1] ); |
1820 | |
|
1821 | 0 | if( rce->i_weight_denom[1] >= 0 ) |
1822 | 0 | { |
1823 | 0 | SET_WEIGHT( frm->weight[0][1], 1, rce->weight[1][0], rce->i_weight_denom[1], rce->weight[1][1] ); |
1824 | 0 | SET_WEIGHT( frm->weight[0][2], 1, rce->weight[2][0], rce->i_weight_denom[1], rce->weight[2][1] ); |
1825 | 0 | } |
1826 | 0 | } Unexecuted instantiation: x264_8_ratecontrol_set_weights Unexecuted instantiation: x264_10_ratecontrol_set_weights |
1827 | | |
/* After encoding one frame, save stats and update ratecontrol state.
 *
 * bits   - size of the frame just encoded, as passed by the caller; it feeds
 *          the ABR complexity sum, the B-frame predictor, update_vbv() and
 *          the HRD final arrival time
 * filler - out: receives the value returned by update_vbv()
 *
 * Returns 0 on success, or -1 (after logging an error) if writing to the
 * stats file or the MB-tree stats file fails. */
int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
{
    x264_ratecontrol_t *rc = h->rc;
    const int *mbs = h->stat.frame.i_mb_count;

    x264_emms();

    /* Collapse the per-type macroblock counts into skip / intra / inter totals.
     * Note that the B_DIRECT..B_8x8 types are added to the "p" total. */
    h->stat.frame.i_mb_count_skip = mbs[P_SKIP] + mbs[B_SKIP];
    h->stat.frame.i_mb_count_i = mbs[I_16x16] + mbs[I_8x8] + mbs[I_4x4] + mbs[I_PCM];
    h->stat.frame.i_mb_count_p = mbs[P_L0] + mbs[P_8x8];
    for( int i = B_DIRECT; i <= B_8x8; i++ )
        h->stat.frame.i_mb_count_p += mbs[i];

    /* Turn the accumulated QP sums into per-macroblock averages.
     * rc->qpa_rc itself is overwritten with the average here. */
    h->fdec->f_qp_avg_rc = rc->qpa_rc /= h->mb.i_mb_count;
    h->fdec->f_qp_avg_aq = (float)rc->qpa_aq / h->mb.i_mb_count;
    h->fdec->f_crf_avg = h->param.rc.f_rf_constant + h->fdec->f_qp_avg_rc - rc->qp_novbv;

    if( h->param.rc.b_stat_write )
    {
        /* Frame type letter: 'I' for an I slice with POC 0, 'i' for other
         * I slices, 'P', and 'B'/'b' for B slices kept / not kept as reference. */
        char c_type = h->sh.i_type==SLICE_TYPE_I ? (h->fenc->i_poc==0 ? 'I' : 'i')
                    : h->sh.i_type==SLICE_TYPE_P ? 'P'
                    : h->fenc->b_kept_as_ref ? 'B' : 'b';
        /* Direct mode letter: decided by this frame's scores, falling back to
         * the running totals on a tie; '-' if undecided or not being written. */
        int dir_frame = h->stat.frame.i_direct_score[1] - h->stat.frame.i_direct_score[0];
        int dir_avg = h->stat.i_direct_score[1] - h->stat.i_direct_score[0];
        char c_direct = h->mb.b_direct_auto_write ?
                        ( dir_frame>0 ? 's' : dir_frame<0 ? 't' :
                          dir_avg>0 ? 's' : dir_avg<0 ? 't' : '-' )
                        : '-';
        if( fprintf( rc->p_stat_file_out,
                     "in:%d out:%d type:%c dur:%"PRId64" cpbdur:%"PRId64" q:%.2f aq:%.2f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c ref:",
                     h->fenc->i_frame, h->i_frame,
                     c_type, h->fenc->i_duration,
                     h->fenc->i_cpb_duration,
                     rc->qpa_rc, h->fdec->f_qp_avg_aq,
                     h->stat.frame.i_tex_bits,
                     h->stat.frame.i_mv_bits,
                     h->stat.frame.i_misc_bits,
                     h->stat.frame.i_mb_count_i,
                     h->stat.frame.i_mb_count_p,
                     h->stat.frame.i_mb_count_skip,
                     c_direct) < 0 )
            goto fail;

        /* Only write information for reference reordering once. */
        int use_old_stats = h->param.rc.b_stat_read && rc->rce->refs > 1;
        for( int i = 0; i < (use_old_stats ? rc->rce->refs : h->i_ref[0]); i++ )
        {
            /* Either repeat the counts from the stats being read, or use this
             * frame's list-0 counts (two counters are summed per reference
             * when interlaced). */
            int refcount = use_old_stats         ? rc->rce->refcount[i]
                         : PARAM_INTERLACED      ? h->stat.frame.i_mb_count_ref[0][i*2]
                                                 + h->stat.frame.i_mb_count_ref[0][i*2+1]
                         :                         h->stat.frame.i_mb_count_ref[0][i];
            if( fprintf( rc->p_stat_file_out, "%d ", refcount ) < 0 )
                goto fail;
        }

        /* Weights of the first list-0 reference: the first plane always, the
         * other two planes only if at least one of them has a weight function. */
        if( h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE && h->sh.weight[0][0].weightfn )
        {
            if( fprintf( rc->p_stat_file_out, "w:%d,%d,%d",
                         h->sh.weight[0][0].i_denom, h->sh.weight[0][0].i_scale, h->sh.weight[0][0].i_offset ) < 0 )
                goto fail;
            if( h->sh.weight[0][1].weightfn || h->sh.weight[0][2].weightfn )
            {
                if( fprintf( rc->p_stat_file_out, ",%d,%d,%d,%d,%d ",
                             h->sh.weight[0][1].i_denom, h->sh.weight[0][1].i_scale, h->sh.weight[0][1].i_offset,
                             h->sh.weight[0][2].i_scale, h->sh.weight[0][2].i_offset ) < 0 )
                    goto fail;
            }
            else if( fprintf( rc->p_stat_file_out, " " ) < 0 )
                goto fail;
        }

        if( fprintf( rc->p_stat_file_out, ";\n") < 0 )
            goto fail;

        /* Don't re-write the data in multi-pass mode. */
        if( h->param.rc.b_mb_tree && h->fenc->b_kept_as_ref && !h->param.rc.b_stat_read )
        {
            /* MB-tree record: one slice-type byte followed by one packed
             * 16-bit value per macroblock. */
            uint8_t i_type = h->sh.i_type;
            h->mc.mbtree_fix8_pack( rc->mbtree.qp_buffer[0], h->fenc->f_qp_offset, h->mb.i_mb_count );
            if( fwrite( &i_type, 1, 1, rc->p_mbtree_stat_file_out ) < 1 )
                goto fail;
            if( fwrite( rc->mbtree.qp_buffer[0], sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_out ) < (unsigned)h->mb.i_mb_count )
                goto fail;
        }
    }

    if( rc->b_abr )
    {
        /* Update the decayed complexity sum and wanted-bits window. */
        if( h->sh.i_type != SLICE_TYPE_B )
            rc->cplxr_sum += bits * qp2qscale( rc->qpa_rc ) / rc->last_rceq;
        else
        {
            /* Depends on the fact that B-frame's QP is an offset from the following P-frame's.
             * Not perfectly accurate with B-refs, but good enough. */
            rc->cplxr_sum += bits * qp2qscale( rc->qpa_rc ) / (rc->last_rceq * h->param.rc.f_pb_factor);
        }
        rc->cplxr_sum *= rc->cbr_decay;
        rc->wanted_bits_window += h->fenc->f_duration * rc->bitrate;
        rc->wanted_bits_window *= rc->cbr_decay;
    }

    if( rc->b_2pass )
        rc->expected_bits_sum += qscale2bits( rc->rce, qp2qscale( rc->rce->new_qp ) );

    if( h->mb.b_variable_qp )
    {
        if( h->sh.i_type == SLICE_TYPE_B )
        {
            /* Accumulate B-frame bits and, on the frame flagged as the last
             * B-frame of its minigop, train the B-from-P predictor with the
             * average over rc->bframes. */
            rc->bframe_bits += bits;
            if( h->fenc->b_last_minigop_bframe )
            {
                update_predictor( rc->pred_b_from_p, qp2qscale( rc->qpa_rc ),
                                  h->fref[1][h->i_ref[1]-1]->i_satd, rc->bframe_bits / rc->bframes );
                rc->bframe_bits = 0;
            }
        }
    }

    /* NOTE(review): the *8 suggests *filler is a byte count - confirm. */
    *filler = update_vbv( h, bits );
    rc->filler_bits_sum += *filler * 8;

    if( h->sps->vui.b_nal_hrd_parameters_present )
    {
        /* HRD timing bookkeeping. Delays are divided by 90000 to obtain
         * seconds (presumably 90 kHz clock units - confirm). */
        if( h->fenc->i_frame == 0 )
        {
            // access unit initialises the HRD
            h->fenc->hrd_timing.cpb_initial_arrival_time = 0;
            rc->initial_cpb_removal_delay = h->initial_cpb_removal_delay;
            rc->initial_cpb_removal_delay_offset = h->initial_cpb_removal_delay_offset;
            h->fenc->hrd_timing.cpb_removal_time = rc->nrt_first_access_unit = (double)rc->initial_cpb_removal_delay / 90000;
        }
        else
        {
            h->fenc->hrd_timing.cpb_removal_time = rc->nrt_first_access_unit + (double)(h->fenc->i_cpb_delay - h->i_cpb_delay_pir_offset) *
                                                   h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;

            /* A keyframe becomes the new reference point for removal times. */
            if( h->fenc->b_keyframe )
            {
                rc->nrt_first_access_unit = h->fenc->hrd_timing.cpb_removal_time;
                rc->initial_cpb_removal_delay = h->initial_cpb_removal_delay;
                rc->initial_cpb_removal_delay_offset = h->initial_cpb_removal_delay_offset;
            }

            double cpb_earliest_arrival_time = h->fenc->hrd_timing.cpb_removal_time - (double)rc->initial_cpb_removal_delay / 90000;
            if( !h->fenc->b_keyframe )
                cpb_earliest_arrival_time -= (double)rc->initial_cpb_removal_delay_offset / 90000;

            /* With a CBR HRD the frame arrives right after the previous one;
             * otherwise no earlier than its earliest arrival time. */
            if( h->sps->vui.hrd.b_cbr_hrd )
                h->fenc->hrd_timing.cpb_initial_arrival_time = rc->previous_cpb_final_arrival_time;
            else
                h->fenc->hrd_timing.cpb_initial_arrival_time = X264_MAX( rc->previous_cpb_final_arrival_time, cpb_earliest_arrival_time );
        }
        /* Filler, when present, counts as at least FILLER_OVERHEAD - b_annexb. */
        int filler_bits = *filler ? X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), *filler )*8 : 0;
        // Equation C-6
        h->fenc->hrd_timing.cpb_final_arrival_time = rc->previous_cpb_final_arrival_time = h->fenc->hrd_timing.cpb_initial_arrival_time +
                                                     (double)(bits + filler_bits) / h->sps->vui.hrd.i_bit_rate_unscaled;

        h->fenc->hrd_timing.dpb_output_time = (double)h->fenc->i_dpb_output_delay * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale +
                                              h->fenc->hrd_timing.cpb_removal_time;
    }

    return 0;
fail:
    x264_log( h, X264_LOG_ERROR, "ratecontrol_end: stats file could not be written to\n" );
    return -1;
}
1995 | | |
1996 | | /**************************************************************************** |
1997 | | * 2 pass functions |
1998 | | ***************************************************************************/ |
1999 | | |
2000 | | /** |
2001 | | * modify the bitrate curve from pass1 for one frame |
2002 | | */ |
2003 | | static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor, int frame_num) |
2004 | 0 | { |
2005 | 0 | x264_ratecontrol_t *rcc= h->rc; |
2006 | 0 | x264_zone_t *zone = get_zone( h, frame_num ); |
2007 | 0 | double q; |
2008 | 0 | if( h->param.rc.b_mb_tree ) |
2009 | 0 | { |
2010 | 0 | double timescale = (double)h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale; |
2011 | 0 | q = pow( BASE_FRAME_DURATION / CLIP_DURATION(rce->i_duration * timescale), 1 - h->param.rc.f_qcompress ); |
2012 | 0 | } |
2013 | 0 | else |
2014 | 0 | q = pow( rce->blurred_complexity, 1 - rcc->qcompress ); |
2015 | | |
2016 | | // avoid NaN's in the rc_eq |
2017 | 0 | if( !isfinite(q) || rce->tex_bits + rce->mv_bits == 0 ) |
2018 | 0 | q = rcc->last_qscale_for[rce->pict_type]; |
2019 | 0 | else |
2020 | 0 | { |
2021 | 0 | rcc->last_rceq = q; |
2022 | 0 | q /= rate_factor; |
2023 | 0 | rcc->last_qscale = q; |
2024 | 0 | } |
2025 | |
|
2026 | 0 | if( zone ) |
2027 | 0 | { |
2028 | 0 | if( zone->b_force_qp ) |
2029 | 0 | q = qp2qscale( zone->i_qp ); |
2030 | 0 | else |
2031 | 0 | q /= zone->f_bitrate_factor; |
2032 | 0 | } |
2033 | |
|
2034 | 0 | return q; |
2035 | 0 | } |
2036 | | |
2037 | | static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q, int frame_num) |
2038 | 0 | { |
2039 | 0 | x264_ratecontrol_t *rcc = h->rc; |
2040 | 0 | const int pict_type = rce->pict_type; |
2041 | 0 | x264_zone_t *zone = get_zone( h, frame_num ); |
2042 | | |
2043 | | // force I/B quants as a function of P quants |
2044 | 0 | if( pict_type == SLICE_TYPE_I ) |
2045 | 0 | { |
2046 | 0 | double iq = q; |
2047 | 0 | double pq = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm ); |
2048 | 0 | double ip_factor = h->param.rc.f_ip_factor; |
2049 | | /* don't apply ip_factor if the following frame is also I */ |
2050 | 0 | if( rcc->accum_p_norm <= 0 ) |
2051 | 0 | q = iq; |
2052 | 0 | else if( rcc->accum_p_norm >= 1 ) |
2053 | 0 | q = pq / ip_factor; |
2054 | 0 | else |
2055 | 0 | q = rcc->accum_p_norm * pq / ip_factor + (1 - rcc->accum_p_norm) * iq; |
2056 | 0 | } |
2057 | 0 | else if( pict_type == SLICE_TYPE_B ) |
2058 | 0 | { |
2059 | 0 | q = rcc->last_qscale_for[rcc->last_non_b_pict_type]; |
2060 | 0 | if( !rce->kept_as_ref ) |
2061 | 0 | q *= h->param.rc.f_pb_factor; |
2062 | 0 | } |
2063 | 0 | else if( pict_type == SLICE_TYPE_P |
2064 | 0 | && rcc->last_non_b_pict_type == SLICE_TYPE_P |
2065 | 0 | && rce->tex_bits == 0 ) |
2066 | 0 | { |
2067 | 0 | q = rcc->last_qscale_for[SLICE_TYPE_P]; |
2068 | 0 | } |
2069 | | |
2070 | | /* last qscale / qdiff stuff */ |
2071 | 0 | if( rcc->last_non_b_pict_type == pict_type && |
2072 | 0 | (pict_type!=SLICE_TYPE_I || rcc->last_accum_p_norm < 1) ) |
2073 | 0 | { |
2074 | 0 | double last_q = rcc->last_qscale_for[pict_type]; |
2075 | 0 | double max_qscale = last_q * rcc->lstep; |
2076 | 0 | double min_qscale = last_q / rcc->lstep; |
2077 | |
|
2078 | 0 | if ( q > max_qscale ) q = max_qscale; |
2079 | 0 | else if( q < min_qscale ) q = min_qscale; |
2080 | 0 | } |
2081 | |
|
2082 | 0 | rcc->last_qscale_for[pict_type] = q; |
2083 | 0 | if( pict_type != SLICE_TYPE_B ) |
2084 | 0 | rcc->last_non_b_pict_type = pict_type; |
2085 | 0 | if( pict_type == SLICE_TYPE_I ) |
2086 | 0 | { |
2087 | 0 | rcc->last_accum_p_norm = rcc->accum_p_norm; |
2088 | 0 | rcc->accum_p_norm = 0; |
2089 | 0 | rcc->accum_p_qp = 0; |
2090 | 0 | } |
2091 | 0 | if( pict_type == SLICE_TYPE_P ) |
2092 | 0 | { |
2093 | 0 | float mask = 1 - pow( (float)rce->i_count / rcc->nmb, 2 ); |
2094 | 0 | rcc->accum_p_qp = mask * (qscale2qp( q ) + rcc->accum_p_qp); |
2095 | 0 | rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm); |
2096 | 0 | } |
2097 | |
|
2098 | 0 | if( zone ) |
2099 | 0 | { |
2100 | 0 | if( zone->b_force_qp ) |
2101 | 0 | q = qp2qscale( zone->i_qp ); |
2102 | 0 | else |
2103 | 0 | q /= zone->f_bitrate_factor; |
2104 | 0 | } |
2105 | |
|
2106 | 0 | return q; |
2107 | 0 | } |
2108 | | |
2109 | | static float predict_size( predictor_t *p, float q, float var ) |
2110 | 0 | { |
2111 | 0 | return (p->coeff*var + p->offset) / (q*p->count); |
2112 | 0 | } |
2113 | | |
2114 | | static void update_predictor( predictor_t *p, float q, float var, float bits ) |
2115 | 0 | { |
2116 | 0 | float range = 1.5; |
2117 | 0 | if( var < 10 ) |
2118 | 0 | return; |
2119 | 0 | float old_coeff = p->coeff / p->count; |
2120 | 0 | float old_offset = p->offset / p->count; |
2121 | 0 | float new_coeff = X264_MAX( (bits*q - old_offset) / var, p->coeff_min ); |
2122 | 0 | float new_coeff_clipped = x264_clip3f( new_coeff, old_coeff/range, old_coeff*range ); |
2123 | 0 | float new_offset = bits*q - new_coeff_clipped * var; |
2124 | 0 | if( new_offset >= 0 ) |
2125 | 0 | new_coeff = new_coeff_clipped; |
2126 | 0 | else |
2127 | 0 | new_offset = 0; |
2128 | 0 | p->count *= p->decay; |
2129 | 0 | p->coeff *= p->decay; |
2130 | 0 | p->offset *= p->decay; |
2131 | 0 | p->count ++; |
2132 | 0 | p->coeff += new_coeff; |
2133 | 0 | p->offset += new_offset; |
2134 | 0 | } |
2135 | | |
2136 | | // update VBV after encoding a frame |
2137 | | static int update_vbv( x264_t *h, int bits ) |
2138 | 0 | { |
2139 | 0 | int filler = 0; |
2140 | 0 | int bitrate = h->sps->vui.hrd.i_bit_rate_unscaled; |
2141 | 0 | x264_ratecontrol_t *rcc = h->rc; |
2142 | 0 | x264_ratecontrol_t *rct = h->thread[0]->rc; |
2143 | 0 | int64_t buffer_size = (int64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale; |
2144 | |
|
2145 | 0 | if( rcc->last_satd >= h->mb.i_mb_count ) |
2146 | 0 | update_predictor( &rct->pred[h->sh.i_type], qp2qscale( rcc->qpa_rc ), rcc->last_satd, bits ); |
2147 | |
|
2148 | 0 | if( !rcc->b_vbv ) |
2149 | 0 | return filler; |
2150 | | |
2151 | 0 | uint64_t buffer_diff = (uint64_t)bits * h->sps->vui.i_time_scale; |
2152 | 0 | rct->buffer_fill_final -= buffer_diff; |
2153 | 0 | rct->buffer_fill_final_min -= buffer_diff; |
2154 | |
|
2155 | 0 | if( rct->buffer_fill_final_min < 0 ) |
2156 | 0 | { |
2157 | 0 | double underflow = (double)rct->buffer_fill_final_min / h->sps->vui.i_time_scale; |
2158 | 0 | if( rcc->rate_factor_max_increment && rcc->qpm >= rcc->qp_novbv + rcc->rate_factor_max_increment ) |
2159 | 0 | x264_log( h, X264_LOG_DEBUG, "VBV underflow due to CRF-max (frame %d, %.0f bits)\n", h->i_frame, underflow ); |
2160 | 0 | else |
2161 | 0 | x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, underflow ); |
2162 | 0 | rct->buffer_fill_final = |
2163 | 0 | rct->buffer_fill_final_min = 0; |
2164 | 0 | } |
2165 | |
|
2166 | 0 | if( h->param.i_avcintra_class ) |
2167 | 0 | buffer_diff = buffer_size; |
2168 | 0 | else |
2169 | 0 | buffer_diff = (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration; |
2170 | 0 | rct->buffer_fill_final += buffer_diff; |
2171 | 0 | rct->buffer_fill_final_min += buffer_diff; |
2172 | |
|
2173 | 0 | if( rct->buffer_fill_final > buffer_size ) |
2174 | 0 | { |
2175 | 0 | if( h->param.rc.b_filler ) |
2176 | 0 | { |
2177 | 0 | int64_t scale = (int64_t)h->sps->vui.i_time_scale * 8; |
2178 | 0 | filler = (rct->buffer_fill_final - buffer_size + scale - 1) / scale; |
2179 | 0 | bits = h->param.i_avcintra_class ? filler * 8 : X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8; |
2180 | 0 | buffer_diff = (uint64_t)bits * h->sps->vui.i_time_scale; |
2181 | 0 | rct->buffer_fill_final -= buffer_diff; |
2182 | 0 | rct->buffer_fill_final_min -= buffer_diff; |
2183 | 0 | } |
2184 | 0 | else |
2185 | 0 | { |
2186 | 0 | rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, buffer_size ); |
2187 | 0 | rct->buffer_fill_final_min = X264_MIN( rct->buffer_fill_final_min, buffer_size ); |
2188 | 0 | } |
2189 | 0 | } |
2190 | |
|
2191 | 0 | return filler; |
2192 | 0 | } |
2193 | | |
2194 | | void x264_hrd_fullness( x264_t *h ) |
2195 | 0 | { |
2196 | 0 | x264_ratecontrol_t *rct = h->thread[0]->rc; |
2197 | 0 | uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale / rct->hrd_multiply_denom; |
2198 | 0 | uint64_t cpb_state = rct->buffer_fill_final; |
2199 | 0 | uint64_t cpb_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale; |
2200 | 0 | uint64_t multiply_factor = 90000 / rct->hrd_multiply_denom; |
2201 | |
|
2202 | 0 | if( rct->buffer_fill_final < 0 || rct->buffer_fill_final > (int64_t)cpb_size ) |
2203 | 0 | { |
2204 | 0 | x264_log( h, X264_LOG_WARNING, "CPB %s: %.0f bits in a %.0f-bit buffer\n", |
2205 | 0 | rct->buffer_fill_final < 0 ? "underflow" : "overflow", |
2206 | 0 | (double)rct->buffer_fill_final / h->sps->vui.i_time_scale, (double)cpb_size / h->sps->vui.i_time_scale ); |
2207 | 0 | } |
2208 | |
|
2209 | 0 | h->initial_cpb_removal_delay = (multiply_factor * cpb_state) / denom; |
2210 | 0 | h->initial_cpb_removal_delay_offset = (multiply_factor * cpb_size) / denom - h->initial_cpb_removal_delay; |
2211 | |
|
2212 | 0 | int64_t decoder_buffer_fill = h->initial_cpb_removal_delay * denom / multiply_factor; |
2213 | 0 | rct->buffer_fill_final_min = X264_MIN( rct->buffer_fill_final_min, decoder_buffer_fill ); |
2214 | 0 | } Unexecuted instantiation: x264_8_hrd_fullness Unexecuted instantiation: x264_10_hrd_fullness |
2215 | | |
2216 | | // provisionally update VBV according to the planned size of all frames currently in progress |
2217 | | static void update_vbv_plan( x264_t *h, int overhead ) |
2218 | 0 | { |
2219 | 0 | x264_ratecontrol_t *rcc = h->rc; |
2220 | 0 | rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final_min / h->sps->vui.i_time_scale; |
2221 | 0 | if( h->i_thread_frames > 1 ) |
2222 | 0 | { |
2223 | 0 | int j = rcc - h->thread[0]->rc; |
2224 | 0 | for( int i = 1; i < h->i_thread_frames; i++ ) |
2225 | 0 | { |
2226 | 0 | x264_t *t = h->thread[ (j+i)%h->i_thread_frames ]; |
2227 | 0 | double bits = t->rc->frame_size_planned; |
2228 | 0 | if( !t->b_thread_active ) |
2229 | 0 | continue; |
2230 | 0 | bits = X264_MAX(bits, t->rc->frame_size_estimated); |
2231 | 0 | rcc->buffer_fill -= bits; |
2232 | 0 | rcc->buffer_fill = X264_MAX( rcc->buffer_fill, 0 ); |
2233 | 0 | rcc->buffer_fill += t->rc->buffer_rate; |
2234 | 0 | rcc->buffer_fill = X264_MIN( rcc->buffer_fill, rcc->buffer_size ); |
2235 | 0 | } |
2236 | 0 | } |
2237 | 0 | rcc->buffer_fill = X264_MIN( rcc->buffer_fill, rcc->buffer_size ); |
2238 | 0 | rcc->buffer_fill -= overhead; |
2239 | 0 | } |
2240 | | |
2241 | | // clip qscale to between lmin and lmax |
2242 | | static double clip_qscale( x264_t *h, int pict_type, double q ) |
2243 | 0 | { |
2244 | 0 | x264_ratecontrol_t *rcc = h->rc; |
2245 | 0 | double lmin = rcc->lmin[pict_type]; |
2246 | 0 | double lmax = rcc->lmax[pict_type]; |
2247 | 0 | if( rcc->rate_factor_max_increment ) |
2248 | 0 | lmax = X264_MIN( lmax, qp2qscale( rcc->qp_novbv + rcc->rate_factor_max_increment ) ); |
2249 | |
|
2250 | 0 | if( lmin==lmax ) |
2251 | 0 | return lmin; |
2252 | 0 | else if( rcc->b_2pass ) |
2253 | 0 | { |
2254 | 0 | double min2 = log( lmin ); |
2255 | 0 | double max2 = log( lmax ); |
2256 | 0 | q = (log(q) - min2)/(max2-min2) - 0.5; |
2257 | 0 | q = 1.0/(1.0 + exp( -4*q )); |
2258 | 0 | q = q*(max2-min2) + min2; |
2259 | 0 | return exp( q ); |
2260 | 0 | } |
2261 | 0 | else |
2262 | 0 | return x264_clip3f( q, lmin, lmax ); |
2263 | 0 | } |
2264 | | |
2265 | | // apply VBV constraints |
2266 | | static double vbv_pass1( x264_t *h, int pict_type, double q ) |
2267 | 0 | { |
2268 | 0 | x264_ratecontrol_t *rcc = h->rc; |
2269 | | /* B-frames are not directly subject to VBV, |
2270 | | * since they are controlled by the P-frames' QPs. */ |
2271 | |
|
2272 | 0 | if( rcc->b_vbv && rcc->last_satd > 0 ) |
2273 | 0 | { |
2274 | 0 | double q0 = q; |
2275 | 0 | double fenc_cpb_duration = (double)h->fenc->i_cpb_duration * |
2276 | 0 | h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale; |
2277 | | /* Lookahead VBV: raise the quantizer as necessary such that no frames in |
2278 | | * the lookahead overflow and such that the buffer is in a reasonable state |
2279 | | * by the end of the lookahead. */ |
2280 | 0 | if( h->param.rc.i_lookahead ) |
2281 | 0 | { |
2282 | 0 | int terminate = 0; |
2283 | | |
2284 | | /* Avoid an infinite loop. */ |
2285 | 0 | for( int iterations = 0; iterations < 1000 && terminate != 3; iterations++ ) |
2286 | 0 | { |
2287 | 0 | double frame_q[3]; |
2288 | 0 | double cur_bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd ); |
2289 | 0 | double buffer_fill_cur = rcc->buffer_fill - cur_bits; |
2290 | 0 | double target_fill; |
2291 | 0 | double total_duration = 0; |
2292 | 0 | double last_duration = fenc_cpb_duration; |
2293 | 0 | frame_q[0] = h->sh.i_type == SLICE_TYPE_I ? q * h->param.rc.f_ip_factor : q; |
2294 | 0 | frame_q[1] = frame_q[0] * h->param.rc.f_pb_factor; |
2295 | 0 | frame_q[2] = frame_q[0] / h->param.rc.f_ip_factor; |
2296 | | |
2297 | | /* Loop over the planned future frames. */ |
2298 | 0 | for( int j = 0; buffer_fill_cur >= 0 && buffer_fill_cur <= rcc->buffer_size; j++ ) |
2299 | 0 | { |
2300 | 0 | total_duration += last_duration; |
2301 | 0 | buffer_fill_cur += rcc->vbv_max_rate * last_duration; |
2302 | 0 | int i_type = h->fenc->i_planned_type[j]; |
2303 | 0 | int i_satd = h->fenc->i_planned_satd[j]; |
2304 | 0 | if( i_type == X264_TYPE_AUTO ) |
2305 | 0 | break; |
2306 | 0 | i_type = IS_X264_TYPE_I( i_type ) ? SLICE_TYPE_I : IS_X264_TYPE_B( i_type ) ? SLICE_TYPE_B : SLICE_TYPE_P; |
2307 | 0 | cur_bits = predict_size( &rcc->pred[i_type], frame_q[i_type], i_satd ); |
2308 | 0 | buffer_fill_cur -= cur_bits; |
2309 | 0 | last_duration = h->fenc->f_planned_cpb_duration[j]; |
2310 | 0 | } |
2311 | | /* Try to get to get the buffer at least 50% filled, but don't set an impossible goal. */ |
2312 | 0 | target_fill = X264_MIN( rcc->buffer_fill + total_duration * rcc->vbv_max_rate * 0.5, rcc->buffer_size * 0.5 ); |
2313 | 0 | if( buffer_fill_cur < target_fill ) |
2314 | 0 | { |
2315 | 0 | q *= 1.01; |
2316 | 0 | terminate |= 1; |
2317 | 0 | continue; |
2318 | 0 | } |
2319 | | /* Try to get the buffer no more than 80% filled, but don't set an impossible goal. */ |
2320 | 0 | target_fill = x264_clip3f( rcc->buffer_fill - total_duration * rcc->vbv_max_rate * 0.5, rcc->buffer_size * 0.8, rcc->buffer_size ); |
2321 | 0 | if( rcc->b_vbv_min_rate && buffer_fill_cur > target_fill ) |
2322 | 0 | { |
2323 | 0 | q /= 1.01; |
2324 | 0 | terminate |= 2; |
2325 | 0 | continue; |
2326 | 0 | } |
2327 | 0 | break; |
2328 | 0 | } |
2329 | 0 | } |
2330 | | /* Fallback to old purely-reactive algorithm: no lookahead. */ |
2331 | 0 | else |
2332 | 0 | { |
2333 | 0 | if( ( pict_type == SLICE_TYPE_P || |
2334 | 0 | ( pict_type == SLICE_TYPE_I && rcc->last_non_b_pict_type == SLICE_TYPE_I ) ) && |
2335 | 0 | rcc->buffer_fill/rcc->buffer_size < 0.5 ) |
2336 | 0 | { |
2337 | 0 | q /= x264_clip3f( 2.0*rcc->buffer_fill/rcc->buffer_size, 0.5, 1.0 ); |
2338 | 0 | } |
2339 | | |
2340 | | /* Now a hard threshold to make sure the frame fits in VBV. |
2341 | | * This one is mostly for I-frames. */ |
2342 | 0 | double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd ); |
2343 | | /* For small VBVs, allow the frame to use up the entire VBV. */ |
2344 | 0 | double max_fill_factor = h->param.rc.i_vbv_buffer_size >= 5*h->param.rc.i_vbv_max_bitrate / rcc->fps ? 2 : 1; |
2345 | | /* For single-frame VBVs, request that the frame use up the entire VBV. */ |
2346 | 0 | double min_fill_factor = rcc->single_frame_vbv ? 1 : 2; |
2347 | |
|
2348 | 0 | if( bits > rcc->buffer_fill/max_fill_factor ) |
2349 | 0 | { |
2350 | 0 | double qf = x264_clip3f( rcc->buffer_fill/(max_fill_factor*bits), 0.2, 1.0 ); |
2351 | 0 | q /= qf; |
2352 | 0 | bits *= qf; |
2353 | 0 | } |
2354 | 0 | if( bits < rcc->buffer_rate/min_fill_factor ) |
2355 | 0 | { |
2356 | 0 | double qf = x264_clip3f( bits*min_fill_factor/rcc->buffer_rate, 0.001, 1.0 ); |
2357 | 0 | q *= qf; |
2358 | 0 | } |
2359 | 0 | q = X264_MAX( q0, q ); |
2360 | 0 | } |
2361 | | |
2362 | | /* Check B-frame complexity, and use up any bits that would |
2363 | | * overflow before the next P-frame. */ |
2364 | 0 | if( h->sh.i_type == SLICE_TYPE_P && !rcc->single_frame_vbv ) |
2365 | 0 | { |
2366 | 0 | int nb = rcc->bframes; |
2367 | 0 | double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd ); |
2368 | 0 | double pbbits = bits; |
2369 | 0 | double bbits = predict_size( rcc->pred_b_from_p, q * h->param.rc.f_pb_factor, rcc->last_satd ); |
2370 | 0 | double space; |
2371 | 0 | double bframe_cpb_duration = 0; |
2372 | 0 | double minigop_cpb_duration; |
2373 | 0 | for( int i = 0; i < nb; i++ ) |
2374 | 0 | bframe_cpb_duration += h->fenc->f_planned_cpb_duration[i]; |
2375 | |
|
2376 | 0 | if( bbits * nb > bframe_cpb_duration * rcc->vbv_max_rate ) |
2377 | 0 | { |
2378 | 0 | nb = 0; |
2379 | 0 | bframe_cpb_duration = 0; |
2380 | 0 | } |
2381 | 0 | pbbits += nb * bbits; |
2382 | |
|
2383 | 0 | minigop_cpb_duration = bframe_cpb_duration + fenc_cpb_duration; |
2384 | 0 | space = rcc->buffer_fill + minigop_cpb_duration*rcc->vbv_max_rate - rcc->buffer_size; |
2385 | 0 | if( pbbits < space ) |
2386 | 0 | { |
2387 | 0 | q *= X264_MAX( pbbits / space, bits / (0.5 * rcc->buffer_size) ); |
2388 | 0 | } |
2389 | 0 | q = X264_MAX( q0/2, q ); |
2390 | 0 | } |
2391 | |
|
2392 | 0 | if( !rcc->b_vbv_min_rate ) |
2393 | 0 | q = X264_MAX( q0, q ); |
2394 | 0 | } |
2395 | |
|
2396 | 0 | return clip_qscale( h, pict_type, q ); |
2397 | 0 | } |
2398 | | |
2399 | | // update qscale for 1 frame based on actual bits used so far |
2400 | | static float rate_estimate_qscale( x264_t *h ) |
2401 | 0 | { |
2402 | 0 | float q; |
2403 | 0 | x264_ratecontrol_t *rcc = h->rc; |
2404 | 0 | ratecontrol_entry_t rce = {0}; |
2405 | 0 | int pict_type = h->sh.i_type; |
2406 | 0 | int64_t total_bits = 8*(h->stat.i_frame_size[SLICE_TYPE_I] |
2407 | 0 | + h->stat.i_frame_size[SLICE_TYPE_P] |
2408 | 0 | + h->stat.i_frame_size[SLICE_TYPE_B]) |
2409 | 0 | - rcc->filler_bits_sum; |
2410 | |
|
2411 | 0 | if( rcc->b_2pass ) |
2412 | 0 | { |
2413 | 0 | rce = *rcc->rce; |
2414 | 0 | if( pict_type != rce.pict_type ) |
2415 | 0 | { |
2416 | 0 | x264_log( h, X264_LOG_ERROR, "slice=%c but 2pass stats say %c\n", |
2417 | 0 | slice_type_to_char[pict_type], slice_type_to_char[rce.pict_type] ); |
2418 | 0 | } |
2419 | 0 | } |
2420 | |
|
2421 | 0 | if( pict_type == SLICE_TYPE_B ) |
2422 | 0 | { |
2423 | | /* B-frames don't have independent ratecontrol, but rather get the |
2424 | | * average QP of the two adjacent P-frames + an offset */ |
2425 | |
|
2426 | 0 | int i0 = IS_X264_TYPE_I(h->fref_nearest[0]->i_type); |
2427 | 0 | int i1 = IS_X264_TYPE_I(h->fref_nearest[1]->i_type); |
2428 | 0 | int dt0 = abs(h->fenc->i_poc - h->fref_nearest[0]->i_poc); |
2429 | 0 | int dt1 = abs(h->fenc->i_poc - h->fref_nearest[1]->i_poc); |
2430 | 0 | float q0 = h->fref_nearest[0]->f_qp_avg_rc; |
2431 | 0 | float q1 = h->fref_nearest[1]->f_qp_avg_rc; |
2432 | |
|
2433 | 0 | if( h->fref_nearest[0]->i_type == X264_TYPE_BREF ) |
2434 | 0 | q0 -= rcc->pb_offset/2; |
2435 | 0 | if( h->fref_nearest[1]->i_type == X264_TYPE_BREF ) |
2436 | 0 | q1 -= rcc->pb_offset/2; |
2437 | |
|
2438 | 0 | if( i0 && i1 ) |
2439 | 0 | q = (q0 + q1) / 2 + rcc->ip_offset; |
2440 | 0 | else if( i0 ) |
2441 | 0 | q = q1; |
2442 | 0 | else if( i1 ) |
2443 | 0 | q = q0; |
2444 | 0 | else |
2445 | 0 | q = (q0*dt1 + q1*dt0) / (dt0 + dt1); |
2446 | |
|
2447 | 0 | if( h->fenc->b_kept_as_ref ) |
2448 | 0 | q += rcc->pb_offset/2; |
2449 | 0 | else |
2450 | 0 | q += rcc->pb_offset; |
2451 | |
|
2452 | 0 | rcc->qp_novbv = q; |
2453 | 0 | q = qp2qscale( q ); |
2454 | 0 | if( rcc->b_2pass ) |
2455 | 0 | rcc->frame_size_planned = qscale2bits( &rce, q ); |
2456 | 0 | else |
2457 | 0 | rcc->frame_size_planned = predict_size( rcc->pred_b_from_p, q, h->fref[1][h->i_ref[1]-1]->i_satd ); |
2458 | | |
2459 | | /* Apply MinCR and buffer fill restrictions */ |
2460 | 0 | if( rcc->b_vbv ) |
2461 | 0 | { |
2462 | 0 | double frame_size_maximum = X264_MIN( rcc->frame_size_maximum, X264_MAX( rcc->buffer_fill, 0.001 ) ); |
2463 | 0 | if( rcc->frame_size_planned > frame_size_maximum ) |
2464 | 0 | { |
2465 | 0 | q *= rcc->frame_size_planned / frame_size_maximum; |
2466 | 0 | rcc->frame_size_planned = frame_size_maximum; |
2467 | 0 | } |
2468 | 0 | } |
2469 | |
|
2470 | 0 | rcc->frame_size_estimated = rcc->frame_size_planned; |
2471 | | |
2472 | | /* For row SATDs */ |
2473 | 0 | if( rcc->b_vbv ) |
2474 | 0 | rcc->last_satd = x264_rc_analyse_slice( h ); |
2475 | 0 | return q; |
2476 | 0 | } |
2477 | 0 | else |
2478 | 0 | { |
2479 | 0 | double abr_buffer = 2 * rcc->rate_tolerance * rcc->bitrate; |
2480 | 0 | double predicted_bits = total_bits; |
2481 | 0 | if( h->i_thread_frames > 1 ) |
2482 | 0 | { |
2483 | 0 | int j = rcc - h->thread[0]->rc; |
2484 | 0 | for( int i = 1; i < h->i_thread_frames; i++ ) |
2485 | 0 | { |
2486 | 0 | x264_t *t = h->thread[(j+i) % h->i_thread_frames]; |
2487 | 0 | double bits = t->rc->frame_size_planned; |
2488 | 0 | if( !t->b_thread_active ) |
2489 | 0 | continue; |
2490 | 0 | bits = X264_MAX(bits, t->rc->frame_size_estimated); |
2491 | 0 | predicted_bits += bits; |
2492 | 0 | } |
2493 | 0 | } |
2494 | |
|
2495 | 0 | if( rcc->b_2pass ) |
2496 | 0 | { |
2497 | 0 | double lmin = rcc->lmin[pict_type]; |
2498 | 0 | double lmax = rcc->lmax[pict_type]; |
2499 | 0 | double diff; |
2500 | | |
2501 | | /* Adjust ABR buffer based on distance to the end of the video. */ |
2502 | 0 | if( rcc->num_entries > h->i_frame ) |
2503 | 0 | { |
2504 | 0 | double final_bits = rcc->entry_out[rcc->num_entries-1]->expected_bits; |
2505 | 0 | double video_pos = rce.expected_bits / final_bits; |
2506 | 0 | double scale_factor = sqrt( (1 - video_pos) * rcc->num_entries ); |
2507 | 0 | abr_buffer *= 0.5 * X264_MAX( scale_factor, 0.5 ); |
2508 | 0 | } |
2509 | |
|
2510 | 0 | diff = predicted_bits - rce.expected_bits; |
2511 | 0 | q = rce.new_qscale; |
2512 | 0 | q /= x264_clip3f((abr_buffer - diff) / abr_buffer, .5, 2); |
2513 | 0 | if( h->i_frame >= rcc->fps && rcc->expected_bits_sum >= 1 ) |
2514 | 0 | { |
2515 | | /* Adjust quant based on the difference between |
2516 | | * achieved and expected bitrate so far */ |
2517 | 0 | double cur_time = (double)h->i_frame / rcc->num_entries; |
2518 | 0 | double w = x264_clip3f( cur_time*100, 0.0, 1.0 ); |
2519 | 0 | q *= pow( (double)total_bits / rcc->expected_bits_sum, w ); |
2520 | 0 | } |
2521 | 0 | rcc->qp_novbv = qscale2qp( q ); |
2522 | 0 | if( rcc->b_vbv ) |
2523 | 0 | { |
2524 | | /* Do not overflow vbv */ |
2525 | 0 | double expected_size = qscale2bits( &rce, q ); |
2526 | 0 | double expected_vbv = rcc->buffer_fill + rcc->buffer_rate - expected_size; |
2527 | 0 | double expected_fullness = rce.expected_vbv / rcc->buffer_size; |
2528 | 0 | double qmax = q*(2 - expected_fullness); |
2529 | 0 | double size_constraint = 1 + expected_fullness; |
2530 | 0 | qmax = X264_MAX( qmax, rce.new_qscale ); |
2531 | 0 | if( expected_fullness < .05 ) |
2532 | 0 | qmax = lmax; |
2533 | 0 | qmax = X264_MIN(qmax, lmax); |
2534 | 0 | while( ((expected_vbv < rce.expected_vbv/size_constraint) && (q < qmax)) || |
2535 | 0 | ((expected_vbv < 0) && (q < lmax))) |
2536 | 0 | { |
2537 | 0 | q *= 1.05; |
2538 | 0 | expected_size = qscale2bits(&rce, q); |
2539 | 0 | expected_vbv = rcc->buffer_fill + rcc->buffer_rate - expected_size; |
2540 | 0 | } |
2541 | 0 | rcc->last_satd = x264_rc_analyse_slice( h ); |
2542 | 0 | } |
2543 | 0 | q = x264_clip3f( q, lmin, lmax ); |
2544 | 0 | } |
2545 | 0 | else /* 1pass ABR */ |
2546 | 0 | { |
2547 | | /* Calculate the quantizer which would have produced the desired |
2548 | | * average bitrate if it had been applied to all frames so far. |
2549 | | * Then modulate that quant based on the current frame's complexity |
2550 | | * relative to the average complexity so far (using the 2pass RCEQ). |
2551 | | * Then bias the quant up or down if total size so far was far from |
2552 | | * the target. |
2553 | | * Result: Depending on the value of rate_tolerance, there is a |
2554 | | * tradeoff between quality and bitrate precision. But at large |
2555 | | * tolerances, the bit distribution approaches that of 2pass. */ |
2556 | |
|
2557 | 0 | double wanted_bits, overflow = 1; |
2558 | |
|
2559 | 0 | rcc->last_satd = x264_rc_analyse_slice( h ); |
2560 | 0 | rcc->short_term_cplxsum *= 0.5; |
2561 | 0 | rcc->short_term_cplxcount *= 0.5; |
2562 | 0 | rcc->short_term_cplxsum += rcc->last_satd / (CLIP_DURATION(h->fenc->f_duration) / BASE_FRAME_DURATION); |
2563 | 0 | rcc->short_term_cplxcount ++; |
2564 | |
|
2565 | 0 | rce.tex_bits = rcc->last_satd; |
2566 | 0 | rce.blurred_complexity = rcc->short_term_cplxsum / rcc->short_term_cplxcount; |
2567 | 0 | rce.mv_bits = 0; |
2568 | 0 | rce.p_count = rcc->nmb; |
2569 | 0 | rce.i_count = 0; |
2570 | 0 | rce.s_count = 0; |
2571 | 0 | rce.qscale = 1; |
2572 | 0 | rce.pict_type = pict_type; |
2573 | 0 | rce.i_duration = h->fenc->i_duration; |
2574 | |
|
2575 | 0 | if( h->param.rc.i_rc_method == X264_RC_CRF ) |
2576 | 0 | { |
2577 | 0 | q = get_qscale( h, &rce, rcc->rate_factor_constant, h->fenc->i_frame ); |
2578 | 0 | } |
2579 | 0 | else |
2580 | 0 | { |
2581 | 0 | q = get_qscale( h, &rce, rcc->wanted_bits_window / rcc->cplxr_sum, h->fenc->i_frame ); |
2582 | | |
2583 | | /* ABR code can potentially be counterproductive in CBR, so just don't bother. |
2584 | | * Don't run it if the frame complexity is zero either. */ |
2585 | 0 | if( !rcc->b_vbv_min_rate && rcc->last_satd ) |
2586 | 0 | { |
2587 | | // FIXME is it simpler to keep track of wanted_bits in ratecontrol_end? |
2588 | 0 | int i_frame_done = h->i_frame; |
2589 | 0 | double time_done = i_frame_done / rcc->fps; |
2590 | 0 | if( h->param.b_vfr_input && i_frame_done > 0 ) |
2591 | 0 | time_done = ((double)(h->fenc->i_reordered_pts - h->i_reordered_pts_delay)) * h->param.i_timebase_num / h->param.i_timebase_den; |
2592 | 0 | wanted_bits = time_done * rcc->bitrate; |
2593 | 0 | if( wanted_bits > 0 ) |
2594 | 0 | { |
2595 | 0 | abr_buffer *= X264_MAX( 1, sqrt( time_done ) ); |
2596 | 0 | overflow = x264_clip3f( 1.0 + (predicted_bits - wanted_bits) / abr_buffer, .5, 2 ); |
2597 | 0 | q *= overflow; |
2598 | 0 | } |
2599 | 0 | } |
2600 | 0 | } |
2601 | |
|
2602 | 0 | if( pict_type == SLICE_TYPE_I && h->param.i_keyint_max > 1 |
2603 | | /* should test _next_ pict type, but that isn't decided yet */ |
2604 | 0 | && rcc->last_non_b_pict_type != SLICE_TYPE_I ) |
2605 | 0 | { |
2606 | 0 | q = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm ); |
2607 | 0 | q /= h->param.rc.f_ip_factor; |
2608 | 0 | } |
2609 | 0 | else if( h->i_frame > 0 ) |
2610 | 0 | { |
2611 | 0 | if( h->param.rc.i_rc_method != X264_RC_CRF ) |
2612 | 0 | { |
2613 | | /* Asymmetric clipping, because symmetric would prevent |
2614 | | * overflow control in areas of rapidly oscillating complexity */ |
2615 | 0 | double lmin = rcc->last_qscale_for[pict_type] / rcc->lstep; |
2616 | 0 | double lmax = rcc->last_qscale_for[pict_type] * rcc->lstep; |
2617 | 0 | if( overflow > 1.1 && h->i_frame > 3 ) |
2618 | 0 | lmax *= rcc->lstep; |
2619 | 0 | else if( overflow < 0.9 ) |
2620 | 0 | lmin /= rcc->lstep; |
2621 | |
|
2622 | 0 | q = x264_clip3f(q, lmin, lmax); |
2623 | 0 | } |
2624 | 0 | } |
2625 | 0 | else if( h->param.rc.i_rc_method == X264_RC_CRF && rcc->qcompress != 1 ) |
2626 | 0 | { |
2627 | 0 | q = qp2qscale( ABR_INIT_QP ) / h->param.rc.f_ip_factor; |
2628 | 0 | } |
2629 | 0 | rcc->qp_novbv = qscale2qp( q ); |
2630 | |
|
2631 | 0 | q = vbv_pass1( h, pict_type, q ); |
2632 | 0 | } |
2633 | |
|
2634 | 0 | rcc->last_qscale_for[pict_type] = |
2635 | 0 | rcc->last_qscale = q; |
2636 | |
|
2637 | 0 | if( !(rcc->b_2pass && !rcc->b_vbv) && h->fenc->i_frame == 0 ) |
2638 | 0 | rcc->last_qscale_for[SLICE_TYPE_P] = q * h->param.rc.f_ip_factor; |
2639 | |
|
2640 | 0 | if( rcc->b_2pass ) |
2641 | 0 | rcc->frame_size_planned = qscale2bits( &rce, q ); |
2642 | 0 | else |
2643 | 0 | rcc->frame_size_planned = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd ); |
2644 | | |
2645 | | /* Apply MinCR and buffer fill restrictions */ |
2646 | 0 | if( rcc->b_vbv ) |
2647 | 0 | { |
2648 | 0 | double frame_size_maximum = X264_MIN( rcc->frame_size_maximum, X264_MAX( rcc->buffer_fill, 0.001 ) ); |
2649 | 0 | if( rcc->frame_size_planned > frame_size_maximum ) |
2650 | 0 | { |
2651 | 0 | q *= rcc->frame_size_planned / frame_size_maximum; |
2652 | 0 | rcc->frame_size_planned = frame_size_maximum; |
2653 | 0 | } |
2654 | | |
2655 | | /* Always use up the whole VBV in this case. */ |
2656 | 0 | if( rcc->single_frame_vbv ) |
2657 | 0 | rcc->frame_size_planned = X264_MIN( rcc->buffer_rate, frame_size_maximum ); |
2658 | 0 | } |
2659 | |
|
2660 | 0 | rcc->frame_size_estimated = rcc->frame_size_planned; |
2661 | 0 | return q; |
2662 | 0 | } |
2663 | 0 | } |
2664 | | |
2665 | | static void threads_normalize_predictors( x264_t *h ) |
2666 | 0 | { |
2667 | 0 | double totalsize = 0; |
2668 | 0 | for( int i = 0; i < h->param.i_threads; i++ ) |
2669 | 0 | totalsize += h->thread[i]->rc->slice_size_planned; |
2670 | 0 | double factor = h->rc->frame_size_planned / totalsize; |
2671 | 0 | for( int i = 0; i < h->param.i_threads; i++ ) |
2672 | 0 | h->thread[i]->rc->slice_size_planned *= factor; |
2673 | 0 | } |
2674 | | |
2675 | | void x264_threads_distribute_ratecontrol( x264_t *h ) |
2676 | 0 | { |
2677 | 0 | int row; |
2678 | 0 | x264_ratecontrol_t *rc = h->rc; |
2679 | 0 | x264_emms(); |
2680 | 0 | float qscale = qp2qscale( rc->qpm ); |
2681 | | |
2682 | | /* Initialize row predictors */ |
2683 | 0 | if( h->i_frame == 0 ) |
2684 | 0 | for( int i = 0; i < h->param.i_threads; i++ ) |
2685 | 0 | { |
2686 | 0 | x264_t *t = h->thread[i]; |
2687 | 0 | if( t != h ) |
2688 | 0 | memcpy( t->rc->row_preds, rc->row_preds, sizeof(rc->row_preds) ); |
2689 | 0 | } |
2690 | |
|
2691 | 0 | for( int i = 0; i < h->param.i_threads; i++ ) |
2692 | 0 | { |
2693 | 0 | x264_t *t = h->thread[i]; |
2694 | 0 | if( t != h ) |
2695 | 0 | memcpy( t->rc, rc, offsetof(x264_ratecontrol_t, row_pred) ); |
2696 | 0 | t->rc->row_pred = t->rc->row_preds[h->sh.i_type]; |
2697 | | /* Calculate the planned slice size. */ |
2698 | 0 | if( rc->b_vbv && rc->frame_size_planned ) |
2699 | 0 | { |
2700 | 0 | int size = 0; |
2701 | 0 | for( row = t->i_threadslice_start; row < t->i_threadslice_end; row++ ) |
2702 | 0 | size += h->fdec->i_row_satd[row]; |
2703 | 0 | t->rc->slice_size_planned = predict_size( &rc->pred[h->sh.i_type + (i+1)*5], qscale, size ); |
2704 | 0 | } |
2705 | 0 | else |
2706 | 0 | t->rc->slice_size_planned = 0; |
2707 | 0 | } |
2708 | 0 | if( rc->b_vbv && rc->frame_size_planned ) |
2709 | 0 | { |
2710 | 0 | threads_normalize_predictors( h ); |
2711 | |
|
2712 | 0 | if( rc->single_frame_vbv ) |
2713 | 0 | { |
2714 | | /* Compensate for our max frame error threshold: give more bits (proportionally) to smaller slices. */ |
2715 | 0 | for( int i = 0; i < h->param.i_threads; i++ ) |
2716 | 0 | { |
2717 | 0 | x264_t *t = h->thread[i]; |
2718 | 0 | float max_frame_error = x264_clip3f( 1.0 / (t->i_threadslice_end - t->i_threadslice_start), 0.05, 0.25 ); |
2719 | 0 | t->rc->slice_size_planned += 2 * max_frame_error * rc->frame_size_planned; |
2720 | 0 | } |
2721 | 0 | threads_normalize_predictors( h ); |
2722 | 0 | } |
2723 | |
|
2724 | 0 | for( int i = 0; i < h->param.i_threads; i++ ) |
2725 | 0 | h->thread[i]->rc->frame_size_estimated = h->thread[i]->rc->slice_size_planned; |
2726 | 0 | } |
2727 | 0 | } Unexecuted instantiation: x264_8_threads_distribute_ratecontrol Unexecuted instantiation: x264_10_threads_distribute_ratecontrol |
2728 | | |
2729 | | void x264_threads_merge_ratecontrol( x264_t *h ) |
2730 | 0 | { |
2731 | 0 | x264_ratecontrol_t *rc = h->rc; |
2732 | 0 | x264_emms(); |
2733 | |
|
2734 | 0 | for( int i = 0; i < h->param.i_threads; i++ ) |
2735 | 0 | { |
2736 | 0 | x264_t *t = h->thread[i]; |
2737 | 0 | x264_ratecontrol_t *rct = h->thread[i]->rc; |
2738 | 0 | if( h->param.rc.i_vbv_buffer_size ) |
2739 | 0 | { |
2740 | 0 | int size = 0; |
2741 | 0 | for( int row = t->i_threadslice_start; row < t->i_threadslice_end; row++ ) |
2742 | 0 | size += h->fdec->i_row_satd[row]; |
2743 | 0 | int bits = t->stat.frame.i_mv_bits + t->stat.frame.i_tex_bits + t->stat.frame.i_misc_bits; |
2744 | 0 | int mb_count = (t->i_threadslice_end - t->i_threadslice_start) * h->mb.i_mb_width; |
2745 | 0 | update_predictor( &rc->pred[h->sh.i_type+(i+1)*5], qp2qscale( rct->qpa_rc/mb_count ), size, bits ); |
2746 | 0 | } |
2747 | 0 | if( !i ) |
2748 | 0 | continue; |
2749 | 0 | rc->qpa_rc += rct->qpa_rc; |
2750 | 0 | rc->qpa_aq += rct->qpa_aq; |
2751 | 0 | } |
2752 | 0 | } Unexecuted instantiation: x264_8_threads_merge_ratecontrol Unexecuted instantiation: x264_10_threads_merge_ratecontrol |
2753 | | |
2754 | | void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next ) |
2755 | 0 | { |
2756 | 0 | if( cur != prev ) |
2757 | 0 | { |
2758 | 0 | #define COPY(var) memcpy(&cur->rc->var, &prev->rc->var, sizeof(cur->rc->var)) |
2759 | | /* these vars are updated in x264_ratecontrol_start() |
2760 | | * so copy them from the context that most recently started (prev) |
2761 | | * to the context that's about to start (cur). */ |
2762 | 0 | COPY(accum_p_qp); |
2763 | 0 | COPY(accum_p_norm); |
2764 | 0 | COPY(last_satd); |
2765 | 0 | COPY(last_rceq); |
2766 | 0 | COPY(last_qscale_for); |
2767 | 0 | COPY(last_non_b_pict_type); |
2768 | 0 | COPY(short_term_cplxsum); |
2769 | 0 | COPY(short_term_cplxcount); |
2770 | 0 | COPY(bframes); |
2771 | 0 | COPY(prev_zone); |
2772 | 0 | COPY(mbtree.qpbuf_pos); |
2773 | | /* these vars can be updated by x264_ratecontrol_init_reconfigurable */ |
2774 | 0 | COPY(bitrate); |
2775 | 0 | COPY(buffer_size); |
2776 | 0 | COPY(buffer_rate); |
2777 | 0 | COPY(vbv_max_rate); |
2778 | 0 | COPY(single_frame_vbv); |
2779 | 0 | COPY(cbr_decay); |
2780 | 0 | COPY(rate_factor_constant); |
2781 | 0 | COPY(rate_factor_max_increment); |
2782 | 0 | #undef COPY |
2783 | 0 | } |
2784 | 0 | if( cur != next ) |
2785 | 0 | { |
2786 | 0 | #define COPY(var) next->rc->var = cur->rc->var |
2787 | | /* these vars are updated in x264_ratecontrol_end() |
2788 | | * so copy them from the context that most recently ended (cur) |
2789 | | * to the context that's about to end (next) */ |
2790 | 0 | COPY(cplxr_sum); |
2791 | 0 | COPY(expected_bits_sum); |
2792 | 0 | COPY(filler_bits_sum); |
2793 | 0 | COPY(wanted_bits_window); |
2794 | 0 | COPY(bframe_bits); |
2795 | 0 | COPY(initial_cpb_removal_delay); |
2796 | 0 | COPY(initial_cpb_removal_delay_offset); |
2797 | 0 | COPY(nrt_first_access_unit); |
2798 | 0 | COPY(previous_cpb_final_arrival_time); |
2799 | 0 | #undef COPY |
2800 | 0 | } |
2801 | | //FIXME row_preds[] (not strictly necessary, but would improve prediction) |
2802 | | /* the rest of the variables are either constant or thread-local */ |
2803 | 0 | } Unexecuted instantiation: x264_8_thread_sync_ratecontrol Unexecuted instantiation: x264_10_thread_sync_ratecontrol |
2804 | | |
2805 | | static int find_underflow( x264_t *h, double *fills, int *t0, int *t1, int over ) |
2806 | 0 | { |
2807 | | /* find an interval ending on an overflow or underflow (depending on whether |
2808 | | * we're adding or removing bits), and starting on the earliest frame that |
2809 | | * can influence the buffer fill of that end frame. */ |
2810 | 0 | x264_ratecontrol_t *rcc = h->rc; |
2811 | 0 | const double buffer_min = .1 * rcc->buffer_size; |
2812 | 0 | const double buffer_max = .9 * rcc->buffer_size; |
2813 | 0 | double fill = fills[*t0-1]; |
2814 | 0 | double parity = over ? 1. : -1.; |
2815 | 0 | int start = -1, end = -1; |
2816 | 0 | for( int i = *t0; i < rcc->num_entries; i++ ) |
2817 | 0 | { |
2818 | 0 | fill += (rcc->entry_out[i]->i_cpb_duration * rcc->vbv_max_rate * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale - |
2819 | 0 | qscale2bits( rcc->entry_out[i], rcc->entry_out[i]->new_qscale )) * parity; |
2820 | 0 | fill = x264_clip3f(fill, 0, rcc->buffer_size); |
2821 | 0 | fills[i] = fill; |
2822 | 0 | if( fill <= buffer_min || i == 0 ) |
2823 | 0 | { |
2824 | 0 | if( end >= 0 ) |
2825 | 0 | break; |
2826 | 0 | start = i; |
2827 | 0 | } |
2828 | 0 | else if( fill >= buffer_max && start >= 0 ) |
2829 | 0 | end = i; |
2830 | 0 | } |
2831 | 0 | *t0 = start; |
2832 | 0 | *t1 = end; |
2833 | 0 | return start >= 0 && end >= 0; |
2834 | 0 | } |
2835 | | |
2836 | | static int fix_underflow( x264_t *h, int t0, int t1, double adjustment, double qscale_min, double qscale_max ) |
2837 | 0 | { |
2838 | 0 | x264_ratecontrol_t *rcc = h->rc; |
2839 | 0 | double qscale_orig, qscale_new; |
2840 | 0 | int adjusted = 0; |
2841 | 0 | if( t0 > 0 ) |
2842 | 0 | t0++; |
2843 | 0 | for( int i = t0; i <= t1; i++ ) |
2844 | 0 | { |
2845 | 0 | qscale_orig = rcc->entry_out[i]->new_qscale; |
2846 | 0 | qscale_orig = x264_clip3f( qscale_orig, qscale_min, qscale_max ); |
2847 | 0 | qscale_new = qscale_orig * adjustment; |
2848 | 0 | qscale_new = x264_clip3f( qscale_new, qscale_min, qscale_max ); |
2849 | 0 | rcc->entry_out[i]->new_qscale = qscale_new; |
2850 | 0 | adjusted = adjusted || (qscale_new != qscale_orig); |
2851 | 0 | } |
2852 | 0 | return adjusted; |
2853 | 0 | } |
2854 | | |
2855 | | static double count_expected_bits( x264_t *h ) |
2856 | 0 | { |
2857 | 0 | x264_ratecontrol_t *rcc = h->rc; |
2858 | 0 | double expected_bits = 0; |
2859 | 0 | for( int i = 0; i < rcc->num_entries; i++ ) |
2860 | 0 | { |
2861 | 0 | ratecontrol_entry_t *rce = rcc->entry_out[i]; |
2862 | 0 | rce->expected_bits = expected_bits; |
2863 | 0 | expected_bits += qscale2bits( rce, rce->new_qscale ); |
2864 | 0 | } |
2865 | 0 | return expected_bits; |
2866 | 0 | } |
2867 | | |
2868 | | static int vbv_pass2( x264_t *h, double all_available_bits ) |
2869 | 0 | { |
2870 | | /* for each interval of buffer_full .. underflow, uniformly increase the qp of all |
2871 | | * frames in the interval until either buffer is full at some intermediate frame or the |
2872 | | * last frame in the interval no longer underflows. Recompute intervals and repeat. |
2873 | | * Then do the converse to put bits back into overflow areas until target size is met */ |
2874 | |
|
2875 | 0 | x264_ratecontrol_t *rcc = h->rc; |
2876 | 0 | double *fills; |
2877 | 0 | double expected_bits = 0; |
2878 | 0 | double adjustment; |
2879 | 0 | double prev_bits = 0; |
2880 | 0 | int t0, t1; |
2881 | 0 | double qscale_min = qp2qscale( h->param.rc.i_qp_min ); |
2882 | 0 | double qscale_max = qp2qscale( h->param.rc.i_qp_max ); |
2883 | 0 | int iterations = 0; |
2884 | 0 | int adj_min, adj_max; |
2885 | 0 | CHECKED_MALLOC( fills, (rcc->num_entries+1)*sizeof(double) ); |
2886 | | |
2887 | 0 | fills++; |
2888 | | |
2889 | | /* adjust overall stream size */ |
2890 | 0 | do |
2891 | 0 | { |
2892 | 0 | iterations++; |
2893 | 0 | prev_bits = expected_bits; |
2894 | |
|
2895 | 0 | if( expected_bits ) |
2896 | 0 | { /* not first iteration */ |
2897 | 0 | adjustment = X264_MAX(X264_MIN(expected_bits / all_available_bits, 0.999), 0.9); |
2898 | 0 | fills[-1] = rcc->buffer_size * h->param.rc.f_vbv_buffer_init; |
2899 | 0 | t0 = 0; |
2900 | | /* fix overflows */ |
2901 | 0 | adj_min = 1; |
2902 | 0 | while( adj_min && find_underflow( h, fills, &t0, &t1, 1 ) ) |
2903 | 0 | { |
2904 | 0 | adj_min = fix_underflow( h, t0, t1, adjustment, qscale_min, qscale_max ); |
2905 | 0 | t0 = t1; |
2906 | 0 | } |
2907 | 0 | } |
2908 | |
|
2909 | 0 | fills[-1] = rcc->buffer_size * (1. - h->param.rc.f_vbv_buffer_init); |
2910 | 0 | t0 = 0; |
2911 | | /* fix underflows -- should be done after overflow, as we'd better undersize target than underflowing VBV */ |
2912 | 0 | adj_max = 1; |
2913 | 0 | while( adj_max && find_underflow( h, fills, &t0, &t1, 0 ) ) |
2914 | 0 | adj_max = fix_underflow( h, t0, t1, 1.001, qscale_min, qscale_max ); |
2915 | |
|
2916 | 0 | expected_bits = count_expected_bits( h ); |
2917 | 0 | } while( (expected_bits < .995*all_available_bits) && ((int64_t)(expected_bits+.5) > (int64_t)(prev_bits+.5)) ); |
2918 | |
|
2919 | 0 | if( !adj_max ) |
2920 | 0 | x264_log( h, X264_LOG_WARNING, "vbv-maxrate issue, qpmax or vbv-maxrate too low\n"); |
2921 | | |
2922 | | /* store expected vbv filling values for tracking when encoding */ |
2923 | 0 | for( int i = 0; i < rcc->num_entries; i++ ) |
2924 | 0 | rcc->entry_out[i]->expected_vbv = rcc->buffer_size - fills[i]; |
2925 | |
|
2926 | 0 | x264_free( fills-1 ); |
2927 | 0 | return 0; |
2928 | 0 | fail: |
2929 | 0 | return -1; |
2930 | 0 | } |
2931 | | |
2932 | | static int init_pass2( x264_t *h ) |
2933 | 0 | { |
2934 | 0 | x264_ratecontrol_t *rcc = h->rc; |
2935 | 0 | uint64_t all_const_bits = 0; |
2936 | 0 | double timescale = (double)h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale; |
2937 | 0 | double duration = 0; |
2938 | 0 | for( int i = 0; i < rcc->num_entries; i++ ) |
2939 | 0 | duration += rcc->entry[i].i_duration; |
2940 | 0 | duration *= timescale; |
2941 | 0 | uint64_t all_available_bits = h->param.rc.i_bitrate * 1000. * duration; |
2942 | 0 | double rate_factor, step_mult; |
2943 | 0 | double qblur = h->param.rc.f_qblur; |
2944 | 0 | double cplxblur = h->param.rc.f_complexity_blur; |
2945 | 0 | const int filter_size = (int)(qblur*4) | 1; |
2946 | 0 | double expected_bits; |
2947 | 0 | double *qscale, *blurred_qscale; |
2948 | 0 | double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80); |
2949 | | |
2950 | | /* find total/average complexity & const_bits */ |
2951 | 0 | for( int i = 0; i < rcc->num_entries; i++ ) |
2952 | 0 | { |
2953 | 0 | ratecontrol_entry_t *rce = &rcc->entry[i]; |
2954 | 0 | all_const_bits += rce->misc_bits; |
2955 | 0 | } |
2956 | |
|
2957 | 0 | if( all_available_bits < all_const_bits) |
2958 | 0 | { |
2959 | 0 | x264_log( h, X264_LOG_ERROR, "requested bitrate is too low. estimated minimum is %d kbps\n", |
2960 | 0 | (int)(all_const_bits * rcc->fps / (rcc->num_entries * 1000.)) ); |
2961 | 0 | return -1; |
2962 | 0 | } |
2963 | | |
2964 | | /* Blur complexities, to reduce local fluctuation of QP. |
2965 | | * We don't blur the QPs directly, because then one very simple frame |
2966 | | * could drag down the QP of a nearby complex frame and give it more |
2967 | | * bits than intended. */ |
2968 | 0 | for( int i = 0; i < rcc->num_entries; i++ ) |
2969 | 0 | { |
2970 | 0 | ratecontrol_entry_t *rce = &rcc->entry[i]; |
2971 | 0 | double weight_sum = 0; |
2972 | 0 | double cplx_sum = 0; |
2973 | 0 | double weight = 1.0; |
2974 | 0 | double gaussian_weight; |
2975 | | /* weighted average of cplx of future frames */ |
2976 | 0 | for( int j = 1; j < cplxblur*2 && j < rcc->num_entries-i; j++ ) |
2977 | 0 | { |
2978 | 0 | ratecontrol_entry_t *rcj = &rcc->entry[i+j]; |
2979 | 0 | double frame_duration = CLIP_DURATION(rcj->i_duration * timescale) / BASE_FRAME_DURATION; |
2980 | 0 | weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 ); |
2981 | 0 | if( weight < .0001 ) |
2982 | 0 | break; |
2983 | 0 | gaussian_weight = weight * exp( -j*j/200.0 ); |
2984 | 0 | weight_sum += gaussian_weight; |
2985 | 0 | cplx_sum += gaussian_weight * (qscale2bits( rcj, 1 ) - rcj->misc_bits) / frame_duration; |
2986 | 0 | } |
2987 | | /* weighted average of cplx of past frames */ |
2988 | 0 | weight = 1.0; |
2989 | 0 | for( int j = 0; j <= cplxblur*2 && j <= i; j++ ) |
2990 | 0 | { |
2991 | 0 | ratecontrol_entry_t *rcj = &rcc->entry[i-j]; |
2992 | 0 | double frame_duration = CLIP_DURATION(rcj->i_duration * timescale) / BASE_FRAME_DURATION; |
2993 | 0 | gaussian_weight = weight * exp( -j*j/200.0 ); |
2994 | 0 | weight_sum += gaussian_weight; |
2995 | 0 | cplx_sum += gaussian_weight * (qscale2bits( rcj, 1 ) - rcj->misc_bits) / frame_duration; |
2996 | 0 | weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 ); |
2997 | 0 | if( weight < .0001 ) |
2998 | 0 | break; |
2999 | 0 | } |
3000 | 0 | rce->blurred_complexity = cplx_sum / weight_sum; |
3001 | 0 | } |
3002 | |
|
3003 | 0 | CHECKED_MALLOC( qscale, sizeof(double)*rcc->num_entries ); |
3004 | 0 | if( filter_size > 1 ) |
3005 | 0 | CHECKED_MALLOC( blurred_qscale, sizeof(double)*rcc->num_entries ); |
3006 | 0 | else |
3007 | 0 | blurred_qscale = qscale; |
3008 | | |
3009 | | /* Search for a factor which, when multiplied by the RCEQ values from |
3010 | | * each frame, adds up to the desired total size. |
3011 | | * There is no exact closed-form solution because of VBV constraints and |
3012 | | * because qscale2bits is not invertible, but we can start with the simple |
3013 | | * approximation of scaling the 1st pass by the ratio of bitrates. |
3014 | | * The search range is probably overkill, but speed doesn't matter here. */ |
3015 | | |
3016 | 0 | expected_bits = 1; |
3017 | 0 | for( int i = 0; i < rcc->num_entries; i++ ) |
3018 | 0 | { |
3019 | 0 | double q = get_qscale(h, &rcc->entry[i], 1.0, i); |
3020 | 0 | expected_bits += qscale2bits(&rcc->entry[i], q); |
3021 | 0 | rcc->last_qscale_for[rcc->entry[i].pict_type] = q; |
3022 | 0 | } |
3023 | 0 | step_mult = all_available_bits / expected_bits; |
3024 | |
|
3025 | 0 | rate_factor = 0; |
3026 | 0 | for( double step = 1E4 * step_mult; step > 1E-7 * step_mult; step *= 0.5) |
3027 | 0 | { |
3028 | 0 | expected_bits = 0; |
3029 | 0 | rate_factor += step; |
3030 | |
|
3031 | 0 | rcc->last_non_b_pict_type = -1; |
3032 | 0 | rcc->last_accum_p_norm = 1; |
3033 | 0 | rcc->accum_p_norm = 0; |
3034 | |
|
3035 | 0 | rcc->last_qscale_for[0] = |
3036 | 0 | rcc->last_qscale_for[1] = |
3037 | 0 | rcc->last_qscale_for[2] = pow( base_cplx, 1 - rcc->qcompress ) / rate_factor; |
3038 | | |
3039 | | /* find qscale */ |
3040 | 0 | for( int i = 0; i < rcc->num_entries; i++ ) |
3041 | 0 | { |
3042 | 0 | qscale[i] = get_qscale( h, &rcc->entry[i], rate_factor, -1 ); |
3043 | 0 | rcc->last_qscale_for[rcc->entry[i].pict_type] = qscale[i]; |
3044 | 0 | } |
3045 | | |
3046 | | /* fixed I/B qscale relative to P */ |
3047 | 0 | for( int i = rcc->num_entries-1; i >= 0; i-- ) |
3048 | 0 | { |
3049 | 0 | qscale[i] = get_diff_limited_q( h, &rcc->entry[i], qscale[i], i ); |
3050 | 0 | assert(qscale[i] >= 0); |
3051 | 0 | } |
3052 | | |
3053 | | /* smooth curve */ |
3054 | 0 | if( filter_size > 1 ) |
3055 | 0 | { |
3056 | 0 | assert( filter_size%2 == 1 ); |
3057 | 0 | for( int i = 0; i < rcc->num_entries; i++ ) |
3058 | 0 | { |
3059 | 0 | ratecontrol_entry_t *rce = &rcc->entry[i]; |
3060 | 0 | double q = 0.0, sum = 0.0; |
3061 | |
|
3062 | 0 | for( int j = 0; j < filter_size; j++ ) |
3063 | 0 | { |
3064 | 0 | int idx = i+j-filter_size/2; |
3065 | 0 | double d = idx-i; |
3066 | 0 | double coeff = qblur==0 ? 1.0 : exp( -d*d/(qblur*qblur) ); |
3067 | 0 | if( idx < 0 || idx >= rcc->num_entries ) |
3068 | 0 | continue; |
3069 | 0 | if( rce->pict_type != rcc->entry[idx].pict_type ) |
3070 | 0 | continue; |
3071 | 0 | q += qscale[idx] * coeff; |
3072 | 0 | sum += coeff; |
3073 | 0 | } |
3074 | 0 | blurred_qscale[i] = q/sum; |
3075 | 0 | } |
3076 | 0 | } |
3077 | | |
3078 | | /* find expected bits */ |
3079 | 0 | for( int i = 0; i < rcc->num_entries; i++ ) |
3080 | 0 | { |
3081 | 0 | ratecontrol_entry_t *rce = &rcc->entry[i]; |
3082 | 0 | rce->new_qscale = clip_qscale( h, rce->pict_type, blurred_qscale[i] ); |
3083 | 0 | assert(rce->new_qscale >= 0); |
3084 | 0 | expected_bits += qscale2bits( rce, rce->new_qscale ); |
3085 | 0 | } |
3086 | |
|
3087 | 0 | if( expected_bits > all_available_bits ) |
3088 | 0 | rate_factor -= step; |
3089 | 0 | } |
3090 | |
|
3091 | 0 | x264_free( qscale ); |
3092 | 0 | if( filter_size > 1 ) |
3093 | 0 | x264_free( blurred_qscale ); |
3094 | |
|
3095 | 0 | if( rcc->b_vbv ) |
3096 | 0 | if( vbv_pass2( h, all_available_bits ) ) |
3097 | 0 | return -1; |
3098 | 0 | expected_bits = count_expected_bits( h ); |
3099 | |
|
3100 | 0 | if( fabs( expected_bits/all_available_bits - 1.0 ) > 0.01 ) |
3101 | 0 | { |
3102 | 0 | double avgq = 0; |
3103 | 0 | for( int i = 0; i < rcc->num_entries; i++ ) |
3104 | 0 | avgq += rcc->entry[i].new_qscale; |
3105 | 0 | avgq = qscale2qp( avgq / rcc->num_entries ); |
3106 | |
|
3107 | 0 | if( expected_bits > all_available_bits || !rcc->b_vbv ) |
3108 | 0 | x264_log( h, X264_LOG_WARNING, "Error: 2pass curve failed to converge\n" ); |
3109 | 0 | x264_log( h, X264_LOG_WARNING, "target: %.2f kbit/s, expected: %.2f kbit/s, avg QP: %.4f\n", |
3110 | 0 | (float)h->param.rc.i_bitrate, |
3111 | 0 | expected_bits * rcc->fps / (rcc->num_entries * 1000.), |
3112 | 0 | avgq ); |
3113 | 0 | if( expected_bits < all_available_bits && avgq < h->param.rc.i_qp_min + 2 ) |
3114 | 0 | { |
3115 | 0 | if( h->param.rc.i_qp_min > 0 ) |
3116 | 0 | x264_log( h, X264_LOG_WARNING, "try reducing target bitrate or reducing qp_min (currently %d)\n", h->param.rc.i_qp_min ); |
3117 | 0 | else |
3118 | 0 | x264_log( h, X264_LOG_WARNING, "try reducing target bitrate\n" ); |
3119 | 0 | } |
3120 | 0 | else if( expected_bits > all_available_bits && avgq > h->param.rc.i_qp_max - 2 ) |
3121 | 0 | { |
3122 | 0 | if( h->param.rc.i_qp_max < QP_MAX ) |
3123 | 0 | x264_log( h, X264_LOG_WARNING, "try increasing target bitrate or increasing qp_max (currently %d)\n", h->param.rc.i_qp_max ); |
3124 | 0 | else |
3125 | 0 | x264_log( h, X264_LOG_WARNING, "try increasing target bitrate\n"); |
3126 | 0 | } |
3127 | 0 | else if( !(rcc->b_2pass && rcc->b_vbv) ) |
3128 | 0 | x264_log( h, X264_LOG_WARNING, "internal error\n" ); |
3129 | 0 | } |
3130 | |
|
3131 | 0 | return 0; |
3132 | 0 | fail: |
3133 | 0 | return -1; |
3134 | 0 | } |