/src/ffmpeg/libavcodec/opus/enc_psy.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Opus encoder |
3 | | * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com> |
4 | | * |
5 | | * This file is part of FFmpeg. |
6 | | * |
7 | | * FFmpeg is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * FFmpeg is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with FFmpeg; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | #include <float.h> |
23 | | |
24 | | #include "libavutil/mem.h" |
25 | | #include "enc_psy.h" |
26 | | #include "celt.h" |
27 | | #include "pvq.h" |
28 | | #include "tab.h" |
29 | | #include "libavfilter/window_func.h" |
30 | | |
31 | | static float pvq_band_cost(CeltPVQ *pvq, CeltFrame *f, OpusRangeCoder *rc, int band, |
32 | | float *bits, float lambda) |
33 | 0 | { |
34 | 0 | int i, b = 0; |
35 | 0 | uint32_t cm[2] = { (1 << f->blocks) - 1, (1 << f->blocks) - 1 }; |
36 | 0 | const int band_size = ff_celt_freq_range[band] << f->size; |
37 | 0 | float buf[176 * 2], lowband_scratch[176], norm1[176], norm2[176]; |
38 | 0 | float dist, cost, err_x = 0.0f, err_y = 0.0f; |
39 | 0 | float *X = buf; |
40 | 0 | float *X_orig = f->block[0].coeffs + (ff_celt_freq_bands[band] << f->size); |
41 | 0 | float *Y = (f->channels == 2) ? &buf[176] : NULL; |
42 | 0 | float *Y_orig = f->block[1].coeffs + (ff_celt_freq_bands[band] << f->size); |
43 | 0 | OPUS_RC_CHECKPOINT_SPAWN(rc); |
44 | |
|
45 | 0 | memcpy(X, X_orig, band_size*sizeof(float)); |
46 | 0 | if (Y) |
47 | 0 | memcpy(Y, Y_orig, band_size*sizeof(float)); |
48 | |
|
49 | 0 | f->remaining2 = ((f->framebits << 3) - f->anticollapse_needed) - opus_rc_tell_frac(rc) - 1; |
50 | 0 | if (band <= f->coded_bands - 1) { |
51 | 0 | int curr_balance = f->remaining / FFMIN(3, f->coded_bands - band); |
52 | 0 | b = av_clip_uintp2(FFMIN(f->remaining2 + 1, f->pulses[band] + curr_balance), 14); |
53 | 0 | } |
54 | |
|
55 | 0 | if (f->dual_stereo) { |
56 | 0 | pvq->quant_band(pvq, f, rc, band, X, NULL, band_size, b / 2, f->blocks, NULL, |
57 | 0 | f->size, norm1, 0, 1.0f, lowband_scratch, cm[0]); |
58 | |
|
59 | 0 | pvq->quant_band(pvq, f, rc, band, Y, NULL, band_size, b / 2, f->blocks, NULL, |
60 | 0 | f->size, norm2, 0, 1.0f, lowband_scratch, cm[1]); |
61 | 0 | } else { |
62 | 0 | pvq->quant_band(pvq, f, rc, band, X, Y, band_size, b, f->blocks, NULL, f->size, |
63 | 0 | norm1, 0, 1.0f, lowband_scratch, cm[0] | cm[1]); |
64 | 0 | } |
65 | |
|
66 | 0 | for (i = 0; i < band_size; i++) { |
67 | 0 | err_x += (X[i] - X_orig[i])*(X[i] - X_orig[i]); |
68 | 0 | if (Y) |
69 | 0 | err_y += (Y[i] - Y_orig[i])*(Y[i] - Y_orig[i]); |
70 | 0 | } |
71 | |
|
72 | 0 | dist = sqrtf(err_x) + sqrtf(err_y); |
73 | 0 | cost = OPUS_RC_CHECKPOINT_BITS(rc)/8.0f; |
74 | 0 | *bits += cost; |
75 | |
|
76 | 0 | OPUS_RC_CHECKPOINT_ROLLBACK(rc); |
77 | |
|
78 | 0 | return lambda*dist*cost; |
79 | 0 | } |
80 | | |
81 | | /* Populate metrics without taking into consideration neighbouring steps */ |
82 | | static void step_collect_psy_metrics(OpusPsyContext *s, int index) |
83 | 0 | { |
84 | 0 | int silence = 0, ch, i, j; |
85 | 0 | OpusPsyStep *st = s->steps[index]; |
86 | |
|
87 | 0 | st->index = index; |
88 | |
|
89 | 0 | for (ch = 0; ch < s->avctx->ch_layout.nb_channels; ch++) { |
90 | 0 | const int lap_size = (1 << s->bsize_analysis); |
91 | 0 | for (i = 1; i <= FFMIN(lap_size, index); i++) { |
92 | 0 | const int offset = i*120; |
93 | 0 | AVFrame *cur = ff_bufqueue_peek(s->bufqueue, index - i); |
94 | 0 | memcpy(&s->scratch[offset], cur->extended_data[ch], cur->nb_samples*sizeof(float)); |
95 | 0 | } |
96 | 0 | for (i = 0; i < lap_size; i++) { |
97 | 0 | const int offset = i*120 + lap_size; |
98 | 0 | AVFrame *cur = ff_bufqueue_peek(s->bufqueue, index + i); |
99 | 0 | memcpy(&s->scratch[offset], cur->extended_data[ch], cur->nb_samples*sizeof(float)); |
100 | 0 | } |
101 | |
|
102 | 0 | s->dsp->vector_fmul(s->scratch, s->scratch, s->window[s->bsize_analysis], |
103 | 0 | (OPUS_BLOCK_SIZE(s->bsize_analysis) << 1)); |
104 | |
|
105 | 0 | s->mdct_fn[s->bsize_analysis](s->mdct[s->bsize_analysis], st->coeffs[ch], |
106 | 0 | s->scratch, sizeof(float)); |
107 | |
|
108 | 0 | for (i = 0; i < CELT_MAX_BANDS; i++) |
109 | 0 | st->bands[ch][i] = &st->coeffs[ch][ff_celt_freq_bands[i] << s->bsize_analysis]; |
110 | 0 | } |
111 | |
|
112 | 0 | for (ch = 0; ch < s->avctx->ch_layout.nb_channels; ch++) { |
113 | 0 | for (i = 0; i < CELT_MAX_BANDS; i++) { |
114 | 0 | float avg_c_s, energy = 0.0f, dist_dev = 0.0f; |
115 | 0 | const int range = ff_celt_freq_range[i] << s->bsize_analysis; |
116 | 0 | const float *coeffs = st->bands[ch][i]; |
117 | 0 | for (j = 0; j < range; j++) |
118 | 0 | energy += coeffs[j]*coeffs[j]; |
119 | |
|
120 | 0 | st->energy[ch][i] += sqrtf(energy); |
121 | 0 | silence |= !!st->energy[ch][i]; |
122 | 0 | avg_c_s = energy / range; |
123 | |
|
124 | 0 | for (j = 0; j < range; j++) { |
125 | 0 | const float c_s = coeffs[j]*coeffs[j]; |
126 | 0 | dist_dev += (avg_c_s - c_s)*(avg_c_s - c_s); |
127 | 0 | } |
128 | |
|
129 | 0 | st->tone[ch][i] += sqrtf(dist_dev); |
130 | 0 | } |
131 | 0 | } |
132 | |
|
133 | 0 | st->silence = !silence; |
134 | |
|
135 | 0 | if (s->avctx->ch_layout.nb_channels > 1) { |
136 | 0 | for (i = 0; i < CELT_MAX_BANDS; i++) { |
137 | 0 | float incompat = 0.0f; |
138 | 0 | const float *coeffs1 = st->bands[0][i]; |
139 | 0 | const float *coeffs2 = st->bands[1][i]; |
140 | 0 | const int range = ff_celt_freq_range[i] << s->bsize_analysis; |
141 | 0 | for (j = 0; j < range; j++) |
142 | 0 | incompat += (coeffs1[j] - coeffs2[j])*(coeffs1[j] - coeffs2[j]); |
143 | 0 | st->stereo[i] = sqrtf(incompat); |
144 | 0 | } |
145 | 0 | } |
146 | |
|
147 | 0 | for (ch = 0; ch < s->avctx->ch_layout.nb_channels; ch++) { |
148 | 0 | for (i = 0; i < CELT_MAX_BANDS; i++) { |
149 | 0 | OpusBandExcitation *ex = &s->ex[ch][i]; |
150 | 0 | float bp_e = bessel_filter(&s->bfilter_lo[ch][i], st->energy[ch][i]); |
151 | 0 | bp_e = bessel_filter(&s->bfilter_hi[ch][i], bp_e); |
152 | 0 | bp_e *= bp_e; |
153 | 0 | if (bp_e > ex->excitation) { |
154 | 0 | st->change_amp[ch][i] = bp_e - ex->excitation; |
155 | 0 | st->total_change += st->change_amp[ch][i]; |
156 | 0 | ex->excitation = ex->excitation_init = bp_e; |
157 | 0 | ex->excitation_dist = 0.0f; |
158 | 0 | } |
159 | 0 | if (ex->excitation > 0.0f) { |
160 | 0 | ex->excitation -= av_clipf((1/expf(ex->excitation_dist)), ex->excitation_init/20, ex->excitation_init/1.09); |
161 | 0 | ex->excitation = FFMAX(ex->excitation, 0.0f); |
162 | 0 | ex->excitation_dist += 1.0f; |
163 | 0 | } |
164 | 0 | } |
165 | 0 | } |
166 | 0 | } |
167 | | |
168 | | static void search_for_change_points(OpusPsyContext *s, float tgt_change, |
169 | | int offset_s, int offset_e, int resolution, |
170 | | int level) |
171 | 0 | { |
172 | 0 | int i; |
173 | 0 | float c_change = 0.0f; |
174 | 0 | if ((offset_e - offset_s) <= resolution) |
175 | 0 | return; |
176 | 0 | for (i = offset_s; i < offset_e; i++) { |
177 | 0 | c_change += s->steps[i]->total_change; |
178 | 0 | if (c_change > tgt_change) |
179 | 0 | break; |
180 | 0 | } |
181 | 0 | if (i == offset_e) |
182 | 0 | return; |
183 | 0 | search_for_change_points(s, tgt_change / 2.0f, offset_s, i + 0, resolution, level + 1); |
184 | 0 | s->inflection_points[s->inflection_points_count++] = i; |
185 | 0 | search_for_change_points(s, tgt_change / 2.0f, i + 1, offset_e, resolution, level + 1); |
186 | 0 | } |
187 | | |
188 | | static int flush_silent_frames(OpusPsyContext *s) |
189 | 0 | { |
190 | 0 | int fsize, silent_frames; |
191 | |
|
192 | 0 | for (silent_frames = 0; silent_frames < s->buffered_steps; silent_frames++) |
193 | 0 | if (!s->steps[silent_frames]->silence) |
194 | 0 | break; |
195 | 0 | if (--silent_frames < 0) |
196 | 0 | return 0; |
197 | | |
198 | 0 | for (fsize = CELT_BLOCK_960; fsize > CELT_BLOCK_120; fsize--) { |
199 | 0 | if ((1 << fsize) > silent_frames) |
200 | 0 | continue; |
201 | 0 | s->p.frames = FFMIN(silent_frames / (1 << fsize), 48 >> fsize); |
202 | 0 | s->p.framesize = fsize; |
203 | 0 | return 1; |
204 | 0 | } |
205 | | |
206 | 0 | return 0; |
207 | 0 | } |
208 | | |
209 | | /* Main function which decides frame size and frames per current packet */ |
210 | | static void psy_output_groups(OpusPsyContext *s) |
211 | 0 | { |
212 | 0 | int max_delay_samples = (s->options->max_delay_ms*s->avctx->sample_rate)/1000; |
213 | 0 | int max_bsize = FFMIN(OPUS_SAMPLES_TO_BLOCK_SIZE(max_delay_samples), CELT_BLOCK_960); |
214 | | |
215 | | /* These don't change for now */ |
216 | 0 | s->p.mode = OPUS_MODE_CELT; |
217 | 0 | s->p.bandwidth = OPUS_BANDWIDTH_FULLBAND; |
218 | | |
219 | | /* Flush silent frames ASAP */ |
220 | 0 | if (s->steps[0]->silence && flush_silent_frames(s)) |
221 | 0 | return; |
222 | | |
223 | 0 | s->p.framesize = FFMIN(max_bsize, CELT_BLOCK_960); |
224 | 0 | s->p.frames = 1; |
225 | 0 | } |
226 | | |
227 | | int ff_opus_psy_process(OpusPsyContext *s, OpusPacketInfo *p) |
228 | 0 | { |
229 | 0 | int i; |
230 | 0 | float total_energy_change = 0.0f; |
231 | |
|
232 | 0 | if (s->buffered_steps < s->max_steps && !s->eof) { |
233 | 0 | const int awin = (1 << s->bsize_analysis); |
234 | 0 | if (++s->steps_to_process >= awin) { |
235 | 0 | step_collect_psy_metrics(s, s->buffered_steps - awin + 1); |
236 | 0 | s->steps_to_process = 0; |
237 | 0 | } |
238 | 0 | if ((++s->buffered_steps) < s->max_steps) |
239 | 0 | return 1; |
240 | 0 | } |
241 | | |
242 | 0 | for (i = 0; i < s->buffered_steps; i++) |
243 | 0 | total_energy_change += s->steps[i]->total_change; |
244 | |
|
245 | 0 | search_for_change_points(s, total_energy_change / 2.0f, 0, |
246 | 0 | s->buffered_steps, 1, 0); |
247 | |
|
248 | 0 | psy_output_groups(s); |
249 | |
|
250 | 0 | p->frames = s->p.frames; |
251 | 0 | p->framesize = s->p.framesize; |
252 | 0 | p->mode = s->p.mode; |
253 | 0 | p->bandwidth = s->p.bandwidth; |
254 | |
|
255 | 0 | return 0; |
256 | 0 | } |
257 | | |
258 | | void ff_opus_psy_celt_frame_init(OpusPsyContext *s, CeltFrame *f, int index) |
259 | 0 | { |
260 | 0 | int i, neighbouring_points = 0, start_offset = 0; |
261 | 0 | int radius = (1 << s->p.framesize), step_offset = radius*index; |
262 | 0 | int silence = 1; |
263 | |
|
264 | 0 | f->start_band = (s->p.mode == OPUS_MODE_HYBRID) ? 17 : 0; |
265 | 0 | f->end_band = ff_celt_band_end[s->p.bandwidth]; |
266 | 0 | f->channels = s->avctx->ch_layout.nb_channels; |
267 | 0 | f->size = s->p.framesize; |
268 | |
|
269 | 0 | for (i = 0; i < (1 << f->size); i++) |
270 | 0 | silence &= s->steps[index*(1 << f->size) + i]->silence; |
271 | |
|
272 | 0 | f->silence = silence; |
273 | 0 | if (f->silence) { |
274 | 0 | f->framebits = 0; /* Otherwise the silence flag eats up 16(!) bits */ |
275 | 0 | return; |
276 | 0 | } |
277 | | |
278 | 0 | for (i = 0; i < s->inflection_points_count; i++) { |
279 | 0 | if (s->inflection_points[i] >= step_offset) { |
280 | 0 | start_offset = i; |
281 | 0 | break; |
282 | 0 | } |
283 | 0 | } |
284 | |
|
285 | 0 | for (i = start_offset; i < FFMIN(radius, s->inflection_points_count - start_offset); i++) { |
286 | 0 | if (s->inflection_points[i] < (step_offset + radius)) { |
287 | 0 | neighbouring_points++; |
288 | 0 | } |
289 | 0 | } |
290 | | |
291 | | /* Transient flagging */ |
292 | 0 | f->transient = neighbouring_points > 0; |
293 | 0 | f->blocks = f->transient ? OPUS_BLOCK_SIZE(s->p.framesize)/CELT_OVERLAP : 1; |
294 | | |
295 | | /* Some sane defaults */ |
296 | 0 | f->pfilter = 0; |
297 | 0 | f->pf_gain = 0.5f; |
298 | 0 | f->pf_octave = 2; |
299 | 0 | f->pf_period = 1; |
300 | 0 | f->pf_tapset = 2; |
301 | | |
302 | | /* More sane defaults */ |
303 | 0 | f->tf_select = 0; |
304 | 0 | f->anticollapse = 1; |
305 | 0 | f->alloc_trim = 5; |
306 | 0 | f->skip_band_floor = f->end_band; |
307 | 0 | f->intensity_stereo = f->end_band; |
308 | 0 | f->dual_stereo = 0; |
309 | 0 | f->spread = CELT_SPREAD_NORMAL; |
310 | 0 | memset(f->tf_change, 0, sizeof(int)*CELT_MAX_BANDS); |
311 | 0 | memset(f->alloc_boost, 0, sizeof(int)*CELT_MAX_BANDS); |
312 | 0 | } |
313 | | |
314 | | static void celt_gauge_psy_weight(OpusPsyContext *s, OpusPsyStep **start, |
315 | | CeltFrame *f_out) |
316 | 0 | { |
317 | 0 | int i, f, ch; |
318 | 0 | int frame_size = OPUS_BLOCK_SIZE(s->p.framesize); |
319 | 0 | float rate, frame_bits = 0; |
320 | | |
321 | | /* Used for the global ROTATE flag */ |
322 | 0 | float tonal = 0.0f; |
323 | | |
324 | | /* Pseudo-weights */ |
325 | 0 | float band_score[CELT_MAX_BANDS] = { 0 }; |
326 | 0 | float max_score = 1.0f; |
327 | | |
328 | | /* Pass one - one loop around each band, computing unquant stuff */ |
329 | 0 | for (i = 0; i < CELT_MAX_BANDS; i++) { |
330 | 0 | float weight = 0.0f; |
331 | 0 | float tonal_contrib = 0.0f; |
332 | 0 | for (f = 0; f < (1 << s->p.framesize); f++) { |
333 | 0 | weight = start[f]->stereo[i]; |
334 | 0 | for (ch = 0; ch < s->avctx->ch_layout.nb_channels; ch++) { |
335 | 0 | weight += start[f]->change_amp[ch][i] + start[f]->tone[ch][i] + start[f]->energy[ch][i]; |
336 | 0 | tonal_contrib += start[f]->tone[ch][i]; |
337 | 0 | } |
338 | 0 | } |
339 | 0 | tonal += tonal_contrib; |
340 | 0 | band_score[i] = weight; |
341 | 0 | } |
342 | |
|
343 | 0 | tonal /= (float)CELT_MAX_BANDS; |
344 | |
|
345 | 0 | for (i = 0; i < CELT_MAX_BANDS; i++) { |
346 | 0 | if (band_score[i] > max_score) |
347 | 0 | max_score = band_score[i]; |
348 | 0 | } |
349 | |
|
350 | 0 | for (i = 0; i < CELT_MAX_BANDS; i++) { |
351 | 0 | f_out->alloc_boost[i] = (int)((band_score[i]/max_score)*3.0f); |
352 | 0 | frame_bits += band_score[i]*8.0f; |
353 | 0 | } |
354 | |
|
355 | 0 | tonal /= 1333136.0f; |
356 | 0 | f_out->spread = av_clip_uintp2(lrintf(tonal), 2); |
357 | |
|
358 | 0 | rate = ((float)s->avctx->bit_rate) + frame_bits*frame_size*16; |
359 | 0 | rate *= s->lambda; |
360 | 0 | rate /= s->avctx->sample_rate/frame_size; |
361 | |
|
362 | 0 | f_out->framebits = lrintf(rate); |
363 | 0 | f_out->framebits = FFMIN(f_out->framebits, OPUS_MAX_FRAME_SIZE * 8); |
364 | 0 | f_out->framebits = FFALIGN(f_out->framebits, 8); |
365 | 0 | } |
366 | | |
367 | | static int bands_dist(OpusPsyContext *s, CeltFrame *f, float *total_dist) |
368 | 0 | { |
369 | 0 | int i, tdist = 0.0f; |
370 | 0 | OpusRangeCoder dump; |
371 | |
|
372 | 0 | ff_opus_rc_enc_init(&dump); |
373 | 0 | ff_celt_bitalloc(f, &dump, 1); |
374 | |
|
375 | 0 | for (i = 0; i < CELT_MAX_BANDS; i++) { |
376 | 0 | float bits = 0.0f; |
377 | 0 | float dist = pvq_band_cost(f->pvq, f, &dump, i, &bits, s->lambda); |
378 | 0 | tdist += dist; |
379 | 0 | } |
380 | |
|
381 | 0 | *total_dist = tdist; |
382 | |
|
383 | 0 | return 0; |
384 | 0 | } |
385 | | |
386 | | static void celt_search_for_dual_stereo(OpusPsyContext *s, CeltFrame *f) |
387 | 0 | { |
388 | 0 | float td1, td2; |
389 | 0 | f->dual_stereo = 0; |
390 | |
|
391 | 0 | if (s->avctx->ch_layout.nb_channels < 2) |
392 | 0 | return; |
393 | | |
394 | 0 | bands_dist(s, f, &td1); |
395 | 0 | f->dual_stereo = 1; |
396 | 0 | bands_dist(s, f, &td2); |
397 | |
|
398 | 0 | f->dual_stereo = td2 < td1; |
399 | 0 | s->dual_stereo_used += td2 < td1; |
400 | 0 | } |
401 | | |
402 | | static void celt_search_for_intensity(OpusPsyContext *s, CeltFrame *f) |
403 | 0 | { |
404 | 0 | int i, best_band = CELT_MAX_BANDS - 1; |
405 | 0 | float dist, best_dist = FLT_MAX; |
406 | | /* TODO: fix, make some heuristic up here using the lambda value */ |
407 | 0 | float end_band = 0; |
408 | |
|
409 | 0 | if (s->avctx->ch_layout.nb_channels < 2) |
410 | 0 | return; |
411 | | |
412 | 0 | for (i = f->end_band; i >= end_band; i--) { |
413 | 0 | f->intensity_stereo = i; |
414 | 0 | bands_dist(s, f, &dist); |
415 | 0 | if (best_dist > dist) { |
416 | 0 | best_dist = dist; |
417 | 0 | best_band = i; |
418 | 0 | } |
419 | 0 | } |
420 | |
|
421 | 0 | f->intensity_stereo = best_band; |
422 | 0 | s->avg_is_band = (s->avg_is_band + f->intensity_stereo)/2.0f; |
423 | 0 | } |
424 | | |
425 | | static int celt_search_for_tf(OpusPsyContext *s, OpusPsyStep **start, CeltFrame *f) |
426 | 0 | { |
427 | 0 | int i, j, k, cway, config[2][CELT_MAX_BANDS] = { { 0 } }; |
428 | 0 | float score[2] = { 0 }; |
429 | |
|
430 | 0 | for (cway = 0; cway < 2; cway++) { |
431 | 0 | int mag[2]; |
432 | 0 | int base = f->transient ? 120 : 960; |
433 | |
|
434 | 0 | for (i = 0; i < 2; i++) { |
435 | 0 | int c = ff_celt_tf_select[f->size][f->transient][cway][i]; |
436 | 0 | mag[i] = c < 0 ? base >> FFABS(c) : base << FFABS(c); |
437 | 0 | } |
438 | |
|
439 | 0 | for (i = 0; i < CELT_MAX_BANDS; i++) { |
440 | 0 | float iscore0 = 0.0f; |
441 | 0 | float iscore1 = 0.0f; |
442 | 0 | for (j = 0; j < (1 << f->size); j++) { |
443 | 0 | for (k = 0; k < s->avctx->ch_layout.nb_channels; k++) { |
444 | 0 | iscore0 += start[j]->tone[k][i]*start[j]->change_amp[k][i]/mag[0]; |
445 | 0 | iscore1 += start[j]->tone[k][i]*start[j]->change_amp[k][i]/mag[1]; |
446 | 0 | } |
447 | 0 | } |
448 | 0 | config[cway][i] = FFABS(iscore0 - 1.0f) < FFABS(iscore1 - 1.0f); |
449 | 0 | score[cway] += config[cway][i] ? iscore1 : iscore0; |
450 | 0 | } |
451 | 0 | } |
452 | |
|
453 | 0 | f->tf_select = score[0] < score[1]; |
454 | 0 | memcpy(f->tf_change, config[f->tf_select], sizeof(int)*CELT_MAX_BANDS); |
455 | |
|
456 | 0 | return 0; |
457 | 0 | } |
458 | | |
459 | | int ff_opus_psy_celt_frame_process(OpusPsyContext *s, CeltFrame *f, int index) |
460 | 0 | { |
461 | 0 | int start_transient_flag = f->transient; |
462 | 0 | OpusPsyStep **start = &s->steps[index * (1 << s->p.framesize)]; |
463 | |
|
464 | 0 | if (f->silence) |
465 | 0 | return 0; |
466 | | |
467 | 0 | celt_gauge_psy_weight(s, start, f); |
468 | 0 | celt_search_for_intensity(s, f); |
469 | 0 | celt_search_for_dual_stereo(s, f); |
470 | 0 | celt_search_for_tf(s, start, f); |
471 | |
|
472 | 0 | if (f->transient != start_transient_flag) { |
473 | 0 | f->blocks = f->transient ? OPUS_BLOCK_SIZE(s->p.framesize)/CELT_OVERLAP : 1; |
474 | 0 | return 1; |
475 | 0 | } |
476 | | |
477 | 0 | return 0; |
478 | 0 | } |
479 | | |
480 | | void ff_opus_psy_postencode_update(OpusPsyContext *s, CeltFrame *f) |
481 | 0 | { |
482 | 0 | int i, frame_size = OPUS_BLOCK_SIZE(s->p.framesize); |
483 | 0 | int steps_out = s->p.frames*(frame_size/120); |
484 | 0 | void *tmp[FF_BUFQUEUE_SIZE]; |
485 | 0 | float ideal_fbits; |
486 | |
|
487 | 0 | for (i = 0; i < steps_out; i++) |
488 | 0 | memset(s->steps[i], 0, sizeof(OpusPsyStep)); |
489 | |
|
490 | 0 | for (i = 0; i < s->max_steps; i++) |
491 | 0 | tmp[i] = s->steps[i]; |
492 | |
|
493 | 0 | for (i = 0; i < s->max_steps; i++) { |
494 | 0 | const int i_new = i - steps_out; |
495 | 0 | s->steps[i_new < 0 ? s->max_steps + i_new : i_new] = tmp[i]; |
496 | 0 | } |
497 | |
|
498 | 0 | for (i = steps_out; i < s->buffered_steps; i++) |
499 | 0 | s->steps[i]->index -= steps_out; |
500 | |
|
501 | 0 | ideal_fbits = s->avctx->bit_rate/(s->avctx->sample_rate/frame_size); |
502 | |
|
503 | 0 | for (i = 0; i < s->p.frames; i++) { |
504 | 0 | s->avg_is_band += f[i].intensity_stereo; |
505 | 0 | s->lambda *= ideal_fbits / f[i].framebits; |
506 | 0 | } |
507 | |
|
508 | 0 | s->avg_is_band /= (s->p.frames + 1); |
509 | |
|
510 | 0 | s->steps_to_process = 0; |
511 | 0 | s->buffered_steps -= steps_out; |
512 | 0 | s->total_packets_out += s->p.frames; |
513 | 0 | s->inflection_points_count = 0; |
514 | 0 | } |
515 | | |
516 | | av_cold int ff_opus_psy_init(OpusPsyContext *s, AVCodecContext *avctx, |
517 | | struct FFBufQueue *bufqueue, OpusEncOptions *options) |
518 | 0 | { |
519 | 0 | int i, ch, ret; |
520 | |
|
521 | 0 | s->lambda = 1.0f; |
522 | 0 | s->options = options; |
523 | 0 | s->avctx = avctx; |
524 | 0 | s->bufqueue = bufqueue; |
525 | 0 | s->max_steps = ceilf(s->options->max_delay_ms/2.5f); |
526 | 0 | s->bsize_analysis = CELT_BLOCK_960; |
527 | 0 | s->avg_is_band = CELT_MAX_BANDS - 1; |
528 | 0 | s->inflection_points_count = 0; |
529 | |
|
530 | 0 | s->inflection_points = av_mallocz(sizeof(*s->inflection_points)*s->max_steps); |
531 | 0 | if (!s->inflection_points) { |
532 | 0 | ret = AVERROR(ENOMEM); |
533 | 0 | goto fail; |
534 | 0 | } |
535 | | |
536 | 0 | s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT); |
537 | 0 | if (!s->dsp) { |
538 | 0 | ret = AVERROR(ENOMEM); |
539 | 0 | goto fail; |
540 | 0 | } |
541 | | |
542 | 0 | for (ch = 0; ch < s->avctx->ch_layout.nb_channels; ch++) { |
543 | 0 | for (i = 0; i < CELT_MAX_BANDS; i++) { |
544 | 0 | bessel_init(&s->bfilter_hi[ch][i], 1.0f, 19.0f, 100.0f, 1); |
545 | 0 | bessel_init(&s->bfilter_lo[ch][i], 1.0f, 20.0f, 100.0f, 0); |
546 | 0 | } |
547 | 0 | } |
548 | |
|
549 | 0 | for (i = 0; i < s->max_steps; i++) { |
550 | 0 | s->steps[i] = av_mallocz(sizeof(OpusPsyStep)); |
551 | 0 | if (!s->steps[i]) { |
552 | 0 | ret = AVERROR(ENOMEM); |
553 | 0 | goto fail; |
554 | 0 | } |
555 | 0 | } |
556 | | |
557 | 0 | for (i = 0; i < CELT_BLOCK_NB; i++) { |
558 | 0 | float tmp; |
559 | 0 | const int len = OPUS_BLOCK_SIZE(i); |
560 | 0 | const float scale = 68 << (CELT_BLOCK_NB - 1 - i); |
561 | 0 | s->window[i] = av_malloc(2*len*sizeof(float)); |
562 | 0 | if (!s->window[i]) { |
563 | 0 | ret = AVERROR(ENOMEM); |
564 | 0 | goto fail; |
565 | 0 | } |
566 | 0 | generate_window_func(s->window[i], 2*len, WFUNC_SINE, &tmp); |
567 | 0 | ret = av_tx_init(&s->mdct[i], &s->mdct_fn[i], AV_TX_FLOAT_MDCT, |
568 | 0 | 0, 15 << (i + 3), &scale, 0); |
569 | 0 | if (ret < 0) |
570 | 0 | goto fail; |
571 | 0 | } |
572 | | |
573 | 0 | return 0; |
574 | | |
575 | 0 | fail: |
576 | 0 | av_freep(&s->inflection_points); |
577 | 0 | av_freep(&s->dsp); |
578 | |
|
579 | 0 | for (i = 0; i < CELT_BLOCK_NB; i++) { |
580 | 0 | av_tx_uninit(&s->mdct[i]); |
581 | 0 | av_freep(&s->window[i]); |
582 | 0 | } |
583 | |
|
584 | 0 | for (i = 0; i < s->max_steps; i++) |
585 | 0 | av_freep(&s->steps[i]); |
586 | |
|
587 | 0 | return ret; |
588 | 0 | } |
589 | | |
590 | | void ff_opus_psy_signal_eof(OpusPsyContext *s) |
591 | 0 | { |
592 | 0 | s->eof = 1; |
593 | 0 | } |
594 | | |
595 | | av_cold int ff_opus_psy_end(OpusPsyContext *s) |
596 | 0 | { |
597 | 0 | int i; |
598 | |
|
599 | 0 | av_freep(&s->inflection_points); |
600 | 0 | av_freep(&s->dsp); |
601 | |
|
602 | 0 | for (i = 0; i < CELT_BLOCK_NB; i++) { |
603 | 0 | av_tx_uninit(&s->mdct[i]); |
604 | 0 | av_freep(&s->window[i]); |
605 | 0 | } |
606 | |
|
607 | 0 | for (i = 0; i < s->max_steps; i++) |
608 | 0 | av_freep(&s->steps[i]); |
609 | |
|
610 | 0 | av_log(s->avctx, AV_LOG_INFO, "Average Intensity Stereo band: %0.1f\n", s->avg_is_band); |
611 | 0 | av_log(s->avctx, AV_LOG_INFO, "Dual Stereo used: %0.2f%%\n", ((float)s->dual_stereo_used/s->total_packets_out)*100.0f); |
612 | |
|
613 | 0 | return 0; |
614 | 0 | } |