/src/ffmpeg/libavcodec/aaccoder_twoloop.h
Line | Count | Source |
1 | | /* |
2 | | * AAC encoder twoloop coder |
3 | | * Copyright (C) 2008-2009 Konstantin Shishkov |
4 | | * |
5 | | * This file is part of FFmpeg. |
6 | | * |
7 | | * FFmpeg is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * FFmpeg is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with FFmpeg; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | /** |
23 | | * @file |
24 | | * AAC encoder twoloop coder |
25 | | * @author Konstantin Shishkov, Claudio Freire |
26 | | */ |
27 | | |
28 | | /** |
29 | | * This file contains a template for the twoloop coder function. |
30 | | * The following functions from aacenc_quantization/util.h must be provided |
31 | | * externally, as already included declarations. They're not included |
32 | | * explicitly here to make it possible to provide alternative implementations: |
33 | | * - quantize_band_cost |
34 | | * - abs_pow34_v |
35 | | * - find_max_val |
36 | | * - find_min_book |
37 | | * - find_form_factor |
38 | | */ |
39 | | |
40 | | #ifndef AVCODEC_AACCODER_TWOLOOP_H |
41 | | #define AVCODEC_AACCODER_TWOLOOP_H |
42 | | |
43 | | #include <float.h> |
44 | | #include "libavutil/mathematics.h" |
45 | | #include "mathops.h" |
46 | | #include "avcodec.h" |
47 | | #include "put_bits.h" |
48 | | #include "aac.h" |
49 | | #include "aacenc.h" |
50 | | #include "aactab.h" |
51 | | #include "aacenctab.h" |
52 | | |
53 | | /** Frequency in Hz for lower limit of noise substitution; converted to a coefficient index (pns_start_pos) by the twoloop search below **/ |
54 | 0 | #define NOISE_LOW_LIMIT 4000 |
55 | | |
56 | | /* Reflects the cost to change codebooks: yields 5 when g > 0 and the preceding band (w*16+g-1) is both zeroed and PNS-capable, 9 otherwise. The result is added to the running bit total by the twoloop search. */ |
57 | | static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g) |
58 | 0 | { |
59 | 0 | return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5; |
60 | 0 | } |
61 | | |
62 | | /** |
63 | | * two-loop quantizers search taken from ISO 13818-7 Appendix C |
   | | * |
   | | * Chooses a scalefactor index (sce->sf_idx), a zero flag (sce->zeroes) and a |
   | | * codebook (sce->band_type) for the bands of the channel element. Returns |
   | | * early, right after the initial scalefactor guess, if every band was zeroed. |
   | | * |
   | | * @param avctx  codec context; bit_rate, sample_rate, flags, cutoff, |
   | | *               global_quality and ch_layout.nb_channels are read |
   | | * @param s      encoder context; psy data, options and scoefs are used, and |
   | | *               psy.cutoff is written when avctx->cutoff is not positive |
   | | * @param sce    channel element whose coefficients are being quantized |
   | | * @param lambda rate-distortion parameter; the initial bit target is scaled |
   | | *               by lambda / 120 |
64 | | */ |
65 | | static void search_for_quantizers_twoloop(AVCodecContext *avctx, |
66 | | AACEncContext *s, |
67 | | SingleChannelElement *sce, |
68 | | const float lambda) |
69 | 0 | { |
70 | 0 | int start = 0, i, w, w2, g, recomprd; |
71 | 0 | int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate |
72 | 0 | / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->ch_layout.nb_channels) |
73 | 0 | * (lambda / 120.f); |
74 | 0 | int refbits = destbits; |
75 | 0 | int toomanybits, toofewbits; |
76 | 0 | char nzs[128]; |
77 | 0 | uint8_t nextband[128]; |
78 | 0 | int maxsf[128], minsf[128]; |
79 | 0 | float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128]; |
80 | 0 | float maxvals[128], spread_thr_r[128]; |
81 | 0 | float min_spread_thr_r, max_spread_thr_r; |
82 | | |
83 | | /** |
84 | | * rdlambda controls the maximum tolerated distortion. Twoloop |
85 | | * will keep iterating until it fails to lower it or it reaches |
86 | | * ulimit * rdlambda. Keeping it low increases quality on difficult |
87 | | * signals, but lower it too much, and bits will be taken from weak |
88 | | * signals, creating "holes". A balance is necessary. |
89 | | * rdmax and rdmin specify the relative deviation from rdlambda |
90 | | * allowed for tonality compensation |
91 | | */ |
92 | 0 | float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f); |
93 | 0 | const float nzslope = 1.5f; |
94 | 0 | float rdmin = 0.03125f; |
95 | 0 | float rdmax = 1.0f; |
96 | | |
97 | | /** |
98 | | * sfoffs controls an offset of optimum allocation that will be |
99 | | * applied based on lambda. Keep it real and modest, the loop |
100 | | * will take care of the rest, this just accelerates convergence |
101 | | */ |
102 | 0 | float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10); |
103 | 

104 | 0 | int fflag, minscaler, nminscaler; |
105 | 0 | int its = 0; |
106 | 0 | int maxits = 30; |
107 | 0 | int allz = 0; |
108 | 0 | int tbits; |
109 | 0 | int cutoff = 1024; |
110 | 0 | int pns_start_pos; |
111 | 0 | int prev; |
112 | | |
113 | | /** |
114 | | * zeroscale controls a multiplier of the threshold, if band energy |
115 | | * is below this, a zero is forced. Keep it lower than 1, unless |
116 | | * low lambda is used, because energy < threshold doesn't mean there's |
117 | | * no audible signal outright, it's just energy. Also make it rise |
118 | | * slower than rdlambda, as rdscale has due compensation with |
119 | | * noisy band deprioritization below, whereas zeroing logic is rather dumb |
120 | | */ |
121 | 0 | float zeroscale; |
122 | 0 | if (lambda > 120.f) { |
123 | 0 | zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f); |
124 | 0 | } else { |
125 | 0 | zeroscale = 1.f; |
126 | 0 | } |
127 | 

128 | 0 | if (s->psy.bitres.alloc >= 0) { |
129 | | /** |
130 | | * Psy granted us extra bits to use, from the reservoir |
131 | | * adjust for lambda except what psy already did |
132 | | */ |
133 | 0 | destbits = s->psy.bitres.alloc |
134 | 0 | * (lambda / (avctx->global_quality ? avctx->global_quality : 120)); |
135 | 0 | } |
136 | 

137 | 0 | if (avctx->flags & AV_CODEC_FLAG_QSCALE) { |
138 | | /** |
139 | | * Constant Q-scale doesn't compensate MS coding on its own |
140 | | * No need to be overly precise, this only controls RD |
141 | | * adjustment CB limits when going overboard |
142 | | */ |
143 | 0 | if (s->options.mid_side && s->cur_type == TYPE_CPE) |
144 | 0 | destbits *= 2; |
145 | | |
146 | | /** |
147 | | * When using a constant Q-scale, don't adjust bits, just use RD |
148 | | * Don't let it go overboard, though... 8x psy target is enough |
149 | | */ |
150 | 0 | toomanybits = 5800; |
151 | 0 | toofewbits = destbits / 16; |
152 | | |
153 | | /** Don't offset scalers, just RD */ |
154 | 0 | sfoffs = sce->ics.num_windows - 1; |
155 | 0 | rdlambda = sqrtf(rdlambda); |
156 | | |
157 | | /** search further */ |
158 | 0 | maxits *= 2; |
159 | 0 | } else { |
160 | | /* When using ABR, be strict, but a reasonable leeway is |
161 | | * critical to allow RC to smoothly track desired bitrate |
162 | | * without sudden quality drops that cause audible artifacts. |
163 | | * Symmetry is also desirable, to avoid systematic bias. |
164 | | */ |
165 | 0 | toomanybits = destbits + destbits/8; |
166 | 0 | toofewbits = destbits - destbits/8; |
167 | 

168 | 0 | sfoffs = 0; |
169 | 0 | rdlambda = sqrtf(rdlambda); |
170 | 0 | } |
171 | | |
172 | | /** and zero out above cutoff frequency */ |
173 | 0 | { |
174 | 0 | int wlen = 1024 / sce->ics.num_windows; |
175 | 0 | int bandwidth; |
176 | | |
177 | | /** |
178 | | * Scale, psy gives us constant quality, this LP only scales |
179 | | * bitrate by lambda, so we save bits on subjectively unimportant HF |
180 | | * rather than increase quantization noise. Adjust nominal bitrate |
181 | | * to effective bitrate according to encoding parameters, |
182 | | * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate. |
183 | | */ |
184 | 0 | float rate_bandwidth_multiplier = 1.5f; |
185 | 0 | int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE) |
186 | 0 | ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024) |
187 | 0 | : (avctx->bit_rate / avctx->ch_layout.nb_channels); |
188 | | |
189 | | /** Compensate for extensions that increase efficiency */ |
190 | 0 | if (s->options.pns || s->options.intensity_stereo) |
191 | 0 | frame_bit_rate *= 1.15f; |
192 | 

193 | 0 | if (avctx->cutoff > 0) { |
194 | 0 | bandwidth = avctx->cutoff; |
195 | 0 | } else { |
196 | 0 | bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate)); |
197 | 0 | s->psy.cutoff = bandwidth; |
198 | 0 | } |
199 | 

200 | 0 | cutoff = bandwidth * 2 * wlen / avctx->sample_rate; |
201 | 0 | pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate; |
202 | 0 | } |
203 | | |
204 | | /** |
205 | | * for values above this the decoder might end up in an endless loop |
206 | | * due to always having more bits than what can be encoded. |
207 | | */ |
208 | 0 | destbits = FFMIN(destbits, 5800); |
209 | 0 | toomanybits = FFMIN(toomanybits, 5800); |
210 | 0 | toofewbits = FFMIN(toofewbits, 5800); |
211 | | /** |
212 | | * XXX: some heuristic to determine initial quantizers will reduce search time |
213 | | * determine zero bands and upper distortion limits |
214 | | */ |
215 | 0 | min_spread_thr_r = -1; |
216 | 0 | max_spread_thr_r = -1; |
217 | 0 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
218 | 0 | for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) { |
219 | 0 | int nz = 0; |
220 | 0 | float uplim = 0.0f, energy = 0.0f, spread = 0.0f; |
221 | 0 | for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { |
222 | 0 | FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; |
223 | 0 | if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) { |
224 | 0 | sce->zeroes[(w+w2)*16+g] = 1; |
225 | 0 | continue; |
226 | 0 | } |
227 | 0 | nz = 1; |
228 | 0 | } |
229 | 0 | if (!nz) { |
230 | 0 | uplim = 0.0f; |
231 | 0 | } else { |
232 | 0 | nz = 0; |
233 | 0 | for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { |
234 | 0 | FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; |
235 | 0 | if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) |
236 | 0 | continue; |
237 | 0 | uplim += band->threshold; |
238 | 0 | energy += band->energy; |
239 | 0 | spread += band->spread; |
240 | 0 | nz++; |
241 | 0 | } |
242 | 0 | } |
243 | 0 | uplims[w*16+g] = uplim; |
244 | 0 | energies[w*16+g] = energy; |
245 | 0 | nzs[w*16+g] = nz; |
246 | 0 | sce->zeroes[w*16+g] = !nz; |
247 | 0 | allz |= nz; |
248 | 0 | if (nz && sce->can_pns[w*16+g]) { |
249 | 0 | spread_thr_r[w*16+g] = energy * nz / (uplim * spread); |
250 | 0 | if (min_spread_thr_r < 0) { |
251 | 0 | min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g]; |
252 | 0 | } else { |
253 | 0 | min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]); |
254 | 0 | max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]); |
255 | 0 | } |
256 | 0 | } |
257 | 0 | } |
258 | 0 | } |
259 | | |
260 | | /** Compute initial scalers */ |
261 | 0 | minscaler = 65535; |
262 | 0 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
263 | 0 | for (g = 0; g < sce->ics.num_swb; g++) { |
264 | 0 | if (sce->zeroes[w*16+g]) { |
265 | 0 | sce->sf_idx[w*16+g] = SCALE_ONE_POS; |
266 | 0 | continue; |
267 | 0 | } |
268 | | /** |
269 | | * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2). |
270 | | * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion, |
271 | | * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus |
272 | | * more robust. |
273 | | */ |
274 | 0 | sce->sf_idx[w*16+g] = av_clip( |
275 | 0 | SCALE_ONE_POS |
276 | 0 | + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g]) |
277 | 0 | + sfoffs, |
278 | 0 | 60, SCALE_MAX_POS); |
279 | 0 | minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]); |
280 | 0 | } |
281 | 0 | } |
282 | | |
283 | | /** Clip */ |
284 | 0 | minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512); |
285 | 0 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) |
286 | 0 | for (g = 0; g < sce->ics.num_swb; g++) |
287 | 0 | if (!sce->zeroes[w*16+g]) |
288 | 0 | sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1); |
289 | 

290 | 0 | if (!allz) /* despite its name, allz is nonzero iff at least one band is nonzero */ |
291 | 0 | return; |
292 | 0 | s->aacdsp.abs_pow34(s->scoefs, sce->coeffs, 1024); |
293 | 0 | ff_quantize_band_cost_cache_init(s); |
294 | 

295 | 0 | for (i = 0; i < sizeof(minsf) / sizeof(minsf[0]); ++i) |
296 | 0 | minsf[i] = 0; |
297 | 0 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
298 | 0 | start = w*128; |
299 | 0 | for (g = 0; g < sce->ics.num_swb; g++) { |
300 | 0 | const float *scaled = s->scoefs + start; |
301 | 0 | int minsfidx; |
302 | 0 | maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled); |
303 | 0 | if (maxvals[w*16+g] > 0) { |
304 | 0 | minsfidx = coef2minsf(maxvals[w*16+g]); |
305 | 0 | for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) |
306 | 0 | minsf[(w+w2)*16+g] = minsfidx; |
307 | 0 | } |
308 | 0 | start += sce->ics.swb_sizes[g]; |
309 | 0 | } |
310 | 0 | } |
311 | | |
312 | | /** |
313 | | * Scale uplims to match rate distortion to quality |
314 | | * by applying noisy band deprioritization and tonal band prioritization. |
315 | | * Maxval-energy ratio gives us an idea of how noisy/tonal the band is. |
316 | | * If maxval^2 ~ energy, then that band is mostly noise, and we can relax |
317 | | * rate distortion requirements. |
318 | | */ |
319 | 0 | memcpy(euplims, uplims, sizeof(euplims)); |
320 | 0 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
321 | | /** psy already prioritizes transients to some extent */ |
322 | 0 | float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f; |
323 | 0 | start = w*128; |
324 | 0 | for (g = 0; g < sce->ics.num_swb; g++) { |
325 | 0 | if (nzs[g] > 0) { /* NOTE(review): nzs[] was filled at index w*16+g above but is read at index g here, and the uplims divisors below index swb_sizes[] by w rather than g - confirm this is intended */ |
326 | 0 | float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f)); |
327 | 0 | float energy2uplim = find_form_factor( |
328 | 0 | sce->ics.group_len[w], sce->ics.swb_sizes[g], |
329 | 0 | uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]), |
330 | 0 | sce->coeffs + start, |
331 | 0 | nzslope * cleanup_factor); |
332 | 0 | energy2uplim *= de_psy_factor; |
333 | 0 | if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) { |
334 | | /** In ABR, we need to prioritize less and let rate control do its thing */ |
335 | 0 | energy2uplim = sqrtf(energy2uplim); |
336 | 0 | } |
337 | 0 | energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim)); |
338 | 0 | uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax) |
339 | 0 | * sce->ics.group_len[w]; |
340 | 

341 | 0 | energy2uplim = find_form_factor( |
342 | 0 | sce->ics.group_len[w], sce->ics.swb_sizes[g], |
343 | 0 | uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]), |
344 | 0 | sce->coeffs + start, |
345 | 0 | 2.0f); |
346 | 0 | energy2uplim *= de_psy_factor; |
347 | 0 | if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) { |
348 | | /** In ABR, we need to prioritize less and let rate control do its thing */ |
349 | 0 | energy2uplim = sqrtf(energy2uplim); |
350 | 0 | } |
351 | 0 | energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim)); |
352 | 0 | euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w], |
353 | 0 | 0.5f, 1.0f); |
354 | 0 | } |
355 | 0 | start += sce->ics.swb_sizes[g]; |
356 | 0 | } |
357 | 0 | } |
358 | 

359 | 0 | for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i) |
360 | 0 | maxsf[i] = SCALE_MAX_POS; |
361 | | |
362 | | //perform two-loop search |
363 | | //outer loop - improve quality |
364 | 0 | do { |
365 | | //inner loop - quantize spectrum to fit into given number of bits |
366 | 0 | int overdist; |
367 | 0 | int qstep = its ? 1 : 32; /* coarse step on the first outer iteration (halved on each inner pass), unit step afterwards */ |
368 | 0 | do { |
369 | 0 | int changed = 0; |
370 | 0 | prev = -1; |
371 | 0 | recomprd = 0; |
372 | 0 | tbits = 0; |
373 | 0 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
374 | 0 | start = w*128; |
375 | 0 | for (g = 0; g < sce->ics.num_swb; g++) { |
376 | 0 | const float *coefs = &sce->coeffs[start]; |
377 | 0 | const float *scaled = &s->scoefs[start]; |
378 | 0 | int bits = 0; |
379 | 0 | int cb; |
380 | 0 | float dist = 0.0f; |
381 | 0 | float qenergy = 0.0f; |
382 | 

383 | 0 | if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) { |
384 | 0 | start += sce->ics.swb_sizes[g]; |
385 | 0 | if (sce->can_pns[w*16+g]) { |
386 | | /** PNS isn't free */ |
387 | 0 | tbits += ff_pns_bits(sce, w, g); |
388 | 0 | } |
389 | 0 | continue; |
390 | 0 | } |
391 | 0 | cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); |
392 | 0 | for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { |
393 | 0 | int b; |
394 | 0 | float sqenergy; |
395 | 0 | dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128, |
396 | 0 | scaled + w2*128, |
397 | 0 | sce->ics.swb_sizes[g], |
398 | 0 | sce->sf_idx[w*16+g], |
399 | 0 | cb, |
400 | 0 | 1.0f, |
401 | 0 | INFINITY, |
402 | 0 | &b, &sqenergy, |
403 | 0 | 0); |
404 | 0 | bits += b; |
405 | 0 | qenergy += sqenergy; |
406 | 0 | } |
407 | 0 | dists[w*16+g] = dist - bits; /* presumably strips the rate term from the returned cost - TODO confirm against quantize_band_cost_cached */ |
408 | 0 | qenergies[w*16+g] = qenergy; |
409 | 0 | if (prev != -1) { |
410 | 0 | int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF); |
411 | 0 | bits += ff_aac_scalefactor_bits[sfdiff]; |
412 | 0 | } |
413 | 0 | tbits += bits; |
414 | 0 | start += sce->ics.swb_sizes[g]; |
415 | 0 | prev = sce->sf_idx[w*16+g]; |
416 | 0 | } |
417 | 0 | } |
418 | 0 | if (tbits > toomanybits) { |
419 | 0 | recomprd = 1; |
420 | 0 | for (i = 0; i < 128; i++) { |
421 | 0 | if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) { |
422 | 0 | int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i]; |
423 | 0 | int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep); |
424 | 0 | if (new_sf != sce->sf_idx[i]) { |
425 | 0 | sce->sf_idx[i] = new_sf; |
426 | 0 | changed = 1; |
427 | 0 | } |
428 | 0 | } |
429 | 0 | } |
430 | 0 | } else if (tbits < toofewbits) { |
431 | 0 | recomprd = 1; |
432 | 0 | for (i = 0; i < 128; i++) { |
433 | 0 | if (sce->sf_idx[i] > SCALE_ONE_POS) { |
434 | 0 | int new_sf = FFMAX3(minsf[i], SCALE_ONE_POS, sce->sf_idx[i] - qstep); |
435 | 0 | if (new_sf != sce->sf_idx[i]) { |
436 | 0 | sce->sf_idx[i] = new_sf; |
437 | 0 | changed = 1; |
438 | 0 | } |
439 | 0 | } |
440 | 0 | } |
441 | 0 | } |
442 | 0 | qstep >>= 1; |
443 | 0 | if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed) |
444 | 0 | qstep = 1; |
445 | 0 | } while (qstep); |
446 | 

447 | 0 | overdist = 1; |
448 | 0 | fflag = tbits < toofewbits; |
449 | 0 | for (i = 0; i < 2 && (overdist || recomprd); ++i) { |
450 | 0 | if (recomprd) { |
451 | | /** Must recompute distortion */ |
452 | 0 | prev = -1; |
453 | 0 | tbits = 0; |
454 | 0 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
455 | 0 | start = w*128; |
456 | 0 | for (g = 0; g < sce->ics.num_swb; g++) { |
457 | 0 | const float *coefs = sce->coeffs + start; |
458 | 0 | const float *scaled = s->scoefs + start; |
459 | 0 | int bits = 0; |
460 | 0 | int cb; |
461 | 0 | float dist = 0.0f; |
462 | 0 | float qenergy = 0.0f; |
463 | 

464 | 0 | if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) { |
465 | 0 | start += sce->ics.swb_sizes[g]; |
466 | 0 | if (sce->can_pns[w*16+g]) { |
467 | | /** PNS isn't free */ |
468 | 0 | tbits += ff_pns_bits(sce, w, g); |
469 | 0 | } |
470 | 0 | continue; |
471 | 0 | } |
472 | 0 | cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); |
473 | 0 | for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { |
474 | 0 | int b; |
475 | 0 | float sqenergy; |
476 | 0 | dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128, |
477 | 0 | scaled + w2*128, |
478 | 0 | sce->ics.swb_sizes[g], |
479 | 0 | sce->sf_idx[w*16+g], |
480 | 0 | cb, |
481 | 0 | 1.0f, |
482 | 0 | INFINITY, |
483 | 0 | &b, &sqenergy, |
484 | 0 | 0); |
485 | 0 | bits += b; |
486 | 0 | qenergy += sqenergy; |
487 | 0 | } |
488 | 0 | dists[w*16+g] = dist - bits; |
489 | 0 | qenergies[w*16+g] = qenergy; |
490 | 0 | if (prev != -1) { |
491 | 0 | int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF); |
492 | 0 | bits += ff_aac_scalefactor_bits[sfdiff]; |
493 | 0 | } |
494 | 0 | tbits += bits; |
495 | 0 | start += sce->ics.swb_sizes[g]; |
496 | 0 | prev = sce->sf_idx[w*16+g]; |
497 | 0 | } |
498 | 0 | } |
499 | 0 | } |
500 | 0 | if (!i && s->options.pns && its > maxits/2 && tbits > toofewbits) { |
501 | 0 | float maxoverdist = 0.0f; |
502 | 0 | float ovrfactor = 1.f+(maxits-its)*16.f/maxits; |
503 | 0 | overdist = recomprd = 0; |
504 | 0 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
505 | 0 | for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) { |
506 | 0 | if (!sce->zeroes[w*16+g] && sce->sf_idx[w*16+g] > SCALE_ONE_POS && dists[w*16+g] > uplims[w*16+g]*ovrfactor) { |
507 | 0 | float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]); |
508 | 0 | maxoverdist = FFMAX(maxoverdist, ovrdist); |
509 | 0 | overdist++; |
510 | 0 | } |
511 | 0 | } |
512 | 0 | } |
513 | 0 | if (overdist) { |
514 | | /* We have overdistorted bands, trade for zeroes (that can be noise) |
515 | | * Zero the bands in the lowest 1.25% spread-energy-threshold ranking |
516 | | */ |
517 | 0 | float minspread = max_spread_thr_r; |
518 | 0 | float maxspread = min_spread_thr_r; |
519 | 0 | float zspread; |
520 | 0 | int zeroable = 0; |
521 | 0 | int zeroed = 0; |
522 | 0 | int maxzeroed, zloop; |
523 | 0 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
524 | 0 | for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) { |
525 | 0 | if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) { |
526 | 0 | minspread = FFMIN(minspread, spread_thr_r[w*16+g]); |
527 | 0 | maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]); |
528 | 0 | zeroable++; |
529 | 0 | } |
530 | 0 | } |
531 | 0 | } |
532 | 0 | zspread = (maxspread-minspread) * 0.0125f + minspread; |
533 | | /* Don't PNS everything even if allowed. It suppresses bit starvation signals from RC, |
534 | | * and forces the hand of the later search_for_pns step. |
535 | | * Instead, PNS a fraction of the spread_thr_r range depending on how starved for bits we are, |
536 | | * and leave further PNSing to search_for_pns if worthwhile. |
537 | | */ |
538 | 0 | zspread = FFMIN3(min_spread_thr_r * 8.f, zspread, |
539 | 0 | ((toomanybits - tbits) * min_spread_thr_r + (tbits - toofewbits) * max_spread_thr_r) / (toomanybits - toofewbits + 1)); |
540 | 0 | maxzeroed = FFMIN(zeroable, FFMAX(1, (zeroable * its + maxits - 1) / (2 * maxits))); |
541 | 0 | for (zloop = 0; zloop < 2; zloop++) { |
542 | | /* Two passes: first distorted stuff - two birds in one shot and all that, |
543 | | * then anything viable. Viable means not zero, but either CB=zero-able |
544 | | * (too high SF), not SF <= 1 (that means we'd be operating at very high |
545 | | * quality, we don't want PNS when doing VHQ), PNS allowed, and within |
546 | | * the lowest ranking percentile. |
547 | | */ |
548 | 0 | float loopovrfactor = (zloop) ? 1.0f : ovrfactor; |
549 | 0 | int loopminsf = (zloop) ? (SCALE_ONE_POS - SCALE_DIV_512) : SCALE_ONE_POS; |
550 | 0 | int mcb; |
551 | 0 | for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) { |
552 | 0 | if (sce->ics.swb_offset[g] < pns_start_pos) |
553 | 0 | continue; |
554 | 0 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
555 | 0 | if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread |
556 | 0 | && sce->sf_idx[w*16+g] > loopminsf |
557 | 0 | && (dists[w*16+g] > loopovrfactor*uplims[w*16+g] || !(mcb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g])) |
558 | 0 | || (mcb <= 1 && dists[w*16+g] > FFMIN(uplims[w*16+g], euplims[w*16+g]))) ) { |
559 | 0 | sce->zeroes[w*16+g] = 1; |
560 | 0 | sce->band_type[w*16+g] = 0; |
561 | 0 | zeroed++; |
562 | 0 | } |
563 | 0 | } |
564 | 0 | } |
565 | 0 | } |
566 | 0 | if (zeroed) |
567 | 0 | recomprd = fflag = 1; |
568 | 0 | } else { |
569 | 0 | overdist = 0; |
570 | 0 | } |
571 | 0 | } |
572 | 0 | } |
573 | 

574 | 0 | minscaler = SCALE_MAX_POS; |
575 | 0 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
576 | 0 | for (g = 0; g < sce->ics.num_swb; g++) { |
577 | 0 | if (!sce->zeroes[w*16+g]) { |
578 | 0 | minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]); |
579 | 0 | } |
580 | 0 | } |
581 | 0 | } |
582 | 

583 | 0 | minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512); |
584 | 0 | prev = -1; |
585 | 0 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
586 | | /** Start with big steps, end up fine-tuning */ |
587 | 0 | int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10; |
588 | 0 | int edepth = depth+2; |
589 | 0 | float uplmax = its / (maxits*0.25f) + 1.0f; |
590 | 0 | uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f; |
591 | 0 | start = w * 128; |
592 | 0 | for (g = 0; g < sce->ics.num_swb; g++) { |
593 | 0 | int prevsc = sce->sf_idx[w*16+g]; |
594 | 0 | if (prev < 0 && !sce->zeroes[w*16+g]) |
595 | 0 | prev = sce->sf_idx[0]; /* NOTE(review): seeds prev from band 0 rather than from the current band w*16+g - confirm this is intended */ |
596 | 0 | if (!sce->zeroes[w*16+g]) { |
597 | 0 | const float *coefs = sce->coeffs + start; |
598 | 0 | const float *scaled = s->scoefs + start; |
599 | 0 | int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); |
600 | 0 | int mindeltasf = FFMAX(0, prev - SCALE_MAX_DIFF); |
601 | 0 | int maxdeltasf = FFMIN(SCALE_MAX_POS - SCALE_DIV_512, prev + SCALE_MAX_DIFF); |
602 | 0 | if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > FFMAX(mindeltasf, minsf[w*16+g])) { |
603 | | /* Try to make sure there is some energy in every nonzero band |
604 | | * NOTE: This algorithm must be forcibly imbalanced, pushing harder |
605 | | * on holes or more distorted bands at first, otherwise there's |
606 | | * no net gain (since the next iteration will offset all bands |
607 | | * on the opposite direction to compensate for extra bits) |
608 | | */ |
609 | 0 | for (i = 0; i < edepth && sce->sf_idx[w*16+g] > mindeltasf; ++i) { |
610 | 0 | int cb, bits; |
611 | 0 | float dist, qenergy; |
612 | 0 | int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1); |
613 | 0 | cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); |
614 | 0 | dist = qenergy = 0.f; |
615 | 0 | bits = 0; |
616 | 0 | if (!cb) { |
617 | 0 | maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]); |
618 | 0 | } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) { |
619 | 0 | break; |
620 | 0 | } |
621 | | /* !g is the DC band, it's important, since quantization error here |
622 | | * applies to less than a cycle, it creates horrible intermodulation |
623 | | * distortion if it doesn't stick to what psy requests |
624 | | */ |
625 | 0 | if (!g && sce->ics.num_windows > 1 && dists[w*16+g] >= euplims[w*16+g]) |
626 | 0 | maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]); |
627 | 0 | for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { |
628 | 0 | int b; |
629 | 0 | float sqenergy; |
630 | 0 | dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128, |
631 | 0 | scaled + w2*128, |
632 | 0 | sce->ics.swb_sizes[g], |
633 | 0 | sce->sf_idx[w*16+g]-1, |
634 | 0 | cb, |
635 | 0 | 1.0f, |
636 | 0 | INFINITY, |
637 | 0 | &b, &sqenergy, |
638 | 0 | 0); |
639 | 0 | bits += b; |
640 | 0 | qenergy += sqenergy; |
641 | 0 | } |
642 | 0 | sce->sf_idx[w*16+g]--; |
643 | 0 | dists[w*16+g] = dist - bits; |
644 | 0 | qenergies[w*16+g] = qenergy; |
645 | 0 | if (mb && (sce->sf_idx[w*16+g] < mindeltasf || ( |
646 | 0 | (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g])) |
647 | 0 | && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g]) |
648 | 0 | ) )) { |
649 | 0 | break; |
650 | 0 | } |
651 | 0 | } |
652 | 0 | } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < FFMIN(maxdeltasf, maxsf[w*16+g]) |
653 | 0 | && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g])) |
654 | 0 | && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g]) |
655 | 0 | ) { |
656 | | /** Um... over target. Save bits for more important stuff. */ |
657 | 0 | for (i = 0; i < depth && sce->sf_idx[w*16+g] < maxdeltasf; ++i) { |
658 | 0 | int cb, bits; |
659 | 0 | float dist, qenergy; |
660 | 0 | cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1); |
661 | 0 | if (cb > 0) { |
662 | 0 | dist = qenergy = 0.f; |
663 | 0 | bits = 0; |
664 | 0 | for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { |
665 | 0 | int b; |
666 | 0 | float sqenergy; |
667 | 0 | dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128, |
668 | 0 | scaled + w2*128, |
669 | 0 | sce->ics.swb_sizes[g], |
670 | 0 | sce->sf_idx[w*16+g]+1, |
671 | 0 | cb, |
672 | 0 | 1.0f, |
673 | 0 | INFINITY, |
674 | 0 | &b, &sqenergy, |
675 | 0 | 0); |
676 | 0 | bits += b; |
677 | 0 | qenergy += sqenergy; |
678 | 0 | } |
679 | 0 | dist -= bits; |
680 | 0 | if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) { |
681 | 0 | sce->sf_idx[w*16+g]++; |
682 | 0 | dists[w*16+g] = dist; |
683 | 0 | qenergies[w*16+g] = qenergy; |
684 | 0 | } else { |
685 | 0 | break; |
686 | 0 | } |
687 | 0 | } else { |
688 | 0 | maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]); |
689 | 0 | break; |
690 | 0 | } |
691 | 0 | } |
692 | 0 | } |
693 | 0 | prev = sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], mindeltasf, maxdeltasf); |
694 | 0 | if (sce->sf_idx[w*16+g] != prevsc) |
695 | 0 | fflag = 1; |
696 | 0 | nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]); |
697 | 0 | sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); |
698 | 0 | } |
699 | 0 | start += sce->ics.swb_sizes[g]; |
700 | 0 | } |
701 | 0 | } |
702 | | |
703 | | /** SF difference limit violation risk. Must re-clamp. */ |
704 | 0 | prev = -1; |
705 | 0 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
706 | 0 | for (g = 0; g < sce->ics.num_swb; g++) { |
707 | 0 | if (!sce->zeroes[w*16+g]) { |
708 | 0 | int prevsf = sce->sf_idx[w*16+g]; |
709 | 0 | if (prev < 0) |
710 | 0 | prev = prevsf; |
711 | 0 | sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], prev - SCALE_MAX_DIFF, prev + SCALE_MAX_DIFF); |
712 | 0 | sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); |
713 | 0 | prev = sce->sf_idx[w*16+g]; |
714 | 0 | if (!fflag && prevsf != sce->sf_idx[w*16+g]) |
715 | 0 | fflag = 1; |
716 | 0 | } |
717 | 0 | } |
718 | 0 | } |
719 | 

720 | 0 | its++; |
721 | 0 | } while (fflag && its < maxits); |
722 | | |
723 | | /** Scout out next nonzero bands */ |
724 | 0 | ff_init_nextband_map(sce, nextband); |
725 | 

726 | 0 | prev = -1; |
727 | 0 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
728 | | /** Make sure proper codebooks are set */ |
729 | 0 | for (g = 0; g < sce->ics.num_swb; g++) { |
730 | 0 | if (!sce->zeroes[w*16+g]) { |
731 | 0 | sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); |
732 | 0 | if (sce->band_type[w*16+g] <= 0) { |
733 | 0 | if (!ff_sfdelta_can_remove_band(sce, nextband, prev, w*16+g)) { |
734 | | /** Cannot zero out, make sure it's not attempted */ |
735 | 0 | sce->band_type[w*16+g] = 1; |
736 | 0 | } else { |
737 | 0 | sce->zeroes[w*16+g] = 1; |
738 | 0 | sce->band_type[w*16+g] = 0; |
739 | 0 | } |
740 | 0 | } |
741 | 0 | } else { |
742 | 0 | sce->band_type[w*16+g] = 0; |
743 | 0 | } |
744 | | /** Check that there's no SF delta range violations */ |
745 | 0 | if (!sce->zeroes[w*16+g]) { |
746 | 0 | if (prev != -1) { |
747 | 0 | av_unused int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO; |
748 | 0 | av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF); |
749 | 0 | } else if (sce->zeroes[0]) { |
750 | | /** Set global gain to something useful */ |
751 | 0 | sce->sf_idx[0] = sce->sf_idx[w*16+g]; |
752 | 0 | } |
753 | 0 | prev = sce->sf_idx[w*16+g]; |
754 | 0 | } |
755 | 0 | } |
756 | 0 | } |
757 | 0 | } |
758 | | |
759 | | #endif /* AVCODEC_AACCODER_TWOLOOP_H */ |