/src/ffmpeg/libavcodec/wmaenc.c
Line | Count | Source |
1 | | /* |
2 | | * WMA compatible encoder |
3 | | * Copyright (c) 2007 Michael Niedermayer |
4 | | * |
5 | | * This file is part of FFmpeg. |
6 | | * |
7 | | * FFmpeg is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * FFmpeg is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with FFmpeg; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | #include "config_components.h" |
23 | | |
24 | | #include "libavutil/attributes.h" |
25 | | #include "libavutil/ffmath.h" |
26 | | #include "libavutil/mem.h" |
27 | | |
28 | | #include "avcodec.h" |
29 | | #include "codec_internal.h" |
30 | | #include "encode.h" |
31 | | #include "wma.h" |
32 | | #include "libavutil/avassert.h" |
33 | | |
34 | | |
35 | | static av_cold int encode_init(AVCodecContext *avctx) |
36 | 0 | { |
37 | 0 | WMACodecContext *s = avctx->priv_data; |
38 | 0 | int i, flags1, flags2, block_align; |
39 | 0 | uint8_t *extradata; |
40 | 0 | int ret; |
41 | |
|
42 | 0 | s->avctx = avctx; |
43 | |
|
44 | 0 | if (avctx->ch_layout.nb_channels > MAX_CHANNELS) { |
45 | 0 | av_log(avctx, AV_LOG_ERROR, |
46 | 0 | "too many channels: got %i, need %i or fewer\n", |
47 | 0 | avctx->ch_layout.nb_channels, MAX_CHANNELS); |
48 | 0 | return AVERROR(EINVAL); |
49 | 0 | } |
50 | | |
51 | 0 | if (avctx->sample_rate > 48000) { |
52 | 0 | av_log(avctx, AV_LOG_ERROR, "sample rate is too high: %d > 48kHz\n", |
53 | 0 | avctx->sample_rate); |
54 | 0 | return AVERROR(EINVAL); |
55 | 0 | } |
56 | | |
57 | 0 | if (avctx->bit_rate < 24 * 1000) { |
58 | 0 | av_log(avctx, AV_LOG_ERROR, |
59 | 0 | "bitrate too low: got %"PRId64", need 24000 or higher\n", |
60 | 0 | avctx->bit_rate); |
61 | 0 | return AVERROR(EINVAL); |
62 | 0 | } |
63 | | |
64 | | /* extract flag info */ |
65 | 0 | flags1 = 0; |
66 | 0 | flags2 = 1; |
67 | 0 | if (avctx->codec->id == AV_CODEC_ID_WMAV1) { |
68 | 0 | extradata = av_mallocz(4 + AV_INPUT_BUFFER_PADDING_SIZE); |
69 | 0 | if (!extradata) |
70 | 0 | return AVERROR(ENOMEM); |
71 | 0 | avctx->extradata_size = 4; |
72 | 0 | AV_WL16(extradata, flags1); |
73 | 0 | AV_WL16(extradata + 2, flags2); |
74 | 0 | } else if (avctx->codec->id == AV_CODEC_ID_WMAV2) { |
75 | 0 | extradata = av_mallocz(10 + AV_INPUT_BUFFER_PADDING_SIZE); |
76 | 0 | if (!extradata) |
77 | 0 | return AVERROR(ENOMEM); |
78 | 0 | avctx->extradata_size = 10; |
79 | 0 | AV_WL32(extradata, flags1); |
80 | 0 | AV_WL16(extradata + 4, flags2); |
81 | 0 | } else { |
82 | 0 | av_unreachable("This function is only used with WMAV1/2 encoders"); |
83 | 0 | } |
84 | 0 | avctx->extradata = extradata; |
85 | 0 | s->use_exp_vlc = flags2 & 0x0001; |
86 | 0 | s->use_bit_reservoir = flags2 & 0x0002; |
87 | 0 | s->use_variable_block_len = flags2 & 0x0004; |
88 | 0 | if (avctx->ch_layout.nb_channels == 2) |
89 | 0 | s->ms_stereo = 1; |
90 | |
|
91 | 0 | if ((ret = ff_wma_init(avctx, flags2)) < 0) |
92 | 0 | return ret; |
93 | | |
94 | | /* init MDCT */ |
95 | 0 | for (i = 0; i < s->nb_block_sizes; i++) { |
96 | 0 | float scale = 1.0f; |
97 | 0 | ret = av_tx_init(&s->mdct_ctx[i], &s->mdct_fn[i], AV_TX_FLOAT_MDCT, |
98 | 0 | 0, 1 << (s->frame_len_bits - i), &scale, 0); |
99 | 0 | if (ret < 0) |
100 | 0 | return ret; |
101 | 0 | } |
102 | | |
103 | 0 | block_align = avctx->bit_rate * (int64_t) s->frame_len / |
104 | 0 | (avctx->sample_rate * 8); |
105 | 0 | block_align = FFMIN(block_align, MAX_CODED_SUPERFRAME_SIZE); |
106 | 0 | avctx->block_align = block_align; |
107 | 0 | avctx->frame_size = avctx->initial_padding = s->frame_len; |
108 | |
|
109 | 0 | return 0; |
110 | 0 | } |
111 | | |
112 | | static int apply_window_and_mdct(AVCodecContext *avctx, const AVFrame *frame) |
113 | 0 | { |
114 | 0 | WMACodecContext *s = avctx->priv_data; |
115 | 0 | const float *const *audio = (const float *const *) frame->extended_data; |
116 | 0 | int len = frame->nb_samples; |
117 | 0 | int window_index = s->frame_len_bits - s->block_len_bits; |
118 | 0 | AVTXContext *mdct = s->mdct_ctx[window_index]; |
119 | 0 | av_tx_fn mdct_fn = s->mdct_fn[window_index]; |
120 | 0 | int ch; |
121 | 0 | const float *win = s->windows[window_index]; |
122 | 0 | int window_len = 1 << s->block_len_bits; |
123 | 0 | float n = 2.0 * 32768.0 / window_len; |
124 | |
|
125 | 0 | for (ch = 0; ch < avctx->ch_layout.nb_channels; ch++) { |
126 | 0 | memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output)); |
127 | 0 | s->fdsp->vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len); |
128 | 0 | s->fdsp->vector_fmul_reverse(&s->output[window_len], s->frame_out[ch], |
129 | 0 | win, len); |
130 | 0 | s->fdsp->vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len); |
131 | 0 | mdct_fn(mdct, s->coefs[ch], s->output, sizeof(float)); |
132 | 0 | if (!isfinite(s->coefs[ch][0])) { |
133 | 0 | av_log(avctx, AV_LOG_ERROR, "Input contains NaN/+-Inf\n"); |
134 | 0 | return AVERROR(EINVAL); |
135 | 0 | } |
136 | 0 | } |
137 | | |
138 | 0 | return 0; |
139 | 0 | } |
140 | | |
141 | | // FIXME use for decoding too |
142 | | static void init_exp(WMACodecContext *s, int ch, const int *exp_param) |
143 | 0 | { |
144 | 0 | int n; |
145 | 0 | const uint16_t *ptr; |
146 | 0 | float v, *q, max_scale, *q_end; |
147 | |
|
148 | 0 | ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; |
149 | 0 | q = s->exponents[ch]; |
150 | 0 | q_end = q + s->block_len; |
151 | 0 | max_scale = 0; |
152 | 0 | while (q < q_end) { |
153 | | /* XXX: use a table */ |
154 | 0 | v = ff_exp10(*exp_param++ *(1.0 / 16.0)); |
155 | 0 | max_scale = FFMAX(max_scale, v); |
156 | 0 | n = *ptr++; |
157 | 0 | do { |
158 | 0 | *q++ = v; |
159 | 0 | } while (--n); |
160 | 0 | } |
161 | 0 | s->max_exponent[ch] = max_scale; |
162 | 0 | } |
163 | | |
164 | | static void encode_exp_vlc(WMACodecContext *s, int ch, const int *exp_param) |
165 | 0 | { |
166 | 0 | int last_exp; |
167 | 0 | const uint16_t *ptr; |
168 | 0 | float *q, *q_end; |
169 | |
|
170 | 0 | ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; |
171 | 0 | q = s->exponents[ch]; |
172 | 0 | q_end = q + s->block_len; |
173 | 0 | if (s->version == 1) { |
174 | 0 | last_exp = *exp_param++; |
175 | 0 | av_assert0(last_exp - 10 >= 0 && last_exp - 10 < 32); |
176 | 0 | put_bits(&s->pb, 5, last_exp - 10); |
177 | 0 | q += *ptr++; |
178 | 0 | } else |
179 | 0 | last_exp = 36; |
180 | 0 | while (q < q_end) { |
181 | 0 | int exp = *exp_param++; |
182 | 0 | int code = exp - last_exp + 60; |
183 | 0 | av_assert1(code >= 0 && code < 120); |
184 | 0 | put_bits(&s->pb, ff_aac_scalefactor_bits[code], |
185 | 0 | ff_aac_scalefactor_code[code]); |
186 | | /* XXX: use a table */ |
187 | 0 | q += *ptr++; |
188 | 0 | last_exp = exp; |
189 | 0 | } |
190 | 0 | } |
191 | | |
/**
 * Quantize and write one block for all channels into s->pb.
 *
 * All exponents are fixed (see fixed_exp), so total_gain is the only
 * quantization control. The bits are written in this order: MS-stereo flag
 * (two channels only), per-channel "coded" flags, total gain, optional
 * noise-coding high-band flags, exponents, then run/level coded coefficients
 * per channel.
 *
 * @param s          encoder context
 * @param src_coefs  MDCT coefficients, one row per channel
 * @param total_gain global gain used to scale the quantizer
 * @return 0 on success, 1 if no channel is coded, -1 if a quantized
 *         coefficient falls outside [-32768, 32767] or an escape-coded level
 *         does not fit into the bit width derived from total_gain
 */
static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
                        int total_gain)
{
    int channels = s->avctx->ch_layout.nb_channels;
    int v, bsize, ch, coef_nb_bits, parse_exponents;
    float mdct_norm;
    int nb_coefs[MAX_CHANNELS];
    /* constant exponent for every band: the encoder does no exponent search */
    static const int fixed_exp[25] = {
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20
    };

    // FIXME remove duplication relative to decoder
    if (s->use_variable_block_len) {
        av_unreachable("use_variable_block_len unimplemented, set to 0 during init");
    } else {
        /* fixed block len */
        s->next_block_len_bits = s->frame_len_bits;
        s->prev_block_len_bits = s->frame_len_bits;
        s->block_len_bits      = s->frame_len_bits;
    }

    s->block_len = 1 << s->block_len_bits;
//     av_assert0((s->block_pos + s->block_len) <= s->frame_len);
    bsize = s->frame_len_bits - s->block_len_bits;

    // FIXME factor
    /* every channel codes the same number of coefficients */
    v = s->coefs_end[bsize] - s->coefs_start;
    for (ch = 0; ch < channels; ch++)
        nb_coefs[ch] = v;
    {
        /* normalization folded into the quantizer step below */
        int n4 = s->block_len / 2;
        mdct_norm = 1.0 / (float) n4;
        if (s->version == 1)
            mdct_norm *= sqrt(n4);
    }

    if (channels == 2)
        put_bits(&s->pb, 1, !!s->ms_stereo);

    for (ch = 0; ch < channels; ch++) {
        // FIXME only set channel_coded when needed, instead of always
        s->channel_coded[ch] = 1;
        if (s->channel_coded[ch])
            init_exp(s, ch, fixed_exp);
    }

    /* quantize: coefs1 = round(coef / (exponent * mult)) */
    for (ch = 0; ch < channels; ch++) {
        if (s->channel_coded[ch]) {
            WMACoef *coefs1;
            float *coefs, *exponents, mult;
            int i, n;

            coefs1    = s->coefs1[ch];
            exponents = s->exponents[ch];
            mult      = ff_exp10(total_gain * 0.05) / s->max_exponent[ch];
            mult     *= mdct_norm;
            coefs     = src_coefs[ch];
            if (s->use_noise_coding && 0) {
                av_assert0(0); // FIXME not implemented
            } else {
                coefs += s->coefs_start;
                n      = nb_coefs[ch];
                for (i = 0; i < n; i++) {
                    double t = *coefs++ / (exponents[i] * mult);
                    /* reject values outside the signed 16-bit range;
                     * the caller retries with a different total_gain */
                    if (t < -32768 || t > 32767)
                        return -1;

                    coefs1[i] = lrint(t);
                }
            }
        }
    }

    /* one "channel coded" flag per channel */
    v = 0;
    for (ch = 0; ch < channels; ch++) {
        int a = s->channel_coded[ch];
        put_bits(&s->pb, 1, a);
        v |= a;
    }

    if (!v)
        return 1;

    /* total_gain - 1 is written in 7-bit pieces: a piece of 127 means
     * "add 127 and read another piece" */
    for (v = total_gain - 1; v >= 127; v -= 127)
        put_bits(&s->pb, 7, 127);
    put_bits(&s->pb, 7, v);

    coef_nb_bits = ff_wma_total_gain_to_bits(total_gain);

    if (s->use_noise_coding) {
        for (ch = 0; ch < channels; ch++) {
            if (s->channel_coded[ch]) {
                int i, n;
                n = s->exponent_high_sizes[bsize];
                for (i = 0; i < n; i++) {
                    /* no high band is ever marked as noise coded */
                    put_bits(&s->pb, 1, s->high_band_coded[ch][i] = 0);
                    if (0)
                        nb_coefs[ch] -= s->exponent_high_bands[bsize][i];
                }
            }
        }
    }

    /* the flag is only transmitted when the block is shorter than the frame */
    parse_exponents = 1;
    if (s->block_len_bits != s->frame_len_bits)
        put_bits(&s->pb, 1, parse_exponents);

    if (parse_exponents) {
        for (ch = 0; ch < channels; ch++) {
            if (s->channel_coded[ch]) {
                if (s->use_exp_vlc) {
                    encode_exp_vlc(s, ch, fixed_exp);
                } else {
                    av_unreachable("use_exp_vlc always set to 1 during init");
                    // FIXME not implemented
//                    encode_exp_lsp(s, ch);
                }
            }
        }
    } else
        av_assert0(0); // FIXME not implemented

    /* run/level coding of the quantized coefficients */
    for (ch = 0; ch < channels; ch++) {
        if (s->channel_coded[ch]) {
            int run, tindex;
            WMACoef *ptr, *eptr;
            /* the second channel uses table 1 when MS stereo is on */
            tindex = (ch == 1 && s->ms_stereo);
            ptr    = &s->coefs1[ch][0];
            eptr   = ptr + nb_coefs[ch];

            run = 0;
            for (; ptr < eptr; ptr++) {
                if (*ptr) {
                    int level     = *ptr;
                    int abs_level = FFABS(level);
                    int code      = 0;
                    /* use a direct table code when the (run, level) pair is
                     * covered by the table; otherwise code stays 0 (escape) */
                    if (abs_level <= s->coef_vlcs[tindex]->max_level)
                        if (run < s->coef_vlcs[tindex]->levels[abs_level - 1])
                            code = run + s->int_table[tindex][abs_level - 1];

                    av_assert2(code < s->coef_vlcs[tindex]->n);
                    put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[code],
                             s->coef_vlcs[tindex]->huffcodes[code]);

                    if (code == 0) {
                        /* escape: explicit level and run follow; fail if the
                         * level does not fit into coef_nb_bits bits */
                        if (1 << coef_nb_bits <= abs_level)
                            return -1;

                        put_bits(&s->pb, coef_nb_bits, abs_level);
                        put_bits(&s->pb, s->frame_len_bits, run);
                    }
                    // FIXME the sign is flipped somewhere
                    put_bits(&s->pb, 1, level < 0);
                    run = 0;
                } else
                    run++;
            }
            /* trailing zeros: emit table entry 1
             * NOTE(review): presumably the end-of-block code - confirm
             * against the decoder's coefficient tables */
            if (run)
                put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[1],
                         s->coef_vlcs[tindex]->huffcodes[1]);
        }
        /* version 1 byte-aligns after each channel when there are 2+ channels */
        if (s->version == 1 && channels >= 2)
            align_put_bits(&s->pb);
    }
    return 0;
}
362 | | |
363 | | static int encode_frame(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE], |
364 | | uint8_t *buf, int buf_size, int total_gain) |
365 | 0 | { |
366 | 0 | init_put_bits(&s->pb, buf, buf_size); |
367 | |
|
368 | 0 | if (s->use_bit_reservoir) |
369 | 0 | av_unreachable("use_bit_reseroir unimplemented, set to 0 during init"); |
370 | 0 | else if (encode_block(s, src_coefs, total_gain) < 0) |
371 | 0 | return INT_MAX; |
372 | | |
373 | 0 | align_put_bits(&s->pb); |
374 | |
|
375 | 0 | return put_bits_count(&s->pb) / 8 - s->avctx->block_align; |
376 | 0 | } |
377 | | |
378 | | static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt, |
379 | | const AVFrame *frame, int *got_packet_ptr) |
380 | 0 | { |
381 | 0 | WMACodecContext *s = avctx->priv_data; |
382 | 0 | int total_gain, ret, error; |
383 | |
|
384 | 0 | s->block_len_bits = s->frame_len_bits; // required by non variable block len |
385 | 0 | s->block_len = 1 << s->block_len_bits; |
386 | |
|
387 | 0 | ret = apply_window_and_mdct(avctx, frame); |
388 | |
|
389 | 0 | if (ret < 0) |
390 | 0 | return ret; |
391 | | |
392 | 0 | if (s->ms_stereo) { |
393 | 0 | float a, b; |
394 | |
|
395 | 0 | for (int i = 0; i < s->block_len; i++) { |
396 | 0 | a = s->coefs[0][i] * 0.5; |
397 | 0 | b = s->coefs[1][i] * 0.5; |
398 | 0 | s->coefs[0][i] = a + b; |
399 | 0 | s->coefs[1][i] = a - b; |
400 | 0 | } |
401 | 0 | } |
402 | |
|
403 | 0 | if ((ret = ff_alloc_packet(avctx, avpkt, 2 * MAX_CODED_SUPERFRAME_SIZE)) < 0) |
404 | 0 | return ret; |
405 | | |
406 | 0 | total_gain = 128; |
407 | 0 | for (int i = 64; i; i >>= 1) { |
408 | 0 | error = encode_frame(s, s->coefs, avpkt->data, avpkt->size, |
409 | 0 | total_gain - i); |
410 | 0 | if (error <= 0) |
411 | 0 | total_gain -= i; |
412 | 0 | } |
413 | |
|
414 | 0 | while(total_gain <= 128 && error > 0) |
415 | 0 | error = encode_frame(s, s->coefs, avpkt->data, avpkt->size, total_gain++); |
416 | 0 | if (error > 0) { |
417 | 0 | av_log(avctx, AV_LOG_ERROR, "Invalid input data or requested bitrate too low, cannot encode\n"); |
418 | 0 | return AVERROR(EINVAL); |
419 | 0 | } |
420 | 0 | av_assert0((put_bits_count(&s->pb) & 7) == 0); |
421 | 0 | int pad = avctx->block_align - put_bytes_count(&s->pb, 0); |
422 | 0 | av_assert0(pad >= 0); |
423 | 0 | while (pad--) |
424 | 0 | put_bits(&s->pb, 8, 'N'); |
425 | |
|
426 | 0 | flush_put_bits(&s->pb); |
427 | 0 | av_assert0(put_bytes_output(&s->pb) == avctx->block_align); |
428 | | |
429 | 0 | if (frame->pts != AV_NOPTS_VALUE) |
430 | 0 | avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding); |
431 | |
|
432 | 0 | avpkt->size = avctx->block_align; |
433 | 0 | *got_packet_ptr = 1; |
434 | 0 | return 0; |
435 | 0 | } |
436 | | |
#if CONFIG_WMAV1_ENCODER
/* Codec descriptor for the WMA version 1 encoder (planar float input). */
const FFCodec ff_wmav1_encoder = {
    .p.name         = "wmav1",
    CODEC_LONG_NAME("Windows Media Audio 1"),
    .p.id           = AV_CODEC_ID_WMAV1,
    .p.type         = AVMEDIA_TYPE_AUDIO,
    .p.capabilities = AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE | AV_CODEC_CAP_DR1,
    CODEC_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP),
    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
    .priv_data_size = sizeof(WMACodecContext),
    .init           = encode_init,
    .close          = ff_wma_end,
    FF_CODEC_ENCODE_CB(encode_superframe),
};
#endif
#if CONFIG_WMAV2_ENCODER
/* Codec descriptor for the WMA version 2 encoder (planar float input). */
const FFCodec ff_wmav2_encoder = {
    .p.name         = "wmav2",
    CODEC_LONG_NAME("Windows Media Audio 2"),
    .p.id           = AV_CODEC_ID_WMAV2,
    .p.type         = AVMEDIA_TYPE_AUDIO,
    .p.capabilities = AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE | AV_CODEC_CAP_DR1,
    CODEC_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP),
    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
    .priv_data_size = sizeof(WMACodecContext),
    .init           = encode_init,
    .close          = ff_wma_end,
    FF_CODEC_ENCODE_CB(encode_superframe),
};
#endif