/src/ffmpeg/libavcodec/alacenc.c
Line | Count | Source |
1 | | /* |
2 | | * ALAC audio encoder |
3 | | * Copyright (c) 2008 Jaikrishnan Menon <realityman@gmx.net> |
4 | | * |
5 | | * This file is part of FFmpeg. |
6 | | * |
7 | | * FFmpeg is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * FFmpeg is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with FFmpeg; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | #include "libavutil/mem.h" |
23 | | #include "libavutil/opt.h" |
24 | | |
25 | | #include "avcodec.h" |
26 | | #include "codec_internal.h" |
27 | | #include "encode.h" |
28 | | #include "put_bits.h" |
29 | | #include "lpc.h" |
30 | | #include "mathops.h" |
31 | | #include "alac_data.h" |
32 | | |
/* Frame and container sizes. */
#define DEFAULT_FRAME_SIZE       4096   /* samples per channel in a full-size frame */
#define ALAC_EXTRADATA_SIZE      36     /* bytes of extradata produced by the init function */
#define ALAC_FRAME_HEADER_SIZE   55     /* NOTE(review): not referenced by the code visible here */
#define ALAC_FRAME_FOOTER_SIZE   3      /* NOTE(review): not referenced by the code visible here */

/* Entropy coder escape prefix (written as a 9-bit field) and LPC limits. */
#define ALAC_ESCAPE_CODE         0x1FF
#define ALAC_MAX_LPC_ORDER       30
#define DEFAULT_MAX_PRED_ORDER   6
#define DEFAULT_MIN_PRED_ORDER   4
#define ALAC_MAX_LPC_PRECISION   9
#define ALAC_MIN_LPC_SHIFT       0
#define ALAC_MAX_LPC_SHIFT       9

/* Stereo modes, in the order estimate_stereo_mode() scores them. */
#define ALAC_CHMODE_LEFT_RIGHT   0
#define ALAC_CHMODE_LEFT_SIDE    1
#define ALAC_CHMODE_RIGHT_SIDE   2
#define ALAC_CHMODE_MID_SIDE     3
50 | | |
/** Parameters of the adaptive Rice/Golomb entropy coder. */
typedef struct RiceContext {
    int history_mult;    /**< multiplier used when updating the running history */
    int initial_history; /**< history value each channel starts with */
    int k_modifier;      /**< upper bound applied to the Rice parameter k */
    int rice_modifier;   /**< written as a 3-bit field in every channel header */
} RiceContext;
57 | | |
58 | | typedef struct AlacLPCContext { |
59 | | int lpc_order; |
60 | | int lpc_coeff[ALAC_MAX_LPC_ORDER+1]; |
61 | | int lpc_quant; |
62 | | } AlacLPCContext; |
63 | | |
64 | | typedef struct AlacEncodeContext { |
65 | | const AVClass *class; |
66 | | AVCodecContext *avctx; |
67 | | int frame_size; /**< current frame size */ |
68 | | int verbatim; /**< current frame verbatim mode flag */ |
69 | | int compression_level; |
70 | | int min_prediction_order; |
71 | | int max_prediction_order; |
72 | | int max_coded_frame_size; |
73 | | int write_sample_size; |
74 | | int extra_bits; |
75 | | int32_t sample_buf[2][DEFAULT_FRAME_SIZE]; |
76 | | int32_t predictor_buf[2][DEFAULT_FRAME_SIZE]; |
77 | | int interlacing_shift; |
78 | | int interlacing_leftweight; |
79 | | PutBitContext pbctx; |
80 | | RiceContext rc; |
81 | | AlacLPCContext lpc[2]; |
82 | | LPCContext lpc_ctx; |
83 | | } AlacEncodeContext; |
84 | | |
85 | | |
86 | | static void init_sample_buffers(AlacEncodeContext *s, int channels, |
87 | | const uint8_t *samples[2]) |
88 | 0 | { |
89 | 0 | int ch, i; |
90 | 0 | int shift = av_get_bytes_per_sample(s->avctx->sample_fmt) * 8 - |
91 | 0 | s->avctx->bits_per_raw_sample; |
92 | |
|
93 | 0 | #define COPY_SAMPLES(type) do { \ |
94 | 0 | for (ch = 0; ch < channels; ch++) { \ |
95 | 0 | int32_t *bptr = s->sample_buf[ch]; \ |
96 | 0 | const type *sptr = (const type *)samples[ch]; \ |
97 | 0 | for (i = 0; i < s->frame_size; i++) \ |
98 | 0 | bptr[i] = sptr[i] >> shift; \ |
99 | 0 | } \ |
100 | 0 | } while (0) |
101 | |
|
102 | 0 | if (s->avctx->sample_fmt == AV_SAMPLE_FMT_S32P) |
103 | 0 | COPY_SAMPLES(int32_t); |
104 | 0 | else |
105 | 0 | COPY_SAMPLES(int16_t); |
106 | 0 | } |
107 | | |
108 | | static void encode_scalar(AlacEncodeContext *s, int x, |
109 | | int k, int write_sample_size) |
110 | 0 | { |
111 | 0 | int divisor, q, r; |
112 | |
|
113 | 0 | k = FFMIN(k, s->rc.k_modifier); |
114 | 0 | divisor = (1<<k) - 1; |
115 | 0 | q = x / divisor; |
116 | 0 | r = x % divisor; |
117 | |
|
118 | 0 | if (q > 8) { |
119 | | // write escape code and sample value directly |
120 | 0 | put_bits(&s->pbctx, 9, ALAC_ESCAPE_CODE); |
121 | 0 | put_bits(&s->pbctx, write_sample_size, x); |
122 | 0 | } else { |
123 | 0 | if (q) |
124 | 0 | put_bits(&s->pbctx, q, (1<<q) - 1); |
125 | 0 | put_bits(&s->pbctx, 1, 0); |
126 | |
|
127 | 0 | if (k != 1) { |
128 | 0 | if (r > 0) |
129 | 0 | put_bits(&s->pbctx, k, r+1); |
130 | 0 | else |
131 | 0 | put_bits(&s->pbctx, k-1, 0); |
132 | 0 | } |
133 | 0 | } |
134 | 0 | } |
135 | | |
136 | | static void write_element_header(AlacEncodeContext *s, |
137 | | enum AlacRawDataBlockType element, |
138 | | int instance) |
139 | 0 | { |
140 | 0 | int encode_fs = 0; |
141 | |
|
142 | 0 | if (s->frame_size < DEFAULT_FRAME_SIZE) |
143 | 0 | encode_fs = 1; |
144 | |
|
145 | 0 | put_bits(&s->pbctx, 3, element); // element type |
146 | 0 | put_bits(&s->pbctx, 4, instance); // element instance |
147 | 0 | put_bits(&s->pbctx, 12, 0); // unused header bits |
148 | 0 | put_bits(&s->pbctx, 1, encode_fs); // Sample count is in the header |
149 | 0 | put_bits(&s->pbctx, 2, s->extra_bits >> 3); // Extra bytes (for 24-bit) |
150 | 0 | put_bits(&s->pbctx, 1, s->verbatim); // Audio block is verbatim |
151 | 0 | if (encode_fs) |
152 | 0 | put_bits32(&s->pbctx, s->frame_size); // No. of samples in the frame |
153 | 0 | } |
154 | | |
155 | | static void calc_predictor_params(AlacEncodeContext *s, int ch) |
156 | 0 | { |
157 | 0 | int32_t coefs[MAX_LPC_ORDER][MAX_LPC_ORDER]; |
158 | 0 | int shift[MAX_LPC_ORDER]; |
159 | 0 | int opt_order; |
160 | |
|
161 | 0 | if (s->compression_level == 1) { |
162 | 0 | s->lpc[ch].lpc_order = 6; |
163 | 0 | s->lpc[ch].lpc_quant = 6; |
164 | 0 | s->lpc[ch].lpc_coeff[0] = 160; |
165 | 0 | s->lpc[ch].lpc_coeff[1] = -190; |
166 | 0 | s->lpc[ch].lpc_coeff[2] = 170; |
167 | 0 | s->lpc[ch].lpc_coeff[3] = -130; |
168 | 0 | s->lpc[ch].lpc_coeff[4] = 80; |
169 | 0 | s->lpc[ch].lpc_coeff[5] = -25; |
170 | 0 | } else { |
171 | 0 | opt_order = ff_lpc_calc_coefs(&s->lpc_ctx, s->sample_buf[ch], |
172 | 0 | s->frame_size, |
173 | 0 | s->min_prediction_order, |
174 | 0 | s->max_prediction_order, |
175 | 0 | ALAC_MAX_LPC_PRECISION, coefs, shift, |
176 | 0 | FF_LPC_TYPE_LEVINSON, 0, |
177 | 0 | ORDER_METHOD_EST, ALAC_MIN_LPC_SHIFT, |
178 | 0 | ALAC_MAX_LPC_SHIFT, 1); |
179 | |
|
180 | 0 | s->lpc[ch].lpc_order = opt_order; |
181 | 0 | s->lpc[ch].lpc_quant = shift[opt_order-1]; |
182 | 0 | memcpy(s->lpc[ch].lpc_coeff, coefs[opt_order-1], opt_order*sizeof(int)); |
183 | 0 | } |
184 | 0 | } |
185 | | |
/**
 * Pick the stereo decorrelation mode with the lowest estimated cost.
 *
 * The cost of a signal is the sum of absolute values of its second-order
 * difference x[i] - 2*x[i-1] + x[i-2], taken over i = 2 .. n-1. It is
 * accumulated for left, right, mid ((l + r) >> 1) and side (l - r), and
 * the four channel pairings are then compared.
 *
 * @param left_ch  left channel samples
 * @param right_ch right channel samples
 * @param n        number of samples per channel
 * @return 0 (left/right), 1 (left/side), 2 (right/side) or 3 (mid/side);
 *         ties go to the lowest index, so n < 3 yields 0
 */
static int estimate_stereo_mode(int32_t *left_ch, int32_t *right_ch, int n)
{
    uint64_t abs_left = 0, abs_right = 0, abs_mid = 0, abs_side = 0;
    uint64_t cost[4];
    int idx, winner = 0;

    for (idx = 2; idx < n; idx++) {
        const int32_t lt   = left_ch[idx]  - 2 * left_ch[idx - 1]  + left_ch[idx - 2];
        const int32_t rt   = right_ch[idx] - 2 * right_ch[idx - 1] + right_ch[idx - 2];
        const int32_t mid  = (lt + rt) >> 1;
        const int32_t side = lt - rt;

        abs_left  += lt   < 0 ? -lt   : lt;
        abs_right += rt   < 0 ? -rt   : rt;
        abs_mid   += mid  < 0 ? -mid  : mid;
        abs_side  += side < 0 ? -side : side;
    }

    cost[0] = abs_left  + abs_right;   /* left + right */
    cost[1] = abs_left  + abs_side;    /* left + side  */
    cost[2] = abs_right + abs_side;    /* right + side */
    cost[3] = abs_mid   + abs_side;    /* mid + side   */

    /* strict comparison keeps the earliest mode on ties */
    for (idx = 1; idx < 4; idx++)
        if (cost[idx] < cost[winner])
            winner = idx;

    return winner;
}
218 | | |
219 | | static void alac_stereo_decorrelation(AlacEncodeContext *s) |
220 | 0 | { |
221 | 0 | int32_t *left = s->sample_buf[0], *right = s->sample_buf[1]; |
222 | 0 | int i, mode, n = s->frame_size; |
223 | 0 | int32_t tmp; |
224 | |
|
225 | 0 | mode = estimate_stereo_mode(left, right, n); |
226 | |
|
227 | 0 | switch (mode) { |
228 | 0 | case ALAC_CHMODE_LEFT_RIGHT: |
229 | 0 | s->interlacing_leftweight = 0; |
230 | 0 | s->interlacing_shift = 0; |
231 | 0 | break; |
232 | 0 | case ALAC_CHMODE_LEFT_SIDE: |
233 | 0 | for (i = 0; i < n; i++) |
234 | 0 | right[i] = left[i] - right[i]; |
235 | 0 | s->interlacing_leftweight = 1; |
236 | 0 | s->interlacing_shift = 0; |
237 | 0 | break; |
238 | 0 | case ALAC_CHMODE_RIGHT_SIDE: |
239 | 0 | for (i = 0; i < n; i++) { |
240 | 0 | tmp = right[i]; |
241 | 0 | right[i] = left[i] - right[i]; |
242 | 0 | left[i] = tmp + (right[i] >> 31); |
243 | 0 | } |
244 | 0 | s->interlacing_leftweight = 1; |
245 | 0 | s->interlacing_shift = 31; |
246 | 0 | break; |
247 | 0 | default: |
248 | 0 | for (i = 0; i < n; i++) { |
249 | 0 | tmp = left[i]; |
250 | 0 | left[i] = (tmp + right[i]) >> 1; |
251 | 0 | right[i] = tmp - right[i]; |
252 | 0 | } |
253 | 0 | s->interlacing_leftweight = 1; |
254 | 0 | s->interlacing_shift = 1; |
255 | 0 | break; |
256 | 0 | } |
257 | 0 | } |
258 | | |
259 | | static void alac_linear_predictor(AlacEncodeContext *s, int ch) |
260 | 0 | { |
261 | 0 | int i; |
262 | 0 | AlacLPCContext lpc = s->lpc[ch]; |
263 | 0 | int32_t *residual = s->predictor_buf[ch]; |
264 | |
|
265 | 0 | if (lpc.lpc_order == 31) { |
266 | 0 | residual[0] = s->sample_buf[ch][0]; |
267 | |
|
268 | 0 | for (i = 1; i < s->frame_size; i++) { |
269 | 0 | residual[i] = s->sample_buf[ch][i ] - |
270 | 0 | s->sample_buf[ch][i - 1]; |
271 | 0 | } |
272 | |
|
273 | 0 | return; |
274 | 0 | } |
275 | | |
276 | | // generalised linear predictor |
277 | | |
278 | 0 | if (lpc.lpc_order > 0) { |
279 | 0 | int32_t *samples = s->sample_buf[ch]; |
280 | | |
281 | | // generate warm-up samples |
282 | 0 | residual[0] = samples[0]; |
283 | 0 | for (i = 1; i <= lpc.lpc_order; i++) |
284 | 0 | residual[i] = sign_extend(samples[i] - samples[i-1], s->write_sample_size); |
285 | | |
286 | | // perform lpc on remaining samples |
287 | 0 | for (i = lpc.lpc_order + 1; i < s->frame_size; i++) { |
288 | 0 | int sum = 1 << (lpc.lpc_quant - 1), res_val, j; |
289 | |
|
290 | 0 | for (j = 0; j < lpc.lpc_order; j++) { |
291 | 0 | sum += (samples[lpc.lpc_order-j] - samples[0]) * |
292 | 0 | lpc.lpc_coeff[j]; |
293 | 0 | } |
294 | |
|
295 | 0 | sum >>= lpc.lpc_quant; |
296 | 0 | sum += samples[0]; |
297 | 0 | residual[i] = sign_extend(samples[lpc.lpc_order+1] - sum, |
298 | 0 | s->write_sample_size); |
299 | 0 | res_val = residual[i]; |
300 | |
|
301 | 0 | if (res_val) { |
302 | 0 | int index = lpc.lpc_order - 1; |
303 | 0 | int neg = (res_val < 0); |
304 | |
|
305 | 0 | while (index >= 0 && (neg ? (res_val < 0) : (res_val > 0))) { |
306 | 0 | int val = samples[0] - samples[lpc.lpc_order - index]; |
307 | 0 | int sign = (val ? FFSIGN(val) : 0); |
308 | |
|
309 | 0 | if (neg) |
310 | 0 | sign *= -1; |
311 | |
|
312 | 0 | lpc.lpc_coeff[index] -= sign; |
313 | 0 | val *= sign; |
314 | 0 | res_val -= (val >> lpc.lpc_quant) * (lpc.lpc_order - index); |
315 | 0 | index--; |
316 | 0 | } |
317 | 0 | } |
318 | 0 | samples++; |
319 | 0 | } |
320 | 0 | } |
321 | 0 | } |
322 | | |
323 | | static void alac_entropy_coder(AlacEncodeContext *s, int ch) |
324 | 0 | { |
325 | 0 | unsigned int history = s->rc.initial_history; |
326 | 0 | int sign_modifier = 0, i, k; |
327 | 0 | int32_t *samples = s->predictor_buf[ch]; |
328 | |
|
329 | 0 | for (i = 0; i < s->frame_size;) { |
330 | 0 | int x; |
331 | |
|
332 | 0 | k = av_log2((history >> 9) + 3); |
333 | |
|
334 | 0 | x = -2 * (*samples) -1; |
335 | 0 | x ^= x >> 31; |
336 | |
|
337 | 0 | samples++; |
338 | 0 | i++; |
339 | |
|
340 | 0 | encode_scalar(s, x - sign_modifier, k, s->write_sample_size); |
341 | |
|
342 | 0 | history += x * s->rc.history_mult - |
343 | 0 | ((history * s->rc.history_mult) >> 9); |
344 | |
|
345 | 0 | sign_modifier = 0; |
346 | 0 | if (x > 0xFFFF) |
347 | 0 | history = 0xFFFF; |
348 | |
|
349 | 0 | if (history < 128 && i < s->frame_size) { |
350 | 0 | unsigned int block_size = 0; |
351 | |
|
352 | 0 | k = 7 - av_log2(history) + ((history + 16) >> 6); |
353 | |
|
354 | 0 | while (*samples == 0 && i < s->frame_size) { |
355 | 0 | samples++; |
356 | 0 | i++; |
357 | 0 | block_size++; |
358 | 0 | } |
359 | 0 | encode_scalar(s, block_size, k, 16); |
360 | 0 | sign_modifier = (block_size <= 0xFFFF); |
361 | 0 | history = 0; |
362 | 0 | } |
363 | |
|
364 | 0 | } |
365 | 0 | } |
366 | | |
367 | | static void write_element(AlacEncodeContext *s, |
368 | | enum AlacRawDataBlockType element, int instance, |
369 | | const uint8_t *samples0, const uint8_t *samples1) |
370 | 0 | { |
371 | 0 | const uint8_t *samples[2] = { samples0, samples1 }; |
372 | 0 | int i, j, channels; |
373 | 0 | int prediction_type = 0; |
374 | 0 | PutBitContext *pb = &s->pbctx; |
375 | |
|
376 | 0 | channels = element == TYPE_CPE ? 2 : 1; |
377 | |
|
378 | 0 | if (s->verbatim) { |
379 | 0 | write_element_header(s, element, instance); |
380 | | /* samples are channel-interleaved in verbatim mode */ |
381 | 0 | if (s->avctx->sample_fmt == AV_SAMPLE_FMT_S32P) { |
382 | 0 | int shift = 32 - s->avctx->bits_per_raw_sample; |
383 | 0 | const int32_t *samples_s32[2] = { (const int32_t *)samples0, |
384 | 0 | (const int32_t *)samples1 }; |
385 | 0 | for (i = 0; i < s->frame_size; i++) |
386 | 0 | for (j = 0; j < channels; j++) |
387 | 0 | put_sbits(pb, s->avctx->bits_per_raw_sample, |
388 | 0 | samples_s32[j][i] >> shift); |
389 | 0 | } else { |
390 | 0 | const int16_t *samples_s16[2] = { (const int16_t *)samples0, |
391 | 0 | (const int16_t *)samples1 }; |
392 | 0 | for (i = 0; i < s->frame_size; i++) |
393 | 0 | for (j = 0; j < channels; j++) |
394 | 0 | put_sbits(pb, s->avctx->bits_per_raw_sample, |
395 | 0 | samples_s16[j][i]); |
396 | 0 | } |
397 | 0 | } else { |
398 | 0 | s->write_sample_size = s->avctx->bits_per_raw_sample - s->extra_bits + |
399 | 0 | channels - 1; |
400 | |
|
401 | 0 | init_sample_buffers(s, channels, samples); |
402 | 0 | write_element_header(s, element, instance); |
403 | | |
404 | | // extract extra bits if needed |
405 | 0 | if (s->extra_bits) { |
406 | 0 | uint32_t mask = (1 << s->extra_bits) - 1; |
407 | 0 | for (j = 0; j < channels; j++) { |
408 | 0 | int32_t *extra = s->predictor_buf[j]; |
409 | 0 | int32_t *smp = s->sample_buf[j]; |
410 | 0 | for (i = 0; i < s->frame_size; i++) { |
411 | 0 | extra[i] = smp[i] & mask; |
412 | 0 | smp[i] >>= s->extra_bits; |
413 | 0 | } |
414 | 0 | } |
415 | 0 | } |
416 | |
|
417 | 0 | if (channels == 2) |
418 | 0 | alac_stereo_decorrelation(s); |
419 | 0 | else |
420 | 0 | s->interlacing_shift = s->interlacing_leftweight = 0; |
421 | 0 | put_bits(pb, 8, s->interlacing_shift); |
422 | 0 | put_bits(pb, 8, s->interlacing_leftweight); |
423 | |
|
424 | 0 | for (i = 0; i < channels; i++) { |
425 | 0 | calc_predictor_params(s, i); |
426 | |
|
427 | 0 | put_bits(pb, 4, prediction_type); |
428 | 0 | put_bits(pb, 4, s->lpc[i].lpc_quant); |
429 | |
|
430 | 0 | put_bits(pb, 3, s->rc.rice_modifier); |
431 | 0 | put_bits(pb, 5, s->lpc[i].lpc_order); |
432 | | // predictor coeff. table |
433 | 0 | for (j = 0; j < s->lpc[i].lpc_order; j++) |
434 | 0 | put_sbits(pb, 16, s->lpc[i].lpc_coeff[j]); |
435 | 0 | } |
436 | | |
437 | | // write extra bits if needed |
438 | 0 | if (s->extra_bits) { |
439 | 0 | for (i = 0; i < s->frame_size; i++) { |
440 | 0 | for (j = 0; j < channels; j++) { |
441 | 0 | put_bits(pb, s->extra_bits, s->predictor_buf[j][i]); |
442 | 0 | } |
443 | 0 | } |
444 | 0 | } |
445 | | |
446 | | // apply lpc and entropy coding to audio samples |
447 | 0 | for (i = 0; i < channels; i++) { |
448 | 0 | alac_linear_predictor(s, i); |
449 | | |
450 | | // TODO: determine when this will actually help. for now it's not used. |
451 | 0 | if (prediction_type == 15) { |
452 | | // 2nd pass 1st order filter |
453 | 0 | int32_t *residual = s->predictor_buf[i]; |
454 | 0 | for (j = s->frame_size - 1; j > 0; j--) |
455 | 0 | residual[j] -= residual[j - 1]; |
456 | 0 | } |
457 | 0 | alac_entropy_coder(s, i); |
458 | 0 | } |
459 | 0 | } |
460 | 0 | } |
461 | | |
462 | | static int write_frame(AlacEncodeContext *s, AVPacket *avpkt, |
463 | | uint8_t * const *samples) |
464 | 0 | { |
465 | 0 | PutBitContext *pb = &s->pbctx; |
466 | 0 | int channels = s->avctx->ch_layout.nb_channels; |
467 | 0 | const enum AlacRawDataBlockType *ch_elements = ff_alac_channel_elements[channels - 1]; |
468 | 0 | const uint8_t *ch_map = ff_alac_channel_layout_offsets[channels - 1]; |
469 | 0 | int ch, element, sce, cpe; |
470 | |
|
471 | 0 | init_put_bits(pb, avpkt->data, avpkt->size); |
472 | |
|
473 | 0 | ch = element = sce = cpe = 0; |
474 | 0 | while (ch < channels) { |
475 | 0 | if (ch_elements[element] == TYPE_CPE) { |
476 | 0 | write_element(s, TYPE_CPE, cpe, samples[ch_map[ch]], |
477 | 0 | samples[ch_map[ch + 1]]); |
478 | 0 | cpe++; |
479 | 0 | ch += 2; |
480 | 0 | } else { |
481 | 0 | write_element(s, TYPE_SCE, sce, samples[ch_map[ch]], NULL); |
482 | 0 | sce++; |
483 | 0 | ch++; |
484 | 0 | } |
485 | 0 | element++; |
486 | 0 | } |
487 | |
|
488 | 0 | put_bits(pb, 3, TYPE_END); |
489 | 0 | flush_put_bits(pb); |
490 | |
|
491 | 0 | return put_bytes_output(pb); |
492 | 0 | } |
493 | | |
494 | | static av_always_inline int get_max_frame_size(int frame_size, int ch, int bps) |
495 | 0 | { |
496 | 0 | int header_bits = 23 + 32 * (frame_size < DEFAULT_FRAME_SIZE); |
497 | 0 | return FFALIGN(header_bits + bps * ch * frame_size + 3, 8) / 8; |
498 | 0 | } |
499 | | |
500 | | static av_cold int alac_encode_close(AVCodecContext *avctx) |
501 | 0 | { |
502 | 0 | AlacEncodeContext *s = avctx->priv_data; |
503 | 0 | ff_lpc_end(&s->lpc_ctx); |
504 | 0 | return 0; |
505 | 0 | } |
506 | | |
507 | | static av_cold int alac_encode_init(AVCodecContext *avctx) |
508 | 0 | { |
509 | 0 | AlacEncodeContext *s = avctx->priv_data; |
510 | 0 | int ret; |
511 | 0 | uint8_t *alac_extradata; |
512 | |
|
513 | 0 | avctx->frame_size = s->frame_size = DEFAULT_FRAME_SIZE; |
514 | |
|
515 | 0 | if (avctx->sample_fmt == AV_SAMPLE_FMT_S32P) { |
516 | 0 | if (avctx->bits_per_raw_sample != 24) |
517 | 0 | av_log(avctx, AV_LOG_WARNING, "encoding as 24 bits-per-sample\n"); |
518 | 0 | avctx->bits_per_raw_sample = 24; |
519 | 0 | } else { |
520 | 0 | avctx->bits_per_raw_sample = 16; |
521 | 0 | s->extra_bits = 0; |
522 | 0 | } |
523 | | |
524 | | // Set default compression level |
525 | 0 | if (avctx->compression_level == FF_COMPRESSION_DEFAULT) |
526 | 0 | s->compression_level = 2; |
527 | 0 | else |
528 | 0 | s->compression_level = av_clip(avctx->compression_level, 0, 2); |
529 | | |
530 | | // Initialize default Rice parameters |
531 | 0 | s->rc.history_mult = 40; |
532 | 0 | s->rc.initial_history = 10; |
533 | 0 | s->rc.k_modifier = 14; |
534 | 0 | s->rc.rice_modifier = 4; |
535 | |
|
536 | 0 | s->max_coded_frame_size = get_max_frame_size(avctx->frame_size, |
537 | 0 | avctx->ch_layout.nb_channels, |
538 | 0 | avctx->bits_per_raw_sample); |
539 | |
|
540 | 0 | avctx->extradata = av_mallocz(ALAC_EXTRADATA_SIZE + AV_INPUT_BUFFER_PADDING_SIZE); |
541 | 0 | if (!avctx->extradata) |
542 | 0 | return AVERROR(ENOMEM); |
543 | 0 | avctx->extradata_size = ALAC_EXTRADATA_SIZE; |
544 | |
|
545 | 0 | alac_extradata = avctx->extradata; |
546 | 0 | AV_WB32(alac_extradata, ALAC_EXTRADATA_SIZE); |
547 | 0 | AV_WB32(alac_extradata+4, MKBETAG('a','l','a','c')); |
548 | 0 | AV_WB32(alac_extradata+12, avctx->frame_size); |
549 | 0 | AV_WB8 (alac_extradata+17, avctx->bits_per_raw_sample); |
550 | 0 | AV_WB8 (alac_extradata+21, avctx->ch_layout.nb_channels); |
551 | 0 | AV_WB32(alac_extradata+24, s->max_coded_frame_size); |
552 | 0 | AV_WB32(alac_extradata+28, |
553 | 0 | avctx->sample_rate * avctx->ch_layout.nb_channels * avctx->bits_per_raw_sample); // average bitrate |
554 | 0 | AV_WB32(alac_extradata+32, avctx->sample_rate); |
555 | | |
556 | | // Set relevant extradata fields |
557 | 0 | if (s->compression_level > 0) { |
558 | 0 | AV_WB8(alac_extradata+18, s->rc.history_mult); |
559 | 0 | AV_WB8(alac_extradata+19, s->rc.initial_history); |
560 | 0 | AV_WB8(alac_extradata+20, s->rc.k_modifier); |
561 | 0 | } |
562 | |
|
563 | 0 | if (s->max_prediction_order < s->min_prediction_order) { |
564 | 0 | av_log(avctx, AV_LOG_ERROR, |
565 | 0 | "invalid prediction orders: min=%d max=%d\n", |
566 | 0 | s->min_prediction_order, s->max_prediction_order); |
567 | 0 | return AVERROR(EINVAL); |
568 | 0 | } |
569 | | |
570 | 0 | s->avctx = avctx; |
571 | |
|
572 | 0 | if ((ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size, |
573 | 0 | s->max_prediction_order, |
574 | 0 | FF_LPC_TYPE_LEVINSON)) < 0) { |
575 | 0 | return ret; |
576 | 0 | } |
577 | | |
578 | 0 | return 0; |
579 | 0 | } |
580 | | |
581 | | static int alac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, |
582 | | const AVFrame *frame, int *got_packet_ptr) |
583 | 0 | { |
584 | 0 | AlacEncodeContext *s = avctx->priv_data; |
585 | 0 | int out_bytes, max_frame_size, ret; |
586 | |
|
587 | 0 | s->frame_size = frame->nb_samples; |
588 | |
|
589 | 0 | if (frame->nb_samples < DEFAULT_FRAME_SIZE) |
590 | 0 | max_frame_size = get_max_frame_size(s->frame_size, avctx->ch_layout.nb_channels, |
591 | 0 | avctx->bits_per_raw_sample); |
592 | 0 | else |
593 | 0 | max_frame_size = s->max_coded_frame_size; |
594 | |
|
595 | 0 | if ((ret = ff_alloc_packet(avctx, avpkt, 4 * max_frame_size)) < 0) |
596 | 0 | return ret; |
597 | | |
598 | | /* use verbatim mode for compression_level 0 */ |
599 | 0 | if (s->compression_level) { |
600 | 0 | s->verbatim = 0; |
601 | 0 | s->extra_bits = avctx->bits_per_raw_sample - 16; |
602 | 0 | } else { |
603 | 0 | s->verbatim = 1; |
604 | 0 | s->extra_bits = 0; |
605 | 0 | } |
606 | |
|
607 | 0 | out_bytes = write_frame(s, avpkt, frame->extended_data); |
608 | |
|
609 | 0 | if (out_bytes > max_frame_size) { |
610 | | /* frame too large. use verbatim mode */ |
611 | 0 | s->verbatim = 1; |
612 | 0 | s->extra_bits = 0; |
613 | 0 | out_bytes = write_frame(s, avpkt, frame->extended_data); |
614 | 0 | } |
615 | |
|
616 | 0 | avpkt->size = out_bytes; |
617 | 0 | *got_packet_ptr = 1; |
618 | 0 | return 0; |
619 | 0 | } |
620 | | |
621 | | #define OFFSET(x) offsetof(AlacEncodeContext, x) |
622 | | #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM |
623 | | static const AVOption options[] = { |
624 | | { "min_prediction_order", NULL, OFFSET(min_prediction_order), AV_OPT_TYPE_INT, { .i64 = DEFAULT_MIN_PRED_ORDER }, MIN_LPC_ORDER, ALAC_MAX_LPC_ORDER, AE }, |
625 | | { "max_prediction_order", NULL, OFFSET(max_prediction_order), AV_OPT_TYPE_INT, { .i64 = DEFAULT_MAX_PRED_ORDER }, MIN_LPC_ORDER, ALAC_MAX_LPC_ORDER, AE }, |
626 | | |
627 | | { NULL }, |
628 | | }; |
629 | | |
630 | | static const AVClass alacenc_class = { |
631 | | .class_name = "alacenc", |
632 | | .item_name = av_default_item_name, |
633 | | .option = options, |
634 | | .version = LIBAVUTIL_VERSION_INT, |
635 | | }; |
636 | | |
637 | | const FFCodec ff_alac_encoder = { |
638 | | .p.name = "alac", |
639 | | CODEC_LONG_NAME("ALAC (Apple Lossless Audio Codec)"), |
640 | | .p.type = AVMEDIA_TYPE_AUDIO, |
641 | | .p.id = AV_CODEC_ID_ALAC, |
642 | | .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SMALL_LAST_FRAME | |
643 | | AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, |
644 | | .priv_data_size = sizeof(AlacEncodeContext), |
645 | | .p.priv_class = &alacenc_class, |
646 | | .init = alac_encode_init, |
647 | | FF_CODEC_ENCODE_CB(alac_encode_frame), |
648 | | .close = alac_encode_close, |
649 | | CODEC_CH_LAYOUTS_ARRAY(ff_alac_ch_layouts), |
650 | | CODEC_SAMPLEFMTS(AV_SAMPLE_FMT_S32P, AV_SAMPLE_FMT_S16P), |
651 | | }; |