/src/ffmpeg/libavcodec/apedec.c
Line | Count | Source |
1 | | /* |
2 | | * Monkey's Audio lossless audio decoder |
3 | | * Copyright (c) 2007 Benjamin Zores <ben@geexbox.org> |
4 | | * based upon libdemac from Dave Chapman. |
5 | | * |
6 | | * This file is part of FFmpeg. |
7 | | * |
8 | | * FFmpeg is free software; you can redistribute it and/or |
9 | | * modify it under the terms of the GNU Lesser General Public |
10 | | * License as published by the Free Software Foundation; either |
11 | | * version 2.1 of the License, or (at your option) any later version. |
12 | | * |
13 | | * FFmpeg is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | | * Lesser General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public |
19 | | * License along with FFmpeg; if not, write to the Free Software |
20 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | | */ |
22 | | |
23 | | #include <inttypes.h> |
24 | | |
25 | | #include "libavutil/attributes.h" |
26 | | #include "libavutil/avassert.h" |
27 | | #include "libavutil/channel_layout.h" |
28 | | #include "libavutil/crc.h" |
29 | | #include "libavutil/mem.h" |
30 | | #include "libavutil/opt.h" |
31 | | #include "lossless_audiodsp.h" |
32 | | #include "avcodec.h" |
33 | | #include "bswapdsp.h" |
34 | | #include "bytestream.h" |
35 | | #include "codec_internal.h" |
36 | | #include "decode.h" |
37 | | #include "get_bits.h" |
38 | | #include "unary.h" |
39 | | |
40 | | /** |
41 | | * @file |
42 | | * Monkey's Audio lossless audio decoder |
43 | | */ |
44 | | |
/* Stream limits; MAX_CHANNELS sizes the per-channel pointer arrays in APEContext. */
#define MAX_CHANNELS 2
#define MAX_BYTESPERSAMPLE 3

/* Frame codes; presumably matched against APEContext.frameflags -- TODO confirm at the use sites. */
#define APE_FRAMECODE_MONO_SILENCE 1
#define APE_FRAMECODE_STEREO_SILENCE 3
#define APE_FRAMECODE_PSEUDO_STEREO 4

/* HISTORY_SIZE + PREDICTOR_SIZE is the length of the predictor historybuffer arrays. */
#define HISTORY_SIZE 512
#define PREDICTOR_ORDER 8
/** Total size of all predictor histories */
#define PREDICTOR_SIZE 50

/* Delay offsets, all derived from PREDICTOR_ORDER. */
#define YDELAYA (18 + PREDICTOR_ORDER*4) /* = 50 */
#define YDELAYB (18 + PREDICTOR_ORDER*3) /* = 42 */
#define XDELAYA (18 + PREDICTOR_ORDER*2) /* = 34 */
#define XDELAYB (18 + PREDICTOR_ORDER)   /* = 26 */

/* Adaptation-coefficient offsets; presumably indices into the predictor history -- TODO confirm. */
#define YADAPTCOEFFSA 18
#define XADAPTCOEFFSA 14
#define YADAPTCOEFFSB 10
#define XADAPTCOEFFSB 5
66 | | |
/**
 * Possible compression levels
 *
 * The values are multiples of 1000; ape_decode_init() rejects anything else
 * and derives the filter-set index as level / 1000 - 1.
 * @{
 */
enum APECompressionLevel {
    COMPRESSION_LEVEL_FAST = 1000,
    COMPRESSION_LEVEL_NORMAL = 2000,
    COMPRESSION_LEVEL_HIGH = 3000,
    COMPRESSION_LEVEL_EXTRA_HIGH = 4000,
    COMPRESSION_LEVEL_INSANE = 5000
};
/** @} */
79 | | |
/** Maximum number of filter stages in any filter set */
#define APE_FILTER_LEVELS 3

/**
 * Filter orders depending on compression level
 *
 * Rows are indexed by APEContext.fset (compression_level / 1000 - 1).
 * A zero order ends the list of stages for that row: ape_decode_init()
 * stops allocating filter buffers at the first zero entry.
 */
static const uint16_t ape_filter_orders[5][APE_FILTER_LEVELS] = {
    { 0, 0, 0 },
    { 16, 0, 0 },
    { 64, 0, 0 },
    { 32, 256, 0 },
    { 16, 256, 1280 }
};
90 | | |
/**
 * Filter fraction bits depending on compression level
 *
 * Same row/column layout as ape_filter_orders: one row per filter set,
 * one column per filter stage, zero where the stage has order zero.
 */
static const uint8_t ape_filter_fracbits[5][APE_FILTER_LEVELS] = {
    { 0, 0, 0 },
    { 11, 0, 0 },
    { 11, 0, 0 },
    { 10, 13, 0 },
    { 11, 13, 15 }
};
99 | | |
100 | | |
/** Filters applied to the decoded data */
typedef struct APEFilter {
    int16_t *coeffs;        ///< actual coefficients used in filtering
    int16_t *adaptcoeffs;   ///< adaptive filter coefficients used for correcting of actual filter coefficients
    int16_t *historybuffer; ///< filter memory
    int16_t *delay;         ///< filtered values

    uint32_t avg;           ///< NOTE(review): presumably a running average of the filtered signal -- confirm at the use site
} APEFilter;
110 | | |
/** Adaptive Rice coding state, one instance per channel */
typedef struct APERice {
    uint32_t k;    ///< current Rice parameter, adapted after each decoded value
    uint32_t ksum; ///< running sum that drives the adaptation of k
} APERice;
115 | | |
/** Range decoder state */
typedef struct APERangecoder {
    uint32_t low;        ///< low end of interval
    uint32_t range;      ///< length of interval
    uint32_t help;       ///< bytes_to_follow resp. intermediate value
    unsigned int buffer; ///< buffer for input/output
} APERangecoder;
122 | | |
/** Filter histories */
typedef struct APEPredictor {
    int32_t *buf;           ///< current position inside historybuffer (reset to its start by init_predictor_decoder())

    int32_t lastA[2];

    int32_t filterA[2];
    int32_t filterB[2];

    uint32_t coeffsA[2][4]; ///< adaption coefficients
    uint32_t coeffsB[2][5]; ///< adaption coefficients
    int32_t historybuffer[HISTORY_SIZE + PREDICTOR_SIZE];

    unsigned int sample_pos;
} APEPredictor;
138 | | |
/** 64-bit counterpart of APEPredictor; it has the same fields except sample_pos */
typedef struct APEPredictor64 {
    int64_t *buf;           ///< current position inside historybuffer (reset to its start by init_predictor_decoder())

    int64_t lastA[2];

    int64_t filterA[2];
    int64_t filterB[2];

    uint64_t coeffsA[2][4]; ///< adaption coefficients
    uint64_t coeffsB[2][5]; ///< adaption coefficients
    int64_t historybuffer[HISTORY_SIZE + PREDICTOR_SIZE];
} APEPredictor64;
151 | | |
/** Decoder context */
typedef struct APEContext {
    AVClass *class;                          ///< class for AVOptions
    AVCodecContext *avctx;
    BswapDSPContext bdsp;
    LLAudDSPContext adsp;
    int channels;
    int samples;                             ///< samples left to decode in current frame
    int bps;                                 ///< bits per coded sample (8, 16 or 24; checked in ape_decode_init())

    int fileversion;                         ///< codec version, very important in decoding process
    int compression_level;                   ///< compression levels
    int fset;                                ///< which filter set to use (calculated from compression level)
    int flags;                               ///< global decoder flags

    uint32_t CRC;                            ///< signalled frame CRC
    uint32_t CRC_state;                      ///< accumulated CRC
    int frameflags;                          ///< frame flags
    APEPredictor predictor;                  ///< predictor used for final reconstruction
    APEPredictor64 predictor64;              ///< 64bit predictor used for final reconstruction

    int32_t *decoded_buffer;
    int decoded_size;
    int32_t *decoded[MAX_CHANNELS];          ///< decoded data for each channel
    int32_t *interim_buffer;
    int interim_size;
    int32_t *interim[MAX_CHANNELS];          ///< decoded data for each channel
    int blocks_per_loop;                     ///< maximum number of samples to decode for each call

    int16_t* filterbuf[APE_FILTER_LEVELS];   ///< filter memory

    APERangecoder rc;                        ///< rangecoder used to decode actual values
    APERice riceX;                           ///< rice code parameters for the second channel
    APERice riceY;                           ///< rice code parameters for the first channel
    APEFilter filters[APE_FILTER_LEVELS][2]; ///< filters used for reconstruction
    GetBitContext gb;

    uint8_t *data;                           ///< current frame data
    uint8_t *data_end;                       ///< frame data end
    int data_size;                           ///< frame data allocated size
    const uint8_t *ptr;                      ///< current position in frame data

    int error;                               ///< set to 1 by the entropy decoders on overread or invalid codes
    int interim_mode;                        ///< 0 for 8/16-bit input, -1 for 24-bit input (set in ape_decode_init())

    /* Version-specific decoding routines, selected once in ape_decode_init(). */
    void (*entropy_decode_mono)(struct APEContext *ctx, int blockstodecode);
    void (*entropy_decode_stereo)(struct APEContext *ctx, int blockstodecode);
    void (*predictor_decode_mono)(struct APEContext *ctx, int count);
    void (*predictor_decode_stereo)(struct APEContext *ctx, int count);
} APEContext;
202 | | |
/* Forward declarations: ape_decode_init() stores these in the APEContext
 * function pointers before their definitions appear in the file. */
static void ape_apply_filters(APEContext *ctx, int32_t *decoded0,
                              int32_t *decoded1, int count);

/* Entropy decoders; the numeric suffix is the lowest file version handled. */
static void entropy_decode_mono_0000(APEContext *ctx, int blockstodecode);
static void entropy_decode_stereo_0000(APEContext *ctx, int blockstodecode);
static void entropy_decode_mono_3860(APEContext *ctx, int blockstodecode);
static void entropy_decode_stereo_3860(APEContext *ctx, int blockstodecode);
static void entropy_decode_mono_3900(APEContext *ctx, int blockstodecode);
static void entropy_decode_stereo_3900(APEContext *ctx, int blockstodecode);
static void entropy_decode_stereo_3930(APEContext *ctx, int blockstodecode);
static void entropy_decode_mono_3990(APEContext *ctx, int blockstodecode);
static void entropy_decode_stereo_3990(APEContext *ctx, int blockstodecode);

/* Predictor decoders; the numeric suffix is the lowest file version handled. */
static void predictor_decode_mono_3800(APEContext *ctx, int count);
static void predictor_decode_stereo_3800(APEContext *ctx, int count);
static void predictor_decode_mono_3930(APEContext *ctx, int count);
static void predictor_decode_stereo_3930(APEContext *ctx, int count);
static void predictor_decode_mono_3950(APEContext *ctx, int count);
static void predictor_decode_stereo_3950(APEContext *ctx, int count);
222 | | |
223 | | static av_cold int ape_decode_close(AVCodecContext *avctx) |
224 | 1.38k | { |
225 | 1.38k | APEContext *s = avctx->priv_data; |
226 | 1.38k | int i; |
227 | | |
228 | 5.54k | for (i = 0; i < APE_FILTER_LEVELS; i++) |
229 | 4.15k | av_freep(&s->filterbuf[i]); |
230 | | |
231 | 1.38k | av_freep(&s->decoded_buffer); |
232 | 1.38k | av_freep(&s->interim_buffer); |
233 | 1.38k | av_freep(&s->data); |
234 | 1.38k | s->decoded_size = s->data_size = 0; |
235 | | |
236 | 1.38k | return 0; |
237 | 1.38k | } |
238 | | |
239 | | static av_cold int ape_decode_init(AVCodecContext *avctx) |
240 | 1.38k | { |
241 | 1.38k | APEContext *s = avctx->priv_data; |
242 | 1.38k | int channels = avctx->ch_layout.nb_channels; |
243 | 1.38k | int i; |
244 | | |
245 | 1.38k | if (avctx->extradata_size != 6) { |
246 | 117 | av_log(avctx, AV_LOG_ERROR, "Incorrect extradata\n"); |
247 | 117 | return AVERROR(EINVAL); |
248 | 117 | } |
249 | 1.26k | if (channels > 2) { |
250 | 1 | av_log(avctx, AV_LOG_ERROR, "Only mono and stereo is supported\n"); |
251 | 1 | return AVERROR(EINVAL); |
252 | 1 | } |
253 | 1.26k | avctx->bits_per_raw_sample = |
254 | 1.26k | s->bps = avctx->bits_per_coded_sample; |
255 | 1.26k | switch (s->bps) { |
256 | 397 | case 8: |
257 | 397 | avctx->sample_fmt = AV_SAMPLE_FMT_U8P; |
258 | 397 | s->interim_mode = 0; |
259 | 397 | break; |
260 | 259 | case 16: |
261 | 259 | avctx->sample_fmt = AV_SAMPLE_FMT_S16P; |
262 | 259 | s->interim_mode = 0; |
263 | 259 | break; |
264 | 611 | case 24: |
265 | 611 | avctx->sample_fmt = AV_SAMPLE_FMT_S32P; |
266 | 611 | s->interim_mode = -1; |
267 | 611 | break; |
268 | 1 | default: |
269 | 1 | avpriv_request_sample(avctx, |
270 | 1 | "%d bits per coded sample", s->bps); |
271 | 1 | return AVERROR_PATCHWELCOME; |
272 | 1.26k | } |
273 | 1.26k | s->avctx = avctx; |
274 | 1.26k | s->channels = channels; |
275 | 1.26k | s->fileversion = AV_RL16(avctx->extradata); |
276 | 1.26k | s->compression_level = AV_RL16(avctx->extradata + 2); |
277 | 1.26k | s->flags = AV_RL16(avctx->extradata + 4); |
278 | | |
279 | 1.26k | av_log(avctx, AV_LOG_VERBOSE, "Compression Level: %d - Flags: %d\n", |
280 | 1.26k | s->compression_level, s->flags); |
281 | 1.26k | if (s->compression_level % 1000 || s->compression_level > COMPRESSION_LEVEL_INSANE || |
282 | 1.26k | !s->compression_level || |
283 | 1.26k | (s->fileversion < 3930 && s->compression_level == COMPRESSION_LEVEL_INSANE)) { |
284 | 4 | av_log(avctx, AV_LOG_ERROR, "Incorrect compression level %d\n", |
285 | 4 | s->compression_level); |
286 | 4 | return AVERROR_INVALIDDATA; |
287 | 4 | } |
288 | 1.26k | s->fset = s->compression_level / 1000 - 1; |
289 | 2.92k | for (i = 0; i < APE_FILTER_LEVELS; i++) { |
290 | 2.76k | if (!ape_filter_orders[s->fset][i]) |
291 | 1.10k | break; |
292 | 1.66k | if (!(s->filterbuf[i] = av_malloc((ape_filter_orders[s->fset][i] * 3 + HISTORY_SIZE) * 4))) |
293 | 0 | return AVERROR(ENOMEM); |
294 | 1.66k | } |
295 | | |
296 | 1.26k | if (s->fileversion < 3860) { |
297 | 477 | s->entropy_decode_mono = entropy_decode_mono_0000; |
298 | 477 | s->entropy_decode_stereo = entropy_decode_stereo_0000; |
299 | 786 | } else if (s->fileversion < 3900) { |
300 | 268 | s->entropy_decode_mono = entropy_decode_mono_3860; |
301 | 268 | s->entropy_decode_stereo = entropy_decode_stereo_3860; |
302 | 518 | } else if (s->fileversion < 3930) { |
303 | 65 | s->entropy_decode_mono = entropy_decode_mono_3900; |
304 | 65 | s->entropy_decode_stereo = entropy_decode_stereo_3900; |
305 | 453 | } else if (s->fileversion < 3990) { |
306 | 185 | s->entropy_decode_mono = entropy_decode_mono_3900; |
307 | 185 | s->entropy_decode_stereo = entropy_decode_stereo_3930; |
308 | 268 | } else { |
309 | 268 | s->entropy_decode_mono = entropy_decode_mono_3990; |
310 | 268 | s->entropy_decode_stereo = entropy_decode_stereo_3990; |
311 | 268 | } |
312 | | |
313 | 1.26k | if (s->fileversion < 3930) { |
314 | 810 | s->predictor_decode_mono = predictor_decode_mono_3800; |
315 | 810 | s->predictor_decode_stereo = predictor_decode_stereo_3800; |
316 | 810 | } else if (s->fileversion < 3950) { |
317 | 141 | s->predictor_decode_mono = predictor_decode_mono_3930; |
318 | 141 | s->predictor_decode_stereo = predictor_decode_stereo_3930; |
319 | 312 | } else { |
320 | 312 | s->predictor_decode_mono = predictor_decode_mono_3950; |
321 | 312 | s->predictor_decode_stereo = predictor_decode_stereo_3950; |
322 | 312 | } |
323 | | |
324 | 1.26k | ff_bswapdsp_init(&s->bdsp); |
325 | 1.26k | ff_llauddsp_init(&s->adsp); |
326 | 1.26k | av_channel_layout_uninit(&avctx->ch_layout); |
327 | 1.26k | avctx->ch_layout = (channels == 2) ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO |
328 | 1.26k | : (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO; |
329 | | |
330 | 1.26k | return 0; |
331 | 1.26k | } |
332 | | |
333 | | /** |
334 | | * @name APE range decoding functions |
335 | | * @{ |
336 | | */ |
337 | | |
/* Range coder geometry. With CODE_BITS = 32: TOP_VALUE = 1 << 31,
 * EXTRA_BITS = 7 and BOTTOM_VALUE = 1 << 23, the threshold at or below
 * which range_dec_normalize() pulls in another input byte. */
#define CODE_BITS 32
#define TOP_VALUE ((unsigned int)1 << (CODE_BITS-1))
#define SHIFT_BITS (CODE_BITS - 9)
#define EXTRA_BITS ((CODE_BITS-2) % 8 + 1)
#define BOTTOM_VALUE (TOP_VALUE >> 8)
343 | | |
344 | | /** Start the decoder */ |
345 | | static inline void range_start_decoding(APEContext *ctx) |
346 | 9.07k | { |
347 | 9.07k | ctx->rc.buffer = bytestream_get_byte(&ctx->ptr); |
348 | 9.07k | ctx->rc.low = ctx->rc.buffer >> (8 - EXTRA_BITS); |
349 | 9.07k | ctx->rc.range = (uint32_t) 1 << EXTRA_BITS; |
350 | 9.07k | } |
351 | | |
352 | | /** Perform normalization */ |
353 | | static inline void range_dec_normalize(APEContext *ctx) |
354 | 14.5M | { |
355 | 20.0M | while (ctx->rc.range <= BOTTOM_VALUE) { |
356 | 5.44M | ctx->rc.buffer <<= 8; |
357 | 5.44M | if(ctx->ptr < ctx->data_end) { |
358 | 692k | ctx->rc.buffer += *ctx->ptr; |
359 | 692k | ctx->ptr++; |
360 | 4.75M | } else { |
361 | 4.75M | ctx->error = 1; |
362 | 4.75M | } |
363 | 5.44M | ctx->rc.low = (ctx->rc.low << 8) | ((ctx->rc.buffer >> 1) & 0xFF); |
364 | 5.44M | ctx->rc.range <<= 8; |
365 | 5.44M | } |
366 | 14.5M | } |
367 | | |
368 | | /** |
369 | | * Calculate cumulative frequency for next symbol. Does NO update! |
370 | | * @param ctx decoder context |
371 | | * @param tot_f is the total frequency or (code_value)1<<shift |
372 | | * @return the cumulative frequency |
373 | | */ |
374 | | static inline int range_decode_culfreq(APEContext *ctx, int tot_f) |
375 | 3.76M | { |
376 | 3.76M | range_dec_normalize(ctx); |
377 | 3.76M | ctx->rc.help = ctx->rc.range / tot_f; |
378 | 3.76M | return ctx->rc.low / ctx->rc.help; |
379 | 3.76M | } |
380 | | |
381 | | /** |
382 | | * Decode value with given size in bits |
383 | | * @param ctx decoder context |
384 | | * @param shift number of bits to decode |
385 | | */ |
386 | | static inline int range_decode_culshift(APEContext *ctx, int shift) |
387 | 10.8M | { |
388 | 10.8M | range_dec_normalize(ctx); |
389 | 10.8M | ctx->rc.help = ctx->rc.range >> shift; |
390 | 10.8M | return ctx->rc.low / ctx->rc.help; |
391 | 10.8M | } |
392 | | |
393 | | |
394 | | /** |
395 | | * Update decoding state |
396 | | * @param ctx decoder context |
397 | | * @param sy_f the interval length (frequency of the symbol) |
398 | | * @param lt_f the lower end (frequency sum of < symbols) |
399 | | */ |
400 | | static inline void range_decode_update(APEContext *ctx, int sy_f, int lt_f) |
401 | 14.5M | { |
402 | 14.5M | ctx->rc.low -= ctx->rc.help * lt_f; |
403 | 14.5M | ctx->rc.range = ctx->rc.help * sy_f; |
404 | 14.5M | } |
405 | | |
406 | | /** Decode n bits (n <= 16) without modelling */ |
407 | | static inline int range_decode_bits(APEContext *ctx, int n) |
408 | 3.62M | { |
409 | 3.62M | int sym = range_decode_culshift(ctx, n); |
410 | 3.62M | range_decode_update(ctx, 1, sym); |
411 | 3.62M | return sym; |
412 | 3.62M | } |
413 | | |
414 | | |
/* The value decoders treat overflow symbol MODEL_ELEMENTS - 1 as an escape code. */
#define MODEL_ELEMENTS 64
416 | | |
/**
 * Fixed probabilities for symbols in Monkey Audio version 3.97
 *
 * Cumulative start positions on a 16-bit scale; entry i + 1 minus entry i
 * equals counts_diff_3970[i].
 */
static const uint16_t counts_3970[22] = {
        0, 14824, 28224, 39348, 47855, 53994, 58171, 60926,
    62682, 63786, 64463, 64878, 65126, 65276, 65365, 65419,
    65450, 65469, 65480, 65487, 65491, 65493,
};

/**
 * Probability ranges for symbols in Monkey Audio version 3.97
 *
 * Width of each symbol's interval, i.e. the differences of consecutive
 * counts_3970 entries.
 */
static const uint16_t counts_diff_3970[21] = {
    14824, 13400, 11124, 8507, 6139, 4177, 2755, 1756,
    1104, 677, 415, 248, 150, 89, 54, 31,
    19, 11, 7, 4, 2,
};
434 | | |
/**
 * Fixed probabilities for symbols in Monkey Audio version 3.98
 *
 * Cumulative start positions on a 16-bit scale; entry i + 1 minus entry i
 * equals counts_diff_3980[i].
 */
static const uint16_t counts_3980[22] = {
        0, 19578, 36160, 48417, 56323, 60899, 63265, 64435,
    64971, 65232, 65351, 65416, 65447, 65466, 65476, 65482,
    65485, 65488, 65490, 65491, 65492, 65493,
};

/**
 * Probability ranges for symbols in Monkey Audio version 3.98
 *
 * Width of each symbol's interval, i.e. the differences of consecutive
 * counts_3980 entries.
 */
static const uint16_t counts_diff_3980[21] = {
    19578, 16582, 12257, 7906, 4576, 2366, 1170, 536,
    261, 119, 65, 31, 19, 10, 6, 3,
    3, 2, 1, 1, 1,
};
452 | | |
453 | | /** |
454 | | * Decode symbol |
455 | | * @param ctx decoder context |
456 | | * @param counts probability range start position |
457 | | * @param counts_diff probability range widths |
458 | | */ |
459 | | static inline int range_get_symbol(APEContext *ctx, |
460 | | const uint16_t counts[], |
461 | | const uint16_t counts_diff[]) |
462 | 7.17M | { |
463 | 7.17M | int symbol, cf; |
464 | | |
465 | 7.17M | cf = range_decode_culshift(ctx, 16); |
466 | | |
467 | 7.17M | if(cf > 65492){ |
468 | 5.95k | symbol= cf - 65535 + 63; |
469 | 5.95k | range_decode_update(ctx, 1, cf); |
470 | 5.95k | if(cf > 65535) |
471 | 1.59k | ctx->error=1; |
472 | 5.95k | return symbol; |
473 | 5.95k | } |
474 | | /* figure out the symbol inefficiently; a binary search would be much better */ |
475 | 12.1M | for (symbol = 0; counts[symbol + 1] <= cf; symbol++); |
476 | | |
477 | 7.17M | range_decode_update(ctx, counts_diff[symbol], counts[symbol]); |
478 | | |
479 | 7.17M | return symbol; |
480 | 7.17M | } |
481 | | /** @} */ // group rangecoder |
482 | | |
483 | | static inline void update_rice(APERice *rice, unsigned int x) |
484 | 7.17M | { |
485 | 7.17M | int lim = rice->k ? (1 << (rice->k + 4)) : 0; |
486 | 7.17M | rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5); |
487 | | |
488 | 7.17M | if (rice->ksum < lim) |
489 | 44.8k | rice->k--; |
490 | 7.13M | else if (rice->ksum >= (1 << (rice->k + 5)) && rice->k < 24) |
491 | 28.1k | rice->k++; |
492 | 7.17M | } |
493 | | |
494 | | static inline int get_rice_ook(GetBitContext *gb, int k) |
495 | 1.76M | { |
496 | 1.76M | unsigned int x; |
497 | | |
498 | 1.76M | x = get_unary(gb, 1, get_bits_left(gb)); |
499 | | |
500 | 1.76M | if (k) |
501 | 671k | x = (x << k) | get_bits(gb, k); |
502 | | |
503 | 1.76M | return x; |
504 | 1.76M | } |
505 | | |
506 | | static inline int ape_decode_value_3860(APEContext *ctx, GetBitContext *gb, |
507 | | APERice *rice) |
508 | 2.06M | { |
509 | 2.06M | unsigned int x, overflow; |
510 | | |
511 | 2.06M | overflow = get_unary(gb, 1, get_bits_left(gb)); |
512 | | |
513 | 2.06M | if (ctx->fileversion > 3880) { |
514 | 3.37M | while (overflow >= 16) { |
515 | 2.41M | overflow -= 16; |
516 | 2.41M | rice->k += 4; |
517 | 2.41M | } |
518 | 958k | } |
519 | | |
520 | 2.06M | if (!rice->k) |
521 | 1.00M | x = overflow; |
522 | 1.06M | else if(rice->k <= MIN_CACHE_BITS) { |
523 | 640k | x = (overflow << rice->k) + get_bits(gb, rice->k); |
524 | 640k | } else { |
525 | 420k | av_log(ctx->avctx, AV_LOG_ERROR, "Too many bits: %"PRIu32"\n", rice->k); |
526 | 420k | ctx->error = 1; |
527 | 420k | return AVERROR_INVALIDDATA; |
528 | 420k | } |
529 | 1.64M | rice->ksum += x - (rice->ksum + 8 >> 4); |
530 | 1.64M | if (rice->ksum < (rice->k ? 1 << (rice->k + 4) : 0)) |
531 | 73.4k | rice->k--; |
532 | 1.57M | else if (rice->ksum >= (1 << (rice->k + 5)) && rice->k < 24) |
533 | 4.17k | rice->k++; |
534 | | |
535 | | /* Convert to signed */ |
536 | 1.64M | return ((x >> 1) ^ ((x & 1) - 1)) + 1; |
537 | 2.06M | } |
538 | | |
539 | | static inline int ape_decode_value_3900(APEContext *ctx, APERice *rice) |
540 | 3.54M | { |
541 | 3.54M | unsigned int x, overflow; |
542 | 3.54M | int tmpk; |
543 | | |
544 | 3.54M | overflow = range_get_symbol(ctx, counts_3970, counts_diff_3970); |
545 | | |
546 | 3.54M | if (overflow == (MODEL_ELEMENTS - 1)) { |
547 | 2.14k | tmpk = range_decode_bits(ctx, 5); |
548 | 2.14k | overflow = 0; |
549 | 2.14k | } else |
550 | 3.54M | tmpk = (rice->k < 1) ? 0 : rice->k - 1; |
551 | | |
552 | 3.54M | if (tmpk <= 16 || ctx->fileversion < 3910) { |
553 | 3.46M | if (tmpk > 23) { |
554 | 462 | av_log(ctx->avctx, AV_LOG_ERROR, "Too many bits: %d\n", tmpk); |
555 | 462 | return AVERROR_INVALIDDATA; |
556 | 462 | } |
557 | 3.46M | x = range_decode_bits(ctx, tmpk); |
558 | 3.46M | } else if (tmpk <= 31) { |
559 | 78.3k | x = range_decode_bits(ctx, 16); |
560 | 78.3k | x |= (range_decode_bits(ctx, tmpk - 16) << 16); |
561 | 78.3k | } else { |
562 | 0 | av_log(ctx->avctx, AV_LOG_ERROR, "Too many bits: %d\n", tmpk); |
563 | 0 | return AVERROR_INVALIDDATA; |
564 | 0 | } |
565 | 3.54M | x += overflow << tmpk; |
566 | | |
567 | 3.54M | update_rice(rice, x); |
568 | | |
569 | | /* Convert to signed */ |
570 | 3.54M | return ((x >> 1) ^ ((x & 1) - 1)) + 1; |
571 | 3.54M | } |
572 | | |
573 | | static inline int ape_decode_value_3990(APEContext *ctx, APERice *rice) |
574 | 3.63M | { |
575 | 3.63M | unsigned int x, overflow, pivot; |
576 | 3.63M | int base; |
577 | | |
578 | 3.63M | pivot = FFMAX(rice->ksum >> 5, 1); |
579 | | |
580 | 3.63M | overflow = range_get_symbol(ctx, counts_3980, counts_diff_3980); |
581 | | |
582 | 3.63M | if (overflow == (MODEL_ELEMENTS - 1)) { |
583 | 878 | overflow = (unsigned)range_decode_bits(ctx, 16) << 16; |
584 | 878 | overflow |= range_decode_bits(ctx, 16); |
585 | 878 | } |
586 | | |
587 | 3.63M | if (pivot < 0x10000) { |
588 | 3.49M | base = range_decode_culfreq(ctx, pivot); |
589 | 3.49M | range_decode_update(ctx, 1, base); |
590 | 3.49M | } else { |
591 | 133k | int base_hi = pivot, base_lo; |
592 | 133k | int bbits = 0; |
593 | | |
594 | 969k | while (base_hi & ~0xFFFF) { |
595 | 836k | base_hi >>= 1; |
596 | 836k | bbits++; |
597 | 836k | } |
598 | 133k | base_hi = range_decode_culfreq(ctx, base_hi + 1); |
599 | 133k | range_decode_update(ctx, 1, base_hi); |
600 | 133k | base_lo = range_decode_culfreq(ctx, 1 << bbits); |
601 | 133k | range_decode_update(ctx, 1, base_lo); |
602 | | |
603 | 133k | base = (base_hi << bbits) + base_lo; |
604 | 133k | } |
605 | | |
606 | 3.63M | x = base + overflow * pivot; |
607 | | |
608 | 3.63M | update_rice(rice, x); |
609 | | |
610 | | /* Convert to signed */ |
611 | 3.63M | return ((x >> 1) ^ ((x & 1) - 1)) + 1; |
612 | 3.63M | } |
613 | | |
/**
 * Derive a Rice parameter from a history sum.
 * @param ksum history sum
 * @return av_log2(ksum), plus one when ksum is non-zero
 */
static int get_k(int ksum)
{
    const int log2_floor = av_log2(ksum);

    return ksum ? log2_floor + 1 : log2_floor;
}
618 | | |
/**
 * Decode a whole block of residuals for one channel (oldest entropy coder).
 *
 * Three phases, each with its own rule for the Rice parameter:
 *  - samples 0..4 use a fixed k of 10;
 *  - samples 5..63 recompute k from the running sum after every sample;
 *  - samples 64.. keep a sliding sum over the last 64 values and move k
 *    up or down while the sum is outside [ksummin, ksummax).
 * The unsigned codes are converted to signed values only at the end label.
 * The early returns taken when k becomes too large skip that conversion
 * and leave ctx->error untouched; running out of input bits in the third
 * phase sets ctx->error.
 *
 * @param ctx            decoder context; its bit reader is the one read from
 * @param gb             NOTE(review): not used, every read goes through
 *                       &ctx->gb (the visible callers pass &ctx->gb anyway)
 * @param out            output array with room for blockstodecode values
 * @param rice           Rice state of the channel, reset and updated here
 * @param blockstodecode number of values to decode
 */
static void decode_array_0000(APEContext *ctx, GetBitContext *gb,
                              int32_t *out, APERice *rice, int blockstodecode)
{
    int i;
    unsigned ksummax, ksummin;

    rice->ksum = 0;
    /* Phase 1: fixed parameter for the first five values. */
    for (i = 0; i < FFMIN(blockstodecode, 5); i++) {
        out[i] = get_rice_ook(&ctx->gb, 10);
        rice->ksum += out[i];
    }

    if (blockstodecode <= 5)
        goto end;

    /* Phase 2: parameter follows the running average of all values so far. */
    rice->k = get_k(rice->ksum / 10);
    if (rice->k >= 24)
        return;
    for (; i < FFMIN(blockstodecode, 64); i++) {
        out[i] = get_rice_ook(&ctx->gb, rice->k);
        rice->ksum += out[i];
        rice->k = get_k(rice->ksum / ((i + 1) * 2));
        if (rice->k >= 24)
            return;
    }

    if (blockstodecode <= 64)
        goto end;

    /* Phase 3: sliding window of 64 values; ksummin/ksummax bracket the
     * sums for which the current k stays valid. */
    rice->k = get_k(rice->ksum >> 7);
    ksummax = 1 << rice->k + 7;
    ksummin = rice->k ? (1 << rice->k + 6) : 0;
    for (; i < blockstodecode; i++) {
        if (get_bits_left(&ctx->gb) < 1) {
            ctx->error = 1;
            return;
        }
        out[i] = get_rice_ook(&ctx->gb, rice->k);
        /* add the newest value and drop the one leaving the window */
        rice->ksum += out[i] - (unsigned)out[i - 64];
        while (rice->ksum < ksummin) {
            rice->k--;
            ksummin = rice->k ? ksummin >> 1 : 0;
            ksummax >>= 1;
        }
        while (rice->ksum >= ksummax) {
            rice->k++;
            if (rice->k > 24)
                return;
            ksummax <<= 1;
            ksummin = ksummin ? ksummin << 1 : 128;
        }
    }

end:
    /* Convert to signed */
    for (i = 0; i < blockstodecode; i++)
        out[i] = ((out[i] >> 1) ^ ((out[i] & 1) - 1)) + 1;
}
676 | | |
677 | | static void entropy_decode_mono_0000(APEContext *ctx, int blockstodecode) |
678 | 13.4k | { |
679 | 13.4k | decode_array_0000(ctx, &ctx->gb, ctx->decoded[0], &ctx->riceY, |
680 | 13.4k | blockstodecode); |
681 | 13.4k | } |
682 | | |
683 | | static void entropy_decode_stereo_0000(APEContext *ctx, int blockstodecode) |
684 | 59.8k | { |
685 | 59.8k | decode_array_0000(ctx, &ctx->gb, ctx->decoded[0], &ctx->riceY, |
686 | 59.8k | blockstodecode); |
687 | 59.8k | decode_array_0000(ctx, &ctx->gb, ctx->decoded[1], &ctx->riceX, |
688 | 59.8k | blockstodecode); |
689 | 59.8k | } |
690 | | |
691 | | static void entropy_decode_mono_3860(APEContext *ctx, int blockstodecode) |
692 | 2.96k | { |
693 | 2.96k | int32_t *decoded0 = ctx->decoded[0]; |
694 | | |
695 | 1.18M | while (blockstodecode--) |
696 | 1.17M | *decoded0++ = ape_decode_value_3860(ctx, &ctx->gb, &ctx->riceY); |
697 | 2.96k | } |
698 | | |
699 | | static void entropy_decode_stereo_3860(APEContext *ctx, int blockstodecode) |
700 | 10.0k | { |
701 | 10.0k | int32_t *decoded0 = ctx->decoded[0]; |
702 | 10.0k | int32_t *decoded1 = ctx->decoded[1]; |
703 | 10.0k | int blocks = blockstodecode; |
704 | | |
705 | 453k | while (blockstodecode--) |
706 | 443k | *decoded0++ = ape_decode_value_3860(ctx, &ctx->gb, &ctx->riceY); |
707 | 453k | while (blocks--) |
708 | 443k | *decoded1++ = ape_decode_value_3860(ctx, &ctx->gb, &ctx->riceX); |
709 | 10.0k | } |
710 | | |
711 | | static void entropy_decode_mono_3900(APEContext *ctx, int blockstodecode) |
712 | 1.70k | { |
713 | 1.70k | int32_t *decoded0 = ctx->decoded[0]; |
714 | | |
715 | 1.02M | while (blockstodecode--) |
716 | 1.02M | *decoded0++ = ape_decode_value_3900(ctx, &ctx->riceY); |
717 | 1.70k | } |
718 | | |
719 | | static void entropy_decode_stereo_3900(APEContext *ctx, int blockstodecode) |
720 | 1.07k | { |
721 | 1.07k | int32_t *decoded0 = ctx->decoded[0]; |
722 | 1.07k | int32_t *decoded1 = ctx->decoded[1]; |
723 | 1.07k | int blocks = blockstodecode; |
724 | | |
725 | 381k | while (blockstodecode--) |
726 | 379k | *decoded0++ = ape_decode_value_3900(ctx, &ctx->riceY); |
727 | 1.07k | range_dec_normalize(ctx); |
728 | | // because of some implementation peculiarities we need to backpedal here |
729 | 1.07k | ctx->ptr -= 1; |
730 | 1.07k | range_start_decoding(ctx); |
731 | 381k | while (blocks--) |
732 | 379k | *decoded1++ = ape_decode_value_3900(ctx, &ctx->riceX); |
733 | 1.07k | } |
734 | | |
735 | | static void entropy_decode_stereo_3930(APEContext *ctx, int blockstodecode) |
736 | 1.45k | { |
737 | 1.45k | int32_t *decoded0 = ctx->decoded[0]; |
738 | 1.45k | int32_t *decoded1 = ctx->decoded[1]; |
739 | | |
740 | 880k | while (blockstodecode--) { |
741 | 879k | *decoded0++ = ape_decode_value_3900(ctx, &ctx->riceY); |
742 | 879k | *decoded1++ = ape_decode_value_3900(ctx, &ctx->riceX); |
743 | 879k | } |
744 | 1.45k | } |
745 | | |
746 | | static void entropy_decode_mono_3990(APEContext *ctx, int blockstodecode) |
747 | 1.06k | { |
748 | 1.06k | int32_t *decoded0 = ctx->decoded[0]; |
749 | | |
750 | 686k | while (blockstodecode--) |
751 | 685k | *decoded0++ = ape_decode_value_3990(ctx, &ctx->riceY); |
752 | 1.06k | } |
753 | | |
754 | | static void entropy_decode_stereo_3990(APEContext *ctx, int blockstodecode) |
755 | 2.17k | { |
756 | 2.17k | int32_t *decoded0 = ctx->decoded[0]; |
757 | 2.17k | int32_t *decoded1 = ctx->decoded[1]; |
758 | | |
759 | 1.47M | while (blockstodecode--) { |
760 | 1.47M | *decoded0++ = ape_decode_value_3990(ctx, &ctx->riceY); |
761 | 1.47M | *decoded1++ = ape_decode_value_3990(ctx, &ctx->riceX); |
762 | 1.47M | } |
763 | 2.17k | } |
764 | | |
765 | | static int init_entropy_decoder(APEContext *ctx) |
766 | 97.9k | { |
767 | | /* Read the CRC */ |
768 | 97.9k | if (ctx->fileversion >= 3900) { |
769 | 9.55k | if (ctx->data_end - ctx->ptr < 6) |
770 | 1.18k | return AVERROR_INVALIDDATA; |
771 | 8.36k | ctx->CRC = bytestream_get_be32(&ctx->ptr); |
772 | 88.4k | } else { |
773 | 88.4k | ctx->CRC = get_bits_long(&ctx->gb, 32); |
774 | 88.4k | } |
775 | | |
776 | | /* Read the frame flags if they exist */ |
777 | 96.8k | ctx->frameflags = 0; |
778 | 96.8k | ctx->CRC_state = UINT32_MAX; |
779 | 96.8k | if ((ctx->fileversion > 3820) && (ctx->CRC & 0x80000000)) { |
780 | 2.91k | ctx->CRC &= ~0x80000000; |
781 | | |
782 | 2.91k | if (ctx->data_end - ctx->ptr < 6) |
783 | 363 | return AVERROR_INVALIDDATA; |
784 | 2.55k | ctx->frameflags = bytestream_get_be32(&ctx->ptr); |
785 | 2.55k | } |
786 | | |
787 | | /* Initialize the rice structs */ |
788 | 96.4k | ctx->riceX.k = 10; |
789 | 96.4k | ctx->riceX.ksum = (1 << ctx->riceX.k) * 16; |
790 | 96.4k | ctx->riceY.k = 10; |
791 | 96.4k | ctx->riceY.ksum = (1 << ctx->riceY.k) * 16; |
792 | | |
793 | 96.4k | if (ctx->fileversion >= 3900) { |
794 | | /* The first 8 bits of input are ignored. */ |
795 | 8.00k | ctx->ptr++; |
796 | | |
797 | 8.00k | range_start_decoding(ctx); |
798 | 8.00k | } |
799 | | |
800 | 96.4k | return 0; |
801 | 96.8k | } |
802 | | |
/* Initial predictor coefficients, copied into the predictor state by
 * init_predictor_decoder(). */

/** coeffsA start value for file versions below 3930 at the "fast" level */
static const int32_t initial_coeffs_fast_3320[1] = {
    375,
};

/** coeffsA start values for file versions below 3930 at the other levels */
static const int32_t initial_coeffs_a_3800[3] = {
    64, 115, 64,
};

/** coeffsB start values for file versions below 3930 */
static const int32_t initial_coeffs_b_3800[2] = {
    740, 0
};

/** coeffsA start values for file versions 3930 and later */
static const int32_t initial_coeffs_3930[4] = {
    360, 317, -109, 98
};

/** Same values as initial_coeffs_3930, for the 64-bit predictor */
static const int64_t initial_coeffs_3930_64bit[4] = {
    360, 317, -109, 98
};
822 | | |
823 | | static void init_predictor_decoder(APEContext *ctx) |
824 | 96.4k | { |
825 | 96.4k | APEPredictor *p = &ctx->predictor; |
826 | 96.4k | APEPredictor64 *p64 = &ctx->predictor64; |
827 | | |
828 | | /* Zero the history buffers */ |
829 | 96.4k | memset(p->historybuffer, 0, PREDICTOR_SIZE * sizeof(*p->historybuffer)); |
830 | 96.4k | memset(p64->historybuffer, 0, PREDICTOR_SIZE * sizeof(*p64->historybuffer)); |
831 | 96.4k | p->buf = p->historybuffer; |
832 | 96.4k | p64->buf = p64->historybuffer; |
833 | | |
834 | | /* Initialize and zero the coefficients */ |
835 | 96.4k | if (ctx->fileversion < 3930) { |
836 | 89.6k | if (ctx->compression_level == COMPRESSION_LEVEL_FAST) { |
837 | 25.2k | memcpy(p->coeffsA[0], initial_coeffs_fast_3320, |
838 | 25.2k | sizeof(initial_coeffs_fast_3320)); |
839 | 25.2k | memcpy(p->coeffsA[1], initial_coeffs_fast_3320, |
840 | 25.2k | sizeof(initial_coeffs_fast_3320)); |
841 | 64.3k | } else { |
842 | 64.3k | memcpy(p->coeffsA[0], initial_coeffs_a_3800, |
843 | 64.3k | sizeof(initial_coeffs_a_3800)); |
844 | 64.3k | memcpy(p->coeffsA[1], initial_coeffs_a_3800, |
845 | 64.3k | sizeof(initial_coeffs_a_3800)); |
846 | 64.3k | } |
847 | 89.6k | } else { |
848 | 6.79k | memcpy(p->coeffsA[0], initial_coeffs_3930, sizeof(initial_coeffs_3930)); |
849 | 6.79k | memcpy(p->coeffsA[1], initial_coeffs_3930, sizeof(initial_coeffs_3930)); |
850 | 6.79k | memcpy(p64->coeffsA[0], initial_coeffs_3930_64bit, sizeof(initial_coeffs_3930_64bit)); |
851 | 6.79k | memcpy(p64->coeffsA[1], initial_coeffs_3930_64bit, sizeof(initial_coeffs_3930_64bit)); |
852 | 6.79k | } |
853 | 96.4k | memset(p->coeffsB, 0, sizeof(p->coeffsB)); |
854 | 96.4k | memset(p64->coeffsB, 0, sizeof(p64->coeffsB)); |
855 | 96.4k | if (ctx->fileversion < 3930) { |
856 | 89.6k | memcpy(p->coeffsB[0], initial_coeffs_b_3800, |
857 | 89.6k | sizeof(initial_coeffs_b_3800)); |
858 | 89.6k | memcpy(p->coeffsB[1], initial_coeffs_b_3800, |
859 | 89.6k | sizeof(initial_coeffs_b_3800)); |
860 | 89.6k | } |
861 | | |
862 | 96.4k | p->filterA[0] = p->filterA[1] = 0; |
863 | 96.4k | p->filterB[0] = p->filterB[1] = 0; |
864 | 96.4k | p->lastA[0] = p->lastA[1] = 0; |
865 | | |
866 | 96.4k | p64->filterA[0] = p64->filterA[1] = 0; |
867 | 96.4k | p64->filterB[0] = p64->filterB[1] = 0; |
868 | 96.4k | p64->lastA[0] = p64->lastA[1] = 0; |
869 | | |
870 | 96.4k | p->sample_pos = 0; |
871 | 96.4k | } |
872 | | |
/**
 * Inverse sign of an integer.
 *
 * @return -1 when x is positive, 1 when x is negative, 0 when x is zero
 */
static inline int APESIGN(int32_t x) {
    const int is_negative = x < 0;
    const int is_positive = x > 0;

    return is_negative - is_positive;
}
877 | | |
878 | | static av_always_inline int filter_fast_3320(APEPredictor *p, |
879 | | const int decoded, const int filter, |
880 | | const int delayA) |
881 | 820k | { |
882 | 820k | int32_t predictionA; |
883 | | |
884 | 820k | p->buf[delayA] = p->lastA[filter]; |
885 | 820k | if (p->sample_pos < 3) { |
886 | 40.9k | p->lastA[filter] = decoded; |
887 | 40.9k | p->filterA[filter] = decoded; |
888 | 40.9k | return decoded; |
889 | 40.9k | } |
890 | | |
891 | 779k | predictionA = p->buf[delayA] * 2U - p->buf[delayA - 1]; |
892 | 779k | p->lastA[filter] = decoded + (unsigned)((int32_t)(predictionA * p->coeffsA[filter][0]) >> 9); |
893 | | |
894 | 779k | if ((decoded ^ predictionA) > 0) |
895 | 97.0k | p->coeffsA[filter][0]++; |
896 | 682k | else |
897 | 682k | p->coeffsA[filter][0]--; |
898 | | |
899 | 779k | p->filterA[filter] += (unsigned)p->lastA[filter]; |
900 | | |
901 | 779k | return p->filterA[filter]; |
902 | 820k | } |
903 | | |
/**
 * One predictor step for pre-3.93 files at the non-"fast" compression levels.
 *
 * @param p       predictor state (history, adaptive coefficients, filters)
 * @param decoded residual for this sample
 * @param filter  coefficient/filter set to use (callers pass 0 or 1)
 * @param delayA  offset into p->buf of the slot fed from p->lastA
 * @param delayB  offset into p->buf of the slot fed from p->filterB
 * @param start   samples at the start of a frame that skip prediction
 * @param shift   right shift applied to the B prediction
 * @return the updated p->filterA[filter]
 */
static av_always_inline int filter_3800(APEPredictor *p,
                                        const unsigned decoded, const int filter,
                                        const int delayA, const int delayB,
                                        const int start, const int shift)
{
    int32_t predictionA, predictionB, sign;
    int32_t d0, d1, d2, d3, d4;

    /* Push the previous outputs into the history before anything else. */
    p->buf[delayA] = p->lastA[filter];
    p->buf[delayB] = p->filterB[filter];
    if (p->sample_pos < start) {
        /* Warm-up: the residual is only accumulated, no adaptation happens. */
        predictionA = decoded + p->filterA[filter];
        p->lastA[filter] = decoded;
        p->filterB[filter] = decoded;
        p->filterA[filter] = predictionA;
        return predictionA;
    }
    /* Inputs of the A stage (d0..d2) and the B stage (d3, d4); the
     * unsigned casts make the intermediate arithmetic wrap. */
    d2 = p->buf[delayA];
    d1 = (p->buf[delayA] - (unsigned)p->buf[delayA - 1]) * 2;
    d0 = p->buf[delayA] + ((p->buf[delayA - 2] - (unsigned)p->buf[delayA - 1]) * 8);
    d3 = p->buf[delayB] * 2U - p->buf[delayB - 1];
    d4 = p->buf[delayB];

    predictionA = d0 * p->coeffsA[filter][0] +
                  d1 * p->coeffsA[filter][1] +
                  d2 * p->coeffsA[filter][2];

    /* Adapt the A coefficients: each step's sign comes from the sign bit of
     * the matching input, scaled by the inverse sign of the residual. */
    sign = APESIGN(decoded);
    p->coeffsA[filter][0] += (((d0 >> 30) & 2) - 1) * sign;
    p->coeffsA[filter][1] += (((d1 >> 28) & 8) - 4) * sign;
    p->coeffsA[filter][2] += (((d2 >> 28) & 8) - 4) * sign;

    predictionB = d3 * p->coeffsB[filter][0] -
                  d4 * p->coeffsB[filter][1];
    p->lastA[filter] = decoded + (predictionA >> 11);
    /* The B coefficients adapt on the sign of the A-stage output. */
    sign = APESIGN(p->lastA[filter]);
    p->coeffsB[filter][0] += (((d3 >> 29) & 4) - 2) * sign;
    p->coeffsB[filter][1] -= (((d4 >> 30) & 2) - 1) * sign;

    p->filterB[filter] = p->lastA[filter] + (unsigned)(predictionB >> shift);
    /* Final stage: add 31/32 of the previous output. */
    p->filterA[filter] = p->filterB[filter] + (unsigned)((int)(p->filterA[filter] * 31U) >> 5);

    return p->filterA[filter];
}
948 | | |
/**
 * In-place sign-adaptive long filter applied before the main predictor of
 * pre-3.93 files.
 *
 * The first `order` samples are left untouched and seed the delay line.
 * Every later sample has the shifted dot product of the delay line and the
 * coefficients subtracted from it; the coefficients then move by +/-1
 * depending on the signs of the sample and of each delay entry.
 *
 * @param buffer samples, modified in place
 * @param order  filter length (callers pass 16, 128 or 256; local arrays
 *               hold at most 256 taps)
 * @param shift  right shift applied to the dot product
 * @param length number of samples in buffer; nothing happens if it does
 *               not exceed order
 */
static void long_filter_high_3800(int32_t *buffer, int order, int shift, int length)
{
    int32_t coeffs[256], delay[256+256];
    int32_t *window = delay;
    int pos, tap;

    if (length <= order)
        return;

    memset(coeffs, 0, order * sizeof(*coeffs));
    memcpy(delay, buffer, order * sizeof(*delay));

    for (pos = order; pos < length; pos++) {
        int32_t acc = 0;

        switch (APESIGN(buffer[pos])) {
        case 1:
            for (tap = 0; tap < order; tap++) {
                acc += window[tap] * (unsigned)coeffs[tap];
                coeffs[tap] += (window[tap] >> 31) | 1;
            }
            break;
        case -1:
            for (tap = 0; tap < order; tap++) {
                acc += window[tap] * (unsigned)coeffs[tap];
                coeffs[tap] -= (window[tap] >> 31) | 1;
            }
            break;
        default:
            /* Zero sample: coefficients stay as they are. */
            for (tap = 0; tap < order; tap++)
                acc += window[tap] * (unsigned)coeffs[tap];
            break;
        }

        buffer[pos] -= (unsigned)(acc >> shift);

        /* Slide the window and append the filtered sample. */
        window++;
        window[order - 1] = buffer[pos];

        /* After 256 slides move the live half back to the array start. */
        if (window - delay == 256) {
            memcpy(delay, window, sizeof(*delay) * 256);
            window = delay;
        }
    }
}
988 | | |
/**
 * In-place 8-tap sign-adaptive filter run ahead of long_filter_high_3800()
 * for extra-high files of version >= 3830.
 *
 * @param buffer samples, modified in place
 * @param length number of samples; values <= 0 leave buffer untouched
 */
static void long_filter_ehigh_3830(int32_t *buffer, int length)
{
    int32_t  history[8] = { 0 };
    uint32_t weights[8] = { 0 };
    int n, k;

    for (n = 0; n < length; n++) {
        const int32_t in   = buffer[n];
        const int32_t sign = APESIGN(in);
        int32_t acc = 0;

        /* The accumulation wraps modulo 2^32, so tap order is irrelevant. */
        for (k = 0; k < 8; k++) {
            acc        += history[k] * weights[k];
            weights[k] += ((history[k] >> 31) | 1) * sign;
        }

        /* Age the history by one slot and store the unfiltered input. */
        memmove(history + 1, history, 7 * sizeof(*history));
        history[0] = in;

        buffer[n] -= (unsigned)(acc >> 9);
    }
}
1009 | | |
1010 | | static void predictor_decode_stereo_3800(APEContext *ctx, int count) |
1011 | 66.3k | { |
1012 | 66.3k | APEPredictor *p = &ctx->predictor; |
1013 | 66.3k | int32_t *decoded0 = ctx->decoded[0]; |
1014 | 66.3k | int32_t *decoded1 = ctx->decoded[1]; |
1015 | 66.3k | int start = 4, shift = 10; |
1016 | | |
1017 | 66.3k | if (ctx->compression_level == COMPRESSION_LEVEL_HIGH) { |
1018 | 1.48k | start = 16; |
1019 | 1.48k | long_filter_high_3800(decoded0, 16, 9, count); |
1020 | 1.48k | long_filter_high_3800(decoded1, 16, 9, count); |
1021 | 64.8k | } else if (ctx->compression_level == COMPRESSION_LEVEL_EXTRA_HIGH) { |
1022 | 56.0k | int order = 128, shift2 = 11; |
1023 | | |
1024 | 56.0k | if (ctx->fileversion >= 3830) { |
1025 | 1.48k | order <<= 1; |
1026 | 1.48k | shift++; |
1027 | 1.48k | shift2++; |
1028 | 1.48k | long_filter_ehigh_3830(decoded0 + order, count - order); |
1029 | 1.48k | long_filter_ehigh_3830(decoded1 + order, count - order); |
1030 | 1.48k | } |
1031 | 56.0k | start = order; |
1032 | 56.0k | long_filter_high_3800(decoded0, order, shift2, count); |
1033 | 56.0k | long_filter_high_3800(decoded1, order, shift2, count); |
1034 | 56.0k | } |
1035 | | |
1036 | 1.32M | while (count--) { |
1037 | 1.26M | int X = *decoded0, Y = *decoded1; |
1038 | 1.26M | if (ctx->compression_level == COMPRESSION_LEVEL_FAST) { |
1039 | 233k | *decoded0 = filter_fast_3320(p, Y, 0, YDELAYA); |
1040 | 233k | decoded0++; |
1041 | 233k | *decoded1 = filter_fast_3320(p, X, 1, XDELAYA); |
1042 | 233k | decoded1++; |
1043 | 1.02M | } else { |
1044 | 1.02M | *decoded0 = filter_3800(p, Y, 0, YDELAYA, YDELAYB, |
1045 | 1.02M | start, shift); |
1046 | 1.02M | decoded0++; |
1047 | 1.02M | *decoded1 = filter_3800(p, X, 1, XDELAYA, XDELAYB, |
1048 | 1.02M | start, shift); |
1049 | 1.02M | decoded1++; |
1050 | 1.02M | } |
1051 | | |
1052 | | /* Combined */ |
1053 | 1.26M | p->buf++; |
1054 | 1.26M | p->sample_pos++; |
1055 | | |
1056 | | /* Have we filled the history buffer? */ |
1057 | 1.26M | if (p->buf == p->historybuffer + HISTORY_SIZE) { |
1058 | 1.50k | memmove(p->historybuffer, p->buf, |
1059 | 1.50k | PREDICTOR_SIZE * sizeof(*p->historybuffer)); |
1060 | 1.50k | p->buf = p->historybuffer; |
1061 | 1.50k | } |
1062 | 1.26M | } |
1063 | 66.3k | } |
1064 | | |
1065 | | static void predictor_decode_mono_3800(APEContext *ctx, int count) |
1066 | 3.60k | { |
1067 | 3.60k | APEPredictor *p = &ctx->predictor; |
1068 | 3.60k | int32_t *decoded0 = ctx->decoded[0]; |
1069 | 3.60k | int start = 4, shift = 10; |
1070 | | |
1071 | 3.60k | if (ctx->compression_level == COMPRESSION_LEVEL_HIGH) { |
1072 | 593 | start = 16; |
1073 | 593 | long_filter_high_3800(decoded0, 16, 9, count); |
1074 | 3.01k | } else if (ctx->compression_level == COMPRESSION_LEVEL_EXTRA_HIGH) { |
1075 | 1.85k | int order = 128, shift2 = 11; |
1076 | | |
1077 | 1.85k | if (ctx->fileversion >= 3830) { |
1078 | 1.42k | order <<= 1; |
1079 | 1.42k | shift++; |
1080 | 1.42k | shift2++; |
1081 | 1.42k | long_filter_ehigh_3830(decoded0 + order, count - order); |
1082 | 1.42k | } |
1083 | 1.85k | start = order; |
1084 | 1.85k | long_filter_high_3800(decoded0, order, shift2, count); |
1085 | 1.85k | } |
1086 | | |
1087 | 1.22M | while (count--) { |
1088 | 1.22M | if (ctx->compression_level == COMPRESSION_LEVEL_FAST) { |
1089 | 353k | *decoded0 = filter_fast_3320(p, *decoded0, 0, YDELAYA); |
1090 | 353k | decoded0++; |
1091 | 869k | } else { |
1092 | 869k | *decoded0 = filter_3800(p, *decoded0, 0, YDELAYA, YDELAYB, |
1093 | 869k | start, shift); |
1094 | 869k | decoded0++; |
1095 | 869k | } |
1096 | | |
1097 | | /* Combined */ |
1098 | 1.22M | p->buf++; |
1099 | 1.22M | p->sample_pos++; |
1100 | | |
1101 | | /* Have we filled the history buffer? */ |
1102 | 1.22M | if (p->buf == p->historybuffer + HISTORY_SIZE) { |
1103 | 1.82k | memmove(p->historybuffer, p->buf, |
1104 | 1.82k | PREDICTOR_SIZE * sizeof(*p->historybuffer)); |
1105 | 1.82k | p->buf = p->historybuffer; |
1106 | 1.82k | } |
1107 | 1.22M | } |
1108 | 3.60k | } |
1109 | | |
1110 | | static av_always_inline int predictor_update_3930(APEPredictor *p, |
1111 | | const int decoded, const int filter, |
1112 | | const int delayA) |
1113 | 364k | { |
1114 | 364k | int32_t predictionA, sign; |
1115 | 364k | uint32_t d0, d1, d2, d3; |
1116 | | |
1117 | 364k | p->buf[delayA] = p->lastA[filter]; |
1118 | 364k | d0 = p->buf[delayA ]; |
1119 | 364k | d1 = p->buf[delayA ] - (unsigned)p->buf[delayA - 1]; |
1120 | 364k | d2 = p->buf[delayA - 1] - (unsigned)p->buf[delayA - 2]; |
1121 | 364k | d3 = p->buf[delayA - 2] - (unsigned)p->buf[delayA - 3]; |
1122 | | |
1123 | 364k | predictionA = d0 * p->coeffsA[filter][0] + |
1124 | 364k | d1 * p->coeffsA[filter][1] + |
1125 | 364k | d2 * p->coeffsA[filter][2] + |
1126 | 364k | d3 * p->coeffsA[filter][3]; |
1127 | | |
1128 | 364k | p->lastA[filter] = decoded + (predictionA >> 9); |
1129 | 364k | p->filterA[filter] = p->lastA[filter] + ((int)(p->filterA[filter] * 31U) >> 5); |
1130 | | |
1131 | 364k | sign = APESIGN(decoded); |
1132 | 364k | p->coeffsA[filter][0] += (((int32_t)d0 < 0) * 2 - 1) * sign; |
1133 | 364k | p->coeffsA[filter][1] += (((int32_t)d1 < 0) * 2 - 1) * sign; |
1134 | 364k | p->coeffsA[filter][2] += (((int32_t)d2 < 0) * 2 - 1) * sign; |
1135 | 364k | p->coeffsA[filter][3] += (((int32_t)d3 < 0) * 2 - 1) * sign; |
1136 | | |
1137 | 364k | return p->filterA[filter]; |
1138 | 364k | } |
1139 | | |
1140 | | static void predictor_decode_stereo_3930(APEContext *ctx, int count) |
1141 | 1.07k | { |
1142 | 1.07k | APEPredictor *p = &ctx->predictor; |
1143 | 1.07k | int32_t *decoded0 = ctx->decoded[0]; |
1144 | 1.07k | int32_t *decoded1 = ctx->decoded[1]; |
1145 | | |
1146 | 1.07k | ape_apply_filters(ctx, ctx->decoded[0], ctx->decoded[1], count); |
1147 | | |
1148 | 124k | while (count--) { |
1149 | | /* Predictor Y */ |
1150 | 123k | int Y = *decoded1, X = *decoded0; |
1151 | 123k | *decoded0 = predictor_update_3930(p, Y, 0, YDELAYA); |
1152 | 123k | decoded0++; |
1153 | 123k | *decoded1 = predictor_update_3930(p, X, 1, XDELAYA); |
1154 | 123k | decoded1++; |
1155 | | |
1156 | | /* Combined */ |
1157 | 123k | p->buf++; |
1158 | | |
1159 | | /* Have we filled the history buffer? */ |
1160 | 123k | if (p->buf == p->historybuffer + HISTORY_SIZE) { |
1161 | 229 | memmove(p->historybuffer, p->buf, |
1162 | 229 | PREDICTOR_SIZE * sizeof(*p->historybuffer)); |
1163 | 229 | p->buf = p->historybuffer; |
1164 | 229 | } |
1165 | 123k | } |
1166 | 1.07k | } |
1167 | | |
1168 | | static void predictor_decode_mono_3930(APEContext *ctx, int count) |
1169 | 1.20k | { |
1170 | 1.20k | APEPredictor *p = &ctx->predictor; |
1171 | 1.20k | int32_t *decoded0 = ctx->decoded[0]; |
1172 | | |
1173 | 1.20k | ape_apply_filters(ctx, ctx->decoded[0], NULL, count); |
1174 | | |
1175 | 118k | while (count--) { |
1176 | 117k | *decoded0 = predictor_update_3930(p, *decoded0, 0, YDELAYA); |
1177 | 117k | decoded0++; |
1178 | | |
1179 | 117k | p->buf++; |
1180 | | |
1181 | | /* Have we filled the history buffer? */ |
1182 | 117k | if (p->buf == p->historybuffer + HISTORY_SIZE) { |
1183 | 218 | memmove(p->historybuffer, p->buf, |
1184 | 218 | PREDICTOR_SIZE * sizeof(*p->historybuffer)); |
1185 | 218 | p->buf = p->historybuffer; |
1186 | 218 | } |
1187 | 117k | } |
1188 | 1.20k | } |
1189 | | |
/**
 * One step of the two-stage (A and B) adaptive predictor on 64-bit state.
 *
 * @param p            64-bit predictor state
 * @param decoded      filtered residual for this sample
 * @param filter       channel index (callers pass 0 or 1); the B stage
 *                     reads the other channel's filterA via filter ^ 1
 * @param delayA       offset into p->buf of the A-stage history
 * @param delayB       offset into p->buf of the B-stage history
 * @param adaptA       offset into p->buf of the A-stage adaptation signs
 * @param adaptB       offset into p->buf of the B-stage adaptation signs
 * @param interim_mode < 1: both predictions are truncated to 32 bits before
 *                     being combined; otherwise they are combined in 64 bits
 * @return the updated p->filterA[filter], converted to int
 */
static av_always_inline int predictor_update_filter(APEPredictor64 *p,
                                                    const int decoded, const int filter,
                                                    const int delayA, const int delayB,
                                                    const int adaptA, const int adaptB,
                                                    int interim_mode)
{
    int64_t predictionA, predictionB;
    int32_t sign;

    /* A stage: store the last output and its first difference, together
     * with the inverse sign of each (used for adaptation below). */
    p->buf[delayA] = p->lastA[filter];
    p->buf[adaptA] = APESIGN(p->buf[delayA]);
    p->buf[delayA - 1] = p->buf[delayA] - (uint64_t)p->buf[delayA - 1];
    p->buf[adaptA - 1] = APESIGN(p->buf[delayA - 1]);

    predictionA = p->buf[delayA    ] * p->coeffsA[filter][0] +
                  p->buf[delayA - 1] * p->coeffsA[filter][1] +
                  p->buf[delayA - 2] * p->coeffsA[filter][2] +
                  p->buf[delayA - 3] * p->coeffsA[filter][3];

    /* Apply a scaled first-order filter compression */
    p->buf[delayB] = p->filterA[filter ^ 1] - ((int64_t)(p->filterB[filter] * 31ULL) >> 5);
    p->buf[adaptB] = APESIGN(p->buf[delayB]);
    p->buf[delayB - 1] = p->buf[delayB] - (uint64_t)p->buf[delayB - 1];
    p->buf[adaptB - 1] = APESIGN(p->buf[delayB - 1]);
    p->filterB[filter] = p->filterA[filter ^ 1];

    predictionB = p->buf[delayB    ] * p->coeffsB[filter][0] +
                  p->buf[delayB - 1] * p->coeffsB[filter][1] +
                  p->buf[delayB - 2] * p->coeffsB[filter][2] +
                  p->buf[delayB - 3] * p->coeffsB[filter][3] +
                  p->buf[delayB - 4] * p->coeffsB[filter][4];

    if (interim_mode < 1) {
        /* 32-bit path: every intermediate is truncated to int32_t. */
        predictionA = (int32_t)predictionA;
        predictionB = (int32_t)predictionB;
        p->lastA[filter] = (int32_t)(decoded + (unsigned)((int32_t)(predictionA + (predictionB >> 1)) >> 10));
    } else {
        /* 64-bit path: the sum is formed in uint64_t so it wraps. */
        p->lastA[filter] = decoded + ((int64_t)((uint64_t)predictionA + (predictionB >> 1)) >> 10);
    }
    /* Output stage: add 31/32 of the previous output. */
    p->filterA[filter] = p->lastA[filter] + ((int64_t)(p->filterA[filter] * 31ULL) >> 5);

    /* Sign-sign adaptation of all nine coefficients, driven by the stored
     * signs and the inverse sign of the residual. */
    sign = APESIGN(decoded);
    p->coeffsA[filter][0] += p->buf[adaptA    ] * sign;
    p->coeffsA[filter][1] += p->buf[adaptA - 1] * sign;
    p->coeffsA[filter][2] += p->buf[adaptA - 2] * sign;
    p->coeffsA[filter][3] += p->buf[adaptA - 3] * sign;
    p->coeffsB[filter][0] += p->buf[adaptB    ] * sign;
    p->coeffsB[filter][1] += p->buf[adaptB - 1] * sign;
    p->coeffsB[filter][2] += p->buf[adaptB - 2] * sign;
    p->coeffsB[filter][3] += p->buf[adaptB - 3] * sign;
    p->coeffsB[filter][4] += p->buf[adaptB - 4] * sign;

    return p->filterA[filter];
}
1244 | | |
/**
 * Undo filtering and prediction on both channels of a stereo frame using
 * the 64-bit predictor_update_filter() predictor.
 *
 * While ctx->interim_mode is -1 (not yet decided) the frame is decoded
 * twice: pass 0 works on ctx->decoded with interim_mode 0, pass 1 works on
 * the copy in ctx->interim with interim_mode 1 and a copy of the predictor
 * state. If a pass yields a left/right value whose magnitude exceeds 1 << 23,
 * that pass stops and ctx->interim_mode is set to the opposite mode.
 *
 * @param ctx   decoder context; ctx->decoded[0] and ctx->decoded[1] are
 *              updated in place
 * @param count number of samples per channel
 */
static void predictor_decode_stereo_3950(APEContext *ctx, int count)
{
    APEPredictor64 *p_default = &ctx->predictor64;
    APEPredictor64 p_interim;
    int lcount = count;
    int num_passes = 1;

    ape_apply_filters(ctx, ctx->decoded[0], ctx->decoded[1], count);
    if (ctx->interim_mode == -1) {
        /* Mode undecided: snapshot predictor and input for a second pass. */
        p_interim = *p_default;
        num_passes ++;
        memcpy(ctx->interim[0], ctx->decoded[0], sizeof(*ctx->interim[0])*count);
        memcpy(ctx->interim[1], ctx->decoded[1], sizeof(*ctx->interim[1])*count);
    }

    for (int pass = 0; pass < num_passes; pass++) {
        int32_t *decoded0, *decoded1;
        /* The second pass always runs in interim mode. */
        int interim_mode = ctx->interim_mode > 0 || pass;
        APEPredictor64 *p;

        if (pass) {
            p = &p_interim;
            decoded0 = ctx->interim[0];
            decoded1 = ctx->interim[1];
        } else {
            p = p_default;
            decoded0 = ctx->decoded[0];
            decoded1 = ctx->decoded[1];
        }
        /* Also re-targets the copied struct's pointer at its own array. */
        p->buf = p->historybuffer;

        count = lcount;
        while (count--) {
            /* Predictor Y */
            int32_t a0 = predictor_update_filter(p, *decoded0, 0, YDELAYA, YDELAYB,
                                                 YADAPTCOEFFSA, YADAPTCOEFFSB,
                                                 interim_mode);
            int32_t a1 = predictor_update_filter(p, *decoded1, 1, XDELAYA, XDELAYB,
                                                 XADAPTCOEFFSA, XADAPTCOEFFSB,
                                                 interim_mode);
            *decoded0++ = a0;
            *decoded1++ = a1;
            if (num_passes > 1) {
                /* Same decorrelation as ape_unpack_stereo(), used here only
                 * to see whether this mode gives in-range output. */
                int32_t left  = a1 - (unsigned)(a0 / 2);
                int32_t right = left + (unsigned)a0;

                /* FFNABS presumably yields -|x| (verify against its
                 * definition); the test then fires when either value
                 * exceeds 1 << 23 in magnitude. */
                if (FFMIN(FFNABS(left), FFNABS(right)) < -(1<<23)) {
                    ctx->interim_mode = !interim_mode;
                    av_log(ctx->avctx, AV_LOG_VERBOSE, "Interim mode: %d\n", ctx->interim_mode);
                    break;
                }
            }

            /* Combined */
            p->buf++;

            /* Have we filled the history buffer? */
            if (p->buf == p->historybuffer + HISTORY_SIZE) {
                memmove(p->historybuffer, p->buf,
                        PREDICTOR_SIZE * sizeof(*p->historybuffer));
                p->buf = p->historybuffer;
            }
        }
    }
    if (num_passes > 1 && ctx->interim_mode > 0) {
        /* Interim mode chosen: adopt the second pass's output and state. */
        memcpy(ctx->decoded[0], ctx->interim[0], sizeof(*ctx->interim[0])*lcount);
        memcpy(ctx->decoded[1], ctx->interim[1], sizeof(*ctx->interim[1])*lcount);
        *p_default = p_interim;
        p_default->buf = p_default->historybuffer;
    }
}
1316 | | |
/**
 * Undo filtering and prediction on a mono frame using the 64-bit predictor
 * state. Only the A stage (four coefficients) is used.
 *
 * @param ctx   decoder context; ctx->decoded[0] is updated in place
 * @param count number of samples
 */
static void predictor_decode_mono_3950(APEContext *ctx, int count)
{
    APEPredictor64 *p = &ctx->predictor64;
    int32_t *decoded0 = ctx->decoded[0];
    int32_t predictionA, currentA, A, sign;

    ape_apply_filters(ctx, ctx->decoded[0], NULL, count);

    /* Resume from the last predictor output saved by the previous call. */
    currentA = p->lastA[0];

    while (count--) {
        A = *decoded0;

        /* Store the last output and its first difference in the history. */
        p->buf[YDELAYA] = currentA;
        p->buf[YDELAYA - 1] = p->buf[YDELAYA] - (uint64_t)p->buf[YDELAYA - 1];

        predictionA = p->buf[YDELAYA    ] * p->coeffsA[0][0] +
                      p->buf[YDELAYA - 1] * p->coeffsA[0][1] +
                      p->buf[YDELAYA - 2] * p->coeffsA[0][2] +
                      p->buf[YDELAYA - 3] * p->coeffsA[0][3];

        currentA = A + (uint64_t)(predictionA >> 10);

        /* Record the inverse signs of the two newest history entries. */
        p->buf[YADAPTCOEFFSA]     = APESIGN(p->buf[YDELAYA    ]);
        p->buf[YADAPTCOEFFSA - 1] = APESIGN(p->buf[YDELAYA - 1]);

        /* Sign-sign adaptation driven by the inverse sign of the residual. */
        sign = APESIGN(A);
        p->coeffsA[0][0] += p->buf[YADAPTCOEFFSA    ] * sign;
        p->coeffsA[0][1] += p->buf[YADAPTCOEFFSA - 1] * sign;
        p->coeffsA[0][2] += p->buf[YADAPTCOEFFSA - 2] * sign;
        p->coeffsA[0][3] += p->buf[YADAPTCOEFFSA - 3] * sign;

        p->buf++;

        /* Have we filled the history buffer? */
        if (p->buf == p->historybuffer + HISTORY_SIZE) {
            memmove(p->historybuffer, p->buf,
                    PREDICTOR_SIZE * sizeof(*p->historybuffer));
            p->buf = p->historybuffer;
        }

        /* Output stage: add 31/32 of the previous output. */
        p->filterA[0] = currentA + (uint64_t)((int64_t)(p->filterA[0] * 31U) >> 5);
        *(decoded0++) = p->filterA[0];
    }

    /* Save the predictor output for the next call. */
    p->lastA[0] = currentA;
}
1364 | | |
1365 | | static void do_init_filter(APEFilter *f, int16_t *buf, int order) |
1366 | 273k | { |
1367 | 273k | f->coeffs = buf; |
1368 | 273k | f->historybuffer = buf + order; |
1369 | 273k | f->delay = f->historybuffer + order * 2; |
1370 | 273k | f->adaptcoeffs = f->historybuffer + order; |
1371 | | |
1372 | 273k | memset(f->historybuffer, 0, (order * 2) * sizeof(*f->historybuffer)); |
1373 | 273k | memset(f->coeffs, 0, order * sizeof(*f->coeffs)); |
1374 | 273k | f->avg = 0; |
1375 | 273k | } |
1376 | | |
1377 | | static void init_filter(APEContext *ctx, APEFilter *f, int16_t *buf, int order) |
1378 | 136k | { |
1379 | 136k | do_init_filter(&f[0], buf, order); |
1380 | 136k | do_init_filter(&f[1], buf + order * 3 + HISTORY_SIZE, order); |
1381 | 136k | } |
1382 | | |
/**
 * Run one adaptive filter over a block of samples, in place.
 *
 * @param ctx      decoder context (supplies the scalar-product DSP routine)
 * @param version  file version; selects the adaptation rule (< 3980 or not)
 * @param f        filter state (coefficients, delay line, adaptation values)
 * @param data     samples, modified in place
 * @param count    number of samples
 * @param order    filter order
 * @param fracbits number of fractional bits in the fixed-point product
 */
static void do_apply_filter(APEContext *ctx, int version, APEFilter *f,
                            int32_t *data, int count, int order, int fracbits)
{
    int res;
    unsigned absres;

    while (count--) {
        /* round fixedpoint scalar product */
        res = ctx->adsp.scalarproduct_and_madd_int16(f->coeffs,
                                                     f->delay - order,
                                                     f->adaptcoeffs - order,
                                                     order, APESIGN(*data));
        /* Add half an LSB before shifting so the result is rounded. */
        res = (int64_t)(res + (1LL << (fracbits - 1))) >> fracbits;
        res += (unsigned)*data;
        *data++ = res;

        /* Update the output history */
        *f->delay++ = av_clip_int16(res);

        if (version < 3980) {
            /* Version ??? to < 3.98 files (untested) */
            /* +4 for negative res, -4 for positive res, 0 for zero. */
            f->adaptcoeffs[0] = (res == 0) ? 0 : ((res >> 28) & 8) - 4;
            f->adaptcoeffs[-4] >>= 1;
            f->adaptcoeffs[-8] >>= 1;
        } else {
            /* Version 3.98 and later files */

            /* Update the adaption coefficients */
            absres = FFABSU(res);
            if (absres)
                *f->adaptcoeffs = APESIGN(res) *
                                  (8 << ((absres > f->avg * 3LL) + (absres > (f->avg + f->avg / 3))));
            /* equivalent to the following code
                if (absres <= f->avg * 4 / 3)
                    *f->adaptcoeffs = APESIGN(res) * 8;
                else if (absres <= f->avg * 3)
                    *f->adaptcoeffs = APESIGN(res) * 16;
                else
                    *f->adaptcoeffs = APESIGN(res) * 32;
            */
            else
                *f->adaptcoeffs = 0;

            /* Move the running average 1/16 of the way towards |res|. */
            f->avg += (int)(absres - (unsigned)f->avg) / 16;

            /* Halve selected older adaptation values. */
            f->adaptcoeffs[-1] >>= 1;
            f->adaptcoeffs[-2] >>= 1;
            f->adaptcoeffs[-8] >>= 1;
        }

        f->adaptcoeffs++;

        /* Have we filled the history buffer? */
        if (f->delay == f->historybuffer + HISTORY_SIZE + (order * 2)) {
            /* Keep the newest 2 * order entries and rewind both pointers. */
            memmove(f->historybuffer, f->delay - (order * 2),
                    (order * 2) * sizeof(*f->historybuffer));
            f->delay = f->historybuffer + order * 2;
            f->adaptcoeffs = f->historybuffer + order;
        }
    }
}
1444 | | |
1445 | | static void apply_filter(APEContext *ctx, APEFilter *f, |
1446 | | int32_t *data0, int32_t *data1, |
1447 | | int count, int order, int fracbits) |
1448 | 8.52k | { |
1449 | 8.52k | do_apply_filter(ctx, ctx->fileversion, &f[0], data0, count, order, fracbits); |
1450 | 8.52k | if (data1) |
1451 | 5.22k | do_apply_filter(ctx, ctx->fileversion, &f[1], data1, count, order, fracbits); |
1452 | 8.52k | } |
1453 | | |
1454 | | static void ape_apply_filters(APEContext *ctx, int32_t *decoded0, |
1455 | | int32_t *decoded1, int count) |
1456 | 4.50k | { |
1457 | 4.50k | int i; |
1458 | | |
1459 | 13.0k | for (i = 0; i < APE_FILTER_LEVELS; i++) { |
1460 | 11.1k | if (!ape_filter_orders[ctx->fset][i]) |
1461 | 2.64k | break; |
1462 | 8.52k | apply_filter(ctx, ctx->filters[i], decoded0, decoded1, count, |
1463 | 8.52k | ape_filter_orders[ctx->fset][i], |
1464 | 8.52k | ape_filter_fracbits[ctx->fset][i]); |
1465 | 8.52k | } |
1466 | 4.50k | } |
1467 | | |
1468 | | static int init_frame_decoder(APEContext *ctx) |
1469 | 97.9k | { |
1470 | 97.9k | int i, ret; |
1471 | 97.9k | if ((ret = init_entropy_decoder(ctx)) < 0) |
1472 | 1.55k | return ret; |
1473 | 96.4k | init_predictor_decoder(ctx); |
1474 | | |
1475 | 232k | for (i = 0; i < APE_FILTER_LEVELS; i++) { |
1476 | 230k | if (!ape_filter_orders[ctx->fset][i]) |
1477 | 93.8k | break; |
1478 | 136k | init_filter(ctx, ctx->filters[i], ctx->filterbuf[i], |
1479 | 136k | ape_filter_orders[ctx->fset][i]); |
1480 | 136k | } |
1481 | 96.4k | return 0; |
1482 | 97.9k | } |
1483 | | |
1484 | | static void ape_unpack_mono(APEContext *ctx, int count) |
1485 | 20.5k | { |
1486 | 20.5k | if (ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE) { |
1487 | | /* We are pure silence, so we're done. */ |
1488 | 1.44k | av_log(ctx->avctx, AV_LOG_DEBUG, "pure silence mono\n"); |
1489 | 1.44k | return; |
1490 | 1.44k | } |
1491 | | |
1492 | 19.1k | ctx->entropy_decode_mono(ctx, count); |
1493 | 19.1k | if (ctx->error) |
1494 | 13.6k | return; |
1495 | | |
1496 | | /* Now apply the predictor decoding */ |
1497 | 5.49k | ctx->predictor_decode_mono(ctx, count); |
1498 | | |
1499 | | /* Pseudo-stereo - just copy left channel to right channel */ |
1500 | 5.49k | if (ctx->channels == 2) { |
1501 | 211 | memcpy(ctx->decoded[1], ctx->decoded[0], count * sizeof(*ctx->decoded[1])); |
1502 | 211 | } |
1503 | 5.49k | } |
1504 | | |
1505 | | static void ape_unpack_stereo(APEContext *ctx, int count) |
1506 | 75.1k | { |
1507 | 75.1k | unsigned left, right; |
1508 | 75.1k | int32_t *decoded0 = ctx->decoded[0]; |
1509 | 75.1k | int32_t *decoded1 = ctx->decoded[1]; |
1510 | | |
1511 | 75.1k | if ((ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE) == APE_FRAMECODE_STEREO_SILENCE) { |
1512 | | /* We are pure silence, so we're done. */ |
1513 | 463 | av_log(ctx->avctx, AV_LOG_DEBUG, "pure silence stereo\n"); |
1514 | 463 | return; |
1515 | 463 | } |
1516 | | |
1517 | 74.6k | ctx->entropy_decode_stereo(ctx, count); |
1518 | 74.6k | if (ctx->error) |
1519 | 5.68k | return; |
1520 | | |
1521 | | /* Now apply the predictor decoding */ |
1522 | 68.9k | ctx->predictor_decode_stereo(ctx, count); |
1523 | | |
1524 | | /* Decorrelate and scale to output depth */ |
1525 | 1.56M | while (count--) { |
1526 | 1.49M | left = *decoded1 - (unsigned)(*decoded0 / 2); |
1527 | 1.49M | right = left + *decoded0; |
1528 | | |
1529 | 1.49M | *(decoded0++) = left; |
1530 | 1.49M | *(decoded1++) = right; |
1531 | 1.49M | } |
1532 | 68.9k | } |
1533 | | |
1534 | | static int ape_decode_frame(AVCodecContext *avctx, AVFrame *frame, |
1535 | | int *got_frame_ptr, AVPacket *avpkt) |
1536 | 135k | { |
1537 | 135k | const uint8_t *buf = avpkt->data; |
1538 | 135k | APEContext *s = avctx->priv_data; |
1539 | 135k | uint8_t *sample8; |
1540 | 135k | int16_t *sample16; |
1541 | 135k | int32_t *sample24; |
1542 | 135k | int i, ch, ret; |
1543 | 135k | int blockstodecode; |
1544 | 135k | uint64_t decoded_buffer_size; |
1545 | | |
1546 | | /* this should never be negative, but bad things will happen if it is, so |
1547 | | check it just to make sure. */ |
1548 | 135k | av_assert0(s->samples >= 0); |
1549 | | |
1550 | 135k | if(!s->samples){ |
1551 | 135k | uint32_t nblocks, offset; |
1552 | 135k | int buf_size; |
1553 | | |
1554 | 135k | if (!avpkt->size) { |
1555 | 1.22k | *got_frame_ptr = 0; |
1556 | 1.22k | return 0; |
1557 | 1.22k | } |
1558 | 133k | if (avpkt->size < 8) { |
1559 | 27.2k | av_log(avctx, AV_LOG_ERROR, "Packet is too small\n"); |
1560 | 27.2k | return AVERROR_INVALIDDATA; |
1561 | 27.2k | } |
1562 | 106k | buf_size = avpkt->size & ~3; |
1563 | 106k | if (buf_size != avpkt->size) { |
1564 | 64.5k | av_log(avctx, AV_LOG_WARNING, "packet size is not a multiple of 4. " |
1565 | 64.5k | "extra bytes at the end will be skipped.\n"); |
1566 | 64.5k | } |
1567 | 106k | if (s->fileversion < 3950) // previous versions overread two bytes |
1568 | 97.7k | buf_size += 2; |
1569 | 106k | av_fast_padded_malloc(&s->data, &s->data_size, buf_size); |
1570 | 106k | if (!s->data) |
1571 | 0 | return AVERROR(ENOMEM); |
1572 | 106k | s->bdsp.bswap_buf((uint32_t *) s->data, (const uint32_t *) buf, |
1573 | 106k | buf_size >> 2); |
1574 | 106k | memset(s->data + (buf_size & ~3), 0, buf_size & 3); |
1575 | 106k | s->ptr = s->data; |
1576 | 106k | s->data_end = s->data + buf_size; |
1577 | | |
1578 | 106k | nblocks = bytestream_get_be32(&s->ptr); |
1579 | 106k | offset = bytestream_get_be32(&s->ptr); |
1580 | 106k | if (s->fileversion >= 3900) { |
1581 | 16.0k | if (offset > 3) { |
1582 | 6.25k | av_log(avctx, AV_LOG_ERROR, "Incorrect offset passed\n"); |
1583 | 6.25k | av_freep(&s->data); |
1584 | 6.25k | s->data_size = 0; |
1585 | 6.25k | return AVERROR_INVALIDDATA; |
1586 | 6.25k | } |
1587 | 9.83k | if (s->data_end - s->ptr < offset) { |
1588 | 206 | av_log(avctx, AV_LOG_ERROR, "Packet is too small\n"); |
1589 | 206 | return AVERROR_INVALIDDATA; |
1590 | 206 | } |
1591 | 9.62k | s->ptr += offset; |
1592 | 90.4k | } else { |
1593 | 90.4k | if ((ret = init_get_bits8(&s->gb, s->ptr, s->data_end - s->ptr)) < 0) |
1594 | 0 | return ret; |
1595 | 90.4k | if (s->fileversion > 3800) |
1596 | 27.8k | skip_bits_long(&s->gb, offset * 8); |
1597 | 62.6k | else |
1598 | 62.6k | skip_bits_long(&s->gb, offset); |
1599 | 90.4k | } |
1600 | | |
1601 | 100k | if (!nblocks || nblocks > INT_MAX / 2 / sizeof(*s->decoded_buffer) - 8) { |
1602 | 2.11k | av_log(avctx, AV_LOG_ERROR, "Invalid sample count: %"PRIu32".\n", |
1603 | 2.11k | nblocks); |
1604 | 2.11k | return AVERROR_INVALIDDATA; |
1605 | 2.11k | } |
1606 | | |
1607 | | /* Initialize the frame decoder */ |
1608 | 97.9k | if (init_frame_decoder(s) < 0) { |
1609 | 1.55k | av_log(avctx, AV_LOG_ERROR, "Error reading frame header\n"); |
1610 | 1.55k | return AVERROR_INVALIDDATA; |
1611 | 1.55k | } |
1612 | 96.4k | s->samples = nblocks; |
1613 | 96.4k | } |
1614 | | |
1615 | 97.3k | if (!s->data) { |
1616 | 0 | *got_frame_ptr = 0; |
1617 | 0 | return avpkt->size; |
1618 | 0 | } |
1619 | | |
1620 | 97.3k | blockstodecode = FFMIN(s->blocks_per_loop, s->samples); |
1621 | | // for old files coefficients were not interleaved, |
1622 | | // so we need to decode all of them at once |
1623 | 97.3k | if (s->fileversion < 3930) |
1624 | 89.6k | blockstodecode = s->samples; |
1625 | | |
1626 | | /* reallocate decoded sample buffer if needed */ |
1627 | 97.3k | decoded_buffer_size = 2LL * FFALIGN(blockstodecode, 8) * sizeof(*s->decoded_buffer); |
1628 | 97.3k | av_assert0(decoded_buffer_size <= INT_MAX); |
1629 | | |
1630 | | /* get output buffer */ |
1631 | 97.3k | frame->nb_samples = blockstodecode; |
1632 | 97.3k | if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) { |
1633 | 1.63k | s->samples=0; |
1634 | 1.63k | return ret; |
1635 | 1.63k | } |
1636 | | |
1637 | 95.7k | av_fast_malloc(&s->decoded_buffer, &s->decoded_size, decoded_buffer_size); |
1638 | 95.7k | if (!s->decoded_buffer) |
1639 | 0 | return AVERROR(ENOMEM); |
1640 | 95.7k | memset(s->decoded_buffer, 0, decoded_buffer_size); |
1641 | 95.7k | s->decoded[0] = s->decoded_buffer; |
1642 | 95.7k | s->decoded[1] = s->decoded_buffer + FFALIGN(blockstodecode, 8); |
1643 | | |
1644 | 95.7k | if (s->interim_mode < 0) { |
1645 | 76.7k | av_fast_malloc(&s->interim_buffer, &s->interim_size, decoded_buffer_size); |
1646 | 76.7k | if (!s->interim_buffer) |
1647 | 0 | return AVERROR(ENOMEM); |
1648 | 76.7k | memset(s->interim_buffer, 0, decoded_buffer_size); |
1649 | 76.7k | s->interim[0] = s->interim_buffer; |
1650 | 76.7k | s->interim[1] = s->interim_buffer + FFALIGN(blockstodecode, 8); |
1651 | 76.7k | } else { |
1652 | 18.9k | av_freep(&s->interim_buffer); |
1653 | 18.9k | s->interim_size = 0; |
1654 | 18.9k | memset(s->interim, 0, sizeof(s->interim)); |
1655 | 18.9k | } |
1656 | | |
1657 | 95.7k | s->error=0; |
1658 | | |
1659 | 95.7k | if ((s->channels == 1) || (s->frameflags & APE_FRAMECODE_PSEUDO_STEREO)) |
1660 | 20.5k | ape_unpack_mono(s, blockstodecode); |
1661 | 75.1k | else |
1662 | 75.1k | ape_unpack_stereo(s, blockstodecode); |
1663 | | |
1664 | 95.7k | if (s->error) { |
1665 | 19.3k | s->samples=0; |
1666 | 19.3k | av_log(avctx, AV_LOG_ERROR, "Error decoding frame\n"); |
1667 | 19.3k | return AVERROR_INVALIDDATA; |
1668 | 19.3k | } |
1669 | | |
1670 | 76.3k | switch (s->bps) { |
1671 | 11.2k | case 8: |
1672 | 32.2k | for (ch = 0; ch < s->channels; ch++) { |
1673 | 21.0k | sample8 = (uint8_t *)frame->data[ch]; |
1674 | 1.83M | for (i = 0; i < blockstodecode; i++) |
1675 | 1.81M | *sample8++ = (s->decoded[ch][i] + 0x80U) & 0xff; |
1676 | 21.0k | } |
1677 | 11.2k | break; |
1678 | 3.23k | case 16: |
1679 | 8.68k | for (ch = 0; ch < s->channels; ch++) { |
1680 | 5.44k | sample16 = (int16_t *)frame->data[ch]; |
1681 | 6.43M | for (i = 0; i < blockstodecode; i++) |
1682 | 6.42M | *sample16++ = s->decoded[ch][i]; |
1683 | 5.44k | } |
1684 | 3.23k | break; |
1685 | 61.9k | case 24: |
1686 | 182k | for (ch = 0; ch < s->channels; ch++) { |
1687 | 120k | sample24 = (int32_t *)frame->data[ch]; |
1688 | 4.54M | for (i = 0; i < blockstodecode; i++) |
1689 | 4.42M | *sample24++ = s->decoded[ch][i] * 256U; |
1690 | 120k | } |
1691 | 61.9k | break; |
1692 | 76.3k | } |
1693 | | |
1694 | 76.3k | s->samples -= blockstodecode; |
1695 | | |
1696 | 76.3k | if (avctx->err_recognition & AV_EF_CRCCHECK && |
1697 | 14.8k | s->fileversion >= 3900) { |
1698 | 4.74k | uint32_t crc = s->CRC_state; |
1699 | 4.74k | const AVCRC *crc_tab = av_crc_get_table(AV_CRC_32_IEEE_LE); |
1700 | 4.74k | int stride = s->bps == 24 ? 4 : (s->bps>>3); |
1701 | 4.74k | int offset = s->bps == 24; |
1702 | 4.74k | int bytes = s->bps >> 3; |
1703 | | |
1704 | 4.35M | for (i = 0; i < blockstodecode; i++) { |
1705 | 11.9M | for (ch = 0; ch < s->channels; ch++) { |
1706 | | #if HAVE_BIGENDIAN |
1707 | | uint8_t *smp_native = frame->data[ch] + i*stride; |
1708 | | uint8_t smp[4]; |
1709 | | for(int j = 0; j<stride; j++) |
1710 | | smp[j] = smp_native[stride-j-1]; |
1711 | | #else |
1712 | 7.62M | uint8_t *smp = frame->data[ch] + i*stride; |
1713 | 7.62M | #endif |
1714 | 7.62M | crc = av_crc(crc_tab, crc, smp+offset, bytes); |
1715 | 7.62M | } |
1716 | 4.34M | } |
1717 | | |
1718 | 4.74k | if (!s->samples && (~crc >> 1) ^ s->CRC) { |
1719 | 3.62k | av_log(avctx, AV_LOG_ERROR, "CRC mismatch! Previously decoded " |
1720 | 3.62k | "frames may have been affected as well.\n"); |
1721 | 3.62k | if (avctx->err_recognition & AV_EF_EXPLODE) |
1722 | 1.24k | return AVERROR_INVALIDDATA; |
1723 | 3.62k | } |
1724 | | |
1725 | 3.49k | s->CRC_state = crc; |
1726 | 3.49k | } |
1727 | | |
1728 | 75.1k | *got_frame_ptr = 1; |
1729 | | |
1730 | 75.1k | return !s->samples ? avpkt->size : 0; |
1731 | 76.3k | } |
1732 | | |
1733 | | static av_cold void ape_flush(AVCodecContext *avctx) /* Flush callback (installed as .flush in ff_ape_decoder): clears the count of samples still pending from the current packet. */
1734 | 78.4k | {
1735 | 78.4k | APEContext *s = avctx->priv_data;
1736 | 78.4k | s->samples= 0; /* with nothing pending, the next ape_decode_frame() call takes its "!s->samples" branch and parses a fresh packet header */
1737 | 78.4k | }
1738 | | |
1739 | | #define OFFSET(x) offsetof(APEContext, x) /* byte offset of field x inside APEContext, used by the option table below */
1740 | | #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM) /* flag combination shared by every entry of the option table */
1741 | | static const AVOption options[] = { /* private options of the APE decoder; referenced by ape_decoder_class.option */
1742 | | { "max_samples", "maximum number of samples decoded per call", OFFSET(blocks_per_loop), AV_OPT_TYPE_INT, { .i64 = 4608 }, 1, INT_MAX, PAR, .unit = "max_samples" }, /* integer stored in APEContext.blocks_per_loop: default 4608, accepted range 1..INT_MAX */
1743 | | { "all", "no maximum. decode all samples for each packet at once", 0, AV_OPT_TYPE_CONST, { .i64 = INT_MAX }, INT_MIN, INT_MAX, PAR, .unit = "max_samples" }, /* named constant in the "max_samples" unit, value INT_MAX */
1744 | | { NULL}, /* terminating entry */
1745 | | };
1746 | | |
1747 | | static const AVClass ape_decoder_class = { /* AVClass for the decoder's private context; attached through .p.priv_class in ff_ape_decoder */
1748 | | .class_name = "APE decoder",
1749 | | .item_name = av_default_item_name,
1750 | | .option = options, /* the "max_samples" option table declared just before this struct */
1751 | | .version = LIBAVUTIL_VERSION_INT,
1752 | | };
1753 | | |
1754 | | const FFCodec ff_ape_decoder = { /* codec registration entry for the Monkey's Audio (APE) decoder; wires the callbacks of this file together */
1755 | | .p.name = "ape",
1756 | | CODEC_LONG_NAME("Monkey's Audio"),
1757 | | .p.type = AVMEDIA_TYPE_AUDIO,
1758 | | .p.id = AV_CODEC_ID_APE,
1759 | | .priv_data_size = sizeof(APEContext), /* size of the private context that the callbacks read from avctx->priv_data */
1760 | | .init = ape_decode_init,
1761 | | .close = ape_decode_close,
1762 | | FF_CODEC_DECODE_CB(ape_decode_frame), /* decode callback; it reports 0 bytes consumed until all samples of the current packet have been output */
1763 | | .p.capabilities = AV_CODEC_CAP_DELAY |
1764 | | AV_CODEC_CAP_DR1,
1765 | | .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
1766 | | .flush = ape_flush, /* zeroes the pending-sample counter */
1767 | | .p.priv_class = &ape_decoder_class, /* AVClass carrying the "max_samples" option */
1768 | | };