/src/libsndfile/src/ALAC/ag_dec.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2011 Apple Inc. All rights reserved. |
3 | | * |
4 | | * @APPLE_APACHE_LICENSE_HEADER_START@ |
5 | | * |
6 | | * Licensed under the Apache License, Version 2.0 (the "License") ; |
7 | | * you may not use this file except in compliance with the License. |
8 | | * You may obtain a copy of the License at |
9 | | * |
10 | | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | | * |
12 | | * Unless required by applicable law or agreed to in writing, software |
13 | | * distributed under the License is distributed on an "AS IS" BASIS, |
14 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | | * See the License for the specific language governing permissions and |
16 | | * limitations under the License. |
17 | | * |
18 | | * @APPLE_APACHE_LICENSE_HEADER_END@ |
19 | | */ |
20 | | |
21 | | /* |
22 | | File: ag_dec.c |
23 | | |
24 | | Contains: Adaptive Golomb decode routines. |
25 | | |
26 | | Copyright: (c) 2001-2011 Apple, Inc. |
27 | | */ |
28 | | |
29 | | #include "config.h" |
30 | | |
31 | | #include <math.h> |
32 | | #include <stdio.h> |
33 | | #include <stdlib.h> |
34 | | #include <string.h> |
35 | | |
36 | | #include "aglib.h" |
37 | | #include "ALACBitUtilities.h" |
38 | | #include "ALACAudioTypes.h" |
39 | | |
/* CODE_TO_LONG_MAXBITS is not referenced by any code in this file. */
#define CODE_TO_LONG_MAXBITS 32
/* dyn_decomp () compares each decoded value against N_MAX_MEAN_CLAMP and,
** when the value is larger, resets its running mean to N_MEAN_CLAMP_VAL.
*/
#define N_MAX_MEAN_CLAMP 0xffff
#define N_MEAN_CLAMP_VAL 0xffff
/* REPORT_VAL is not referenced by any code in this file. */
#define REPORT_VAL 40

/* ALWAYS_INLINE expands to the compiler specific "force inline" annotation
** for GCC compatible compilers and MSVC, and to nothing elsewhere.
*/
#if __GNUC__
#define ALWAYS_INLINE __attribute__ ((always_inline))
#elif defined _MSC_VER
#define ALWAYS_INLINE __forceinline
#else
#define ALWAYS_INLINE
#endif
52 | | |
53 | | /* And on the subject of the CodeWarrior x86 compiler and inlining, I reworked a lot of this |
54 | | to help the compiler out. In many cases this required manual inlining or a macro. Sorry |
55 | | if it is ugly but the performance gains are well worth it. |
56 | | - WSK 5/19/04 |
57 | | */ |
58 | | |
59 | | void set_standard_ag_params (AGParamRecPtr params, uint32_t fullwidth, uint32_t sectorwidth) |
60 | 0 | { |
61 | | /* Use |
62 | | fullwidth = sectorwidth = numOfSamples, for analog 1-dimensional type-short data, |
63 | | but use |
64 | | fullwidth = full image width, sectorwidth = sector (patch) width |
65 | | for such as image (2-dim.) data. |
66 | | */ |
67 | 0 | set_ag_params (params, MB0, PB0, KB0, fullwidth, sectorwidth, MAX_RUN_DEFAULT) ; |
68 | 0 | } |
69 | | |
70 | | void set_ag_params (AGParamRecPtr params, uint32_t m, uint32_t p, uint32_t k, uint32_t f, uint32_t s, uint32_t maxrun) |
71 | 21.5k | { |
72 | 21.5k | params->mb = params->mb0 = m ; |
73 | 21.5k | params->pb = p ; |
74 | 21.5k | params->kb = k ; |
75 | 21.5k | params->wb = (1u << params->kb) - 1 ; |
76 | 21.5k | params->qb = QB-params->pb ; |
77 | 21.5k | params->fw = f ; |
78 | 21.5k | params->sw = s ; |
79 | 21.5k | params->maxrun = maxrun ; |
80 | 21.5k | } |
81 | | |
82 | | #if PRAGMA_MARK |
83 | | #pragma mark - |
84 | | #endif |
85 | | |
86 | | |
/*
**	Count the leading zero bits of `m`, treated as a 32-bit pattern.
**
**	Returns a value in the range 0..32: 0 when bit 31 is set (including every
**	negative `m`) and 32 when `m` is zero.
**
**	This is called several times per decoded sample, so compilers that are
**	known to provide a count-leading-zeros builtin use it; everything else
**	falls back to a bit-by-bit scan that gives the same answer.
*/
static inline int32_t lead (int32_t m)
{
	uint32_t bits = (uint32_t) m ;

	/* The builtin is undefined for zero and the scan would never terminate. */
	if (bits == 0)
		return 32 ;

#if defined (__GNUC__) && defined (__SIZEOF_INT__) && (__SIZEOF_INT__ == 4)
	return (int32_t) __builtin_clz (bits) ;
#else
	{
		uint32_t probe = ((uint32_t) 1) << 31 ;
		int32_t count = 0 ;

		while ((bits & probe) == 0)
		{
			probe >>= 1 ;
			count++ ;
		}

		return count ;
	}
#endif
}
101 | | |
/* Smaller of two arithmetic values; note that each argument may be evaluated twice. */
#define arithmin(a, b) (((b) > (a)) ? (a) : (b))
103 | | |
104 | | static inline int32_t ALWAYS_INLINE lg3a (int32_t x) |
105 | 1.91M | { |
106 | 1.91M | int32_t result ; |
107 | | |
108 | 1.91M | x += 3 ; |
109 | 1.91M | result = lead (x) ; |
110 | | |
111 | 1.91M | return 31 - result ; |
112 | 1.91M | } |
113 | | |
114 | | static inline uint32_t ALWAYS_INLINE read32bit (uint8_t * buffer) |
115 | 1.95M | { |
116 | | // embedded CPUs typically can't read unaligned 32-bit words so just read the bytes |
117 | 1.95M | uint32_t value ; |
118 | | |
119 | 1.95M | value = ((uint32_t) buffer [0] << 24) | ((uint32_t) buffer [1] << 16) | |
120 | 1.95M | ((uint32_t) buffer [2] << 8) | (uint32_t) buffer [3] ; |
121 | 1.95M | return value ; |
122 | | |
123 | 1.95M | } |
124 | | |
125 | | #if PRAGMA_MARK |
126 | | #pragma mark - |
127 | | #endif |
128 | | |
/* Shift `inlong` right by (32 - suff). Every caller in this file passes a
** uint32_t, so the result is the top `suff` bits of that word, right aligned.
** `suff` must be in the range 1..32: a value of 0 would ask for a shift by 32,
** which is undefined for a 32-bit operand.
*/
#define get_next_fromlong(inlong, suff) ((inlong) >> (32 - (suff)))
130 | | |
131 | | |
132 | | static inline uint32_t ALWAYS_INLINE |
133 | | getstreambits (uint8_t *in, int32_t bitoffset, int32_t numbits) |
134 | 2.58k | { |
135 | 2.58k | uint32_t load1, load2 ; |
136 | 2.58k | uint32_t byteoffset = bitoffset / 8 ; |
137 | 2.58k | uint32_t result ; |
138 | | |
139 | | //Assert (numbits <= 32) ; |
140 | | |
141 | 2.58k | load1 = read32bit (in + byteoffset) ; |
142 | | |
143 | 2.58k | if ((numbits + (bitoffset & 0x7)) > 32) |
144 | 383 | { |
145 | 383 | int32_t load2shift ; |
146 | | |
147 | 383 | result = load1 << (bitoffset & 0x7) ; |
148 | 383 | load2 = (uint32_t) in [byteoffset + 4] ; |
149 | 383 | load2shift = (8 - (numbits + (bitoffset & 0x7) - 32)) ; |
150 | 383 | load2 >>= load2shift ; |
151 | 383 | result >>= (32 - numbits) ; |
152 | 383 | result |= load2 ; |
153 | 383 | } |
154 | 2.20k | else |
155 | 2.20k | { |
156 | 2.20k | result = load1 >> (32 - numbits - (bitoffset & 7)) ; |
157 | 2.20k | } |
158 | | |
159 | | // a shift of >= "the number of bits in the type of the value being shifted" results in undefined |
160 | | // behavior so don't try to shift by 32 |
161 | 2.58k | if (numbits != (sizeof (result) * 8)) |
162 | 2.08k | result &= ~ (0xfffffffful << numbits) ; |
163 | | |
164 | 2.58k | return result ; |
165 | 2.58k | } |
166 | | |
167 | | |
168 | | static inline int32_t dyn_get (unsigned char *in, uint32_t *bitPos, uint32_t m, uint32_t k) |
169 | 38.4k | { |
170 | 38.4k | uint32_t tempbits = *bitPos ; |
171 | 38.4k | uint32_t result ; |
172 | 38.4k | uint32_t pre = 0, v ; |
173 | 38.4k | uint32_t streamlong ; |
174 | | |
175 | 38.4k | streamlong = read32bit (in + (tempbits >> 3)) ; |
176 | 38.4k | streamlong <<= (tempbits & 7) ; |
177 | | |
178 | | /* find the number of bits in the prefix */ |
179 | 38.4k | { |
180 | 38.4k | uint32_t notI = ~streamlong ; |
181 | 38.4k | pre = lead (notI) ; |
182 | 38.4k | } |
183 | | |
184 | 38.4k | if (pre >= MAX_PREFIX_16) |
185 | 246 | { |
186 | 246 | pre = MAX_PREFIX_16 ; |
187 | 246 | tempbits += pre ; |
188 | 246 | streamlong <<= pre ; |
189 | 246 | result = get_next_fromlong (streamlong, MAX_DATATYPE_BITS_16) ; |
190 | 246 | tempbits += MAX_DATATYPE_BITS_16 ; |
191 | | |
192 | 246 | } |
193 | 38.2k | else |
194 | 38.2k | { |
195 | | // all of the bits must fit within the long we have loaded |
196 | | //Assert (pre+1+k <= 32) ; |
197 | | |
198 | 38.2k | tempbits += pre ; |
199 | 38.2k | tempbits += 1 ; |
200 | 38.2k | streamlong <<= pre + 1 ; |
201 | 38.2k | v = get_next_fromlong (streamlong, k) ; |
202 | 38.2k | tempbits += k ; |
203 | | |
204 | 38.2k | result = pre*m + v-1 ; |
205 | | |
206 | 38.2k | if (v < 2) |
207 | 37.2k | { |
208 | 37.2k | result -= (v-1) ; |
209 | 37.2k | tempbits -= 1 ; |
210 | 37.2k | } |
211 | 38.2k | } |
212 | | |
213 | 38.4k | *bitPos = tempbits ; |
214 | 38.4k | return result ; |
215 | 38.4k | } |
216 | | |
217 | | |
218 | | static inline int32_t dyn_get_32bit (uint8_t * in, uint32_t * bitPos, int32_t m, int32_t k, int32_t maxbits) |
219 | 1.91M | { |
220 | 1.91M | uint32_t tempbits = *bitPos ; |
221 | 1.91M | uint32_t v ; |
222 | 1.91M | uint32_t streamlong ; |
223 | 1.91M | uint32_t result ; |
224 | | |
225 | 1.91M | streamlong = read32bit (in + (tempbits >> 3)) ; |
226 | 1.91M | streamlong <<= (tempbits & 7) ; |
227 | | |
228 | | /* find the number of bits in the prefix */ |
229 | 1.91M | { |
230 | 1.91M | uint32_t notI = ~streamlong ; |
231 | 1.91M | result = lead (notI) ; |
232 | 1.91M | } |
233 | | |
234 | 1.91M | if (result >= MAX_PREFIX_32) |
235 | 2.58k | { |
236 | 2.58k | result = getstreambits (in, tempbits+MAX_PREFIX_32, maxbits) ; |
237 | 2.58k | tempbits += MAX_PREFIX_32 + maxbits ; |
238 | 2.58k | } |
239 | 1.90M | else |
240 | 1.90M | { |
241 | | /* all of the bits must fit within the long we have loaded*/ |
242 | | //Assert (k<=14) ; |
243 | | //Assert (result<MAX_PREFIX_32) ; |
244 | | //Assert (result+1+k <= 32) ; |
245 | | |
246 | 1.90M | tempbits += result ; |
247 | 1.90M | tempbits += 1 ; |
248 | | |
249 | 1.90M | if (k != 1) |
250 | 1.88M | { |
251 | 1.88M | streamlong <<= result + 1 ; |
252 | 1.88M | v = get_next_fromlong (streamlong, k) ; |
253 | 1.88M | tempbits += k ; |
254 | 1.88M | tempbits -= 1 ; |
255 | 1.88M | result = result*m ; |
256 | | |
257 | 1.88M | if (v >= 2) |
258 | 119k | { |
259 | 119k | result += (v-1) ; |
260 | 119k | tempbits += 1 ; |
261 | 119k | } |
262 | 1.88M | } |
263 | 1.90M | } |
264 | | |
265 | 1.91M | *bitPos = tempbits ; |
266 | | |
267 | 1.91M | return result ; |
268 | 1.91M | } |
269 | | |
270 | | int32_t dyn_decomp (AGParamRecPtr params, BitBuffer * bitstream, int32_t * pc, int32_t numSamples, int32_t maxSize, uint32_t * outNumBits) |
271 | 21.5k | { |
272 | 21.5k | uint8_t *in ; |
273 | 21.5k | int32_t *outPtr = pc ; |
274 | 21.5k | uint32_t bitPos, startPos, maxPos ; |
275 | 21.5k | uint32_t j, m, k, n, c, mz ; |
276 | 21.5k | int32_t del, zmode ; |
277 | 21.5k | uint32_t mb ; |
278 | 21.5k | uint32_t pb_local = params->pb ; |
279 | 21.5k | uint32_t kb_local = params->kb ; |
280 | 21.5k | uint32_t wb_local = params->wb ; |
281 | 21.5k | int32_t status ; |
282 | | |
283 | 21.5k | RequireAction ((bitstream != NULL) && (pc != NULL) && (outNumBits != NULL), return kALAC_ParamError ;) ; |
284 | 21.5k | *outNumBits = 0 ; |
285 | | |
286 | 21.5k | in = bitstream->cur ; |
287 | 21.5k | startPos = bitstream->bitIndex ; |
288 | 21.5k | maxPos = bitstream->byteSize * 8 ; |
289 | 21.5k | bitPos = startPos ; |
290 | | |
291 | 21.5k | mb = params->mb0 ; |
292 | 21.5k | zmode = 0 ; |
293 | | |
294 | 21.5k | c = 0 ; |
295 | 21.5k | status = ALAC_noErr ; |
296 | | |
297 | 1.93M | while (c < (uint32_t) numSamples) |
298 | 1.91M | { |
299 | | // bail if we've run off the end of the buffer |
300 | 1.91M | RequireAction (bitPos < maxPos, status = kALAC_ParamError ; goto Exit ;) ; |
301 | | |
302 | 1.91M | m = (mb) >> QBSHIFT ; |
303 | 1.91M | k = lg3a (m) ; |
304 | | |
305 | 1.91M | k = arithmin (k, kb_local) ; |
306 | 1.91M | m = (1 << k) - 1 ; |
307 | | |
308 | 1.91M | n = dyn_get_32bit (in, &bitPos, m, k, maxSize) ; |
309 | | |
310 | | // least significant bit is sign bit |
311 | 1.91M | { |
312 | 1.91M | uint32_t ndecode = n + zmode ; |
313 | 1.91M | int32_t multiplier = - (int) (ndecode & 1) ; |
314 | | |
315 | 1.91M | multiplier |= 1 ; |
316 | 1.91M | del = ((ndecode+1) >> 1) * (multiplier) ; |
317 | 1.91M | } |
318 | | |
319 | 1.91M | *outPtr++ = del ; |
320 | | |
321 | 1.91M | c++ ; |
322 | | |
323 | 1.91M | mb = pb_local * (n + zmode) + mb - ((pb_local * mb) >> QBSHIFT) ; |
324 | | |
325 | | // update mean tracking |
326 | 1.91M | if (n > N_MAX_MEAN_CLAMP) |
327 | 106k | mb = N_MEAN_CLAMP_VAL ; |
328 | | |
329 | 1.91M | zmode = 0 ; |
330 | | |
331 | 1.91M | if (((mb << MMULSHIFT) < QB) && (c < (uint32_t) numSamples)) |
332 | 38.4k | { |
333 | 38.4k | zmode = 1 ; |
334 | 38.4k | k = lead (mb) - BITOFF + ((mb + MOFF) >> MDENSHIFT) ; |
335 | 38.4k | mz = ((1 << k) - 1) & wb_local ; |
336 | | |
337 | 38.4k | n = dyn_get (in, &bitPos, mz, k) ; |
338 | | |
339 | 38.4k | RequireAction (c+n <= (uint32_t) numSamples, status = kALAC_ParamError ; goto Exit ;) ; |
340 | | |
341 | 78.0k | for (j = 0 ; j < n ; j++) |
342 | 39.8k | { |
343 | 39.8k | *outPtr++ = 0 ; |
344 | 39.8k | ++c ; |
345 | 39.8k | } |
346 | | |
347 | 38.2k | if (n >= 65535) |
348 | 0 | zmode = 0 ; |
349 | | |
350 | 38.2k | mb = 0 ; |
351 | 38.2k | } |
352 | 1.91M | } |
353 | | |
354 | 21.5k | Exit: |
355 | 21.5k | *outNumBits = (bitPos - startPos) ; |
356 | 21.5k | BitBufferAdvance (bitstream, *outNumBits) ; |
357 | 21.5k | RequireAction (bitstream->cur <= bitstream->end, status = kALAC_ParamError ;) ; |
358 | | |
359 | 21.5k | return status ; |
360 | 21.5k | } |