/src/libsndfile/src/ALAC/matrix_dec.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2011 Apple Inc. All rights reserved. |
3 | | * Copyright (C) 2012-2014 Erik de Castro Lopo <erikd@mega-nerd.com> |
4 | | * |
5 | | * @APPLE_APACHE_LICENSE_HEADER_START@ |
6 | | * |
7 | | * Licensed under the Apache License, Version 2.0 (the "License") ; |
8 | | * you may not use this file except in compliance with the License. |
9 | | * You may obtain a copy of the License at |
10 | | * |
11 | | * http://www.apache.org/licenses/LICENSE-2.0 |
12 | | * |
13 | | * Unless required by applicable law or agreed to in writing, software |
14 | | * distributed under the License is distributed on an "AS IS" BASIS, |
15 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
16 | | * See the License for the specific language governing permissions and |
17 | | * limitations under the License. |
18 | | * |
19 | | * @APPLE_APACHE_LICENSE_HEADER_END@ |
20 | | */ |
21 | | |
22 | | /* |
23 | | File: matrix_dec.c |
24 | | |
25 | | Contains: ALAC mixing/matrixing decode routines. |
26 | | |
27 | | Copyright: (c) 2004-2011 Apple, Inc. |
28 | | */ |
29 | | |
30 | | #include "config.h" |
31 | | |
32 | | #include "matrixlib.h" |
33 | | #include "ALACAudioTypes.h" |
34 | | #include "shift.h" |
35 | | |
36 | | // up to 24-bit "offset" macros for the individual bytes of a 20/24-bit word |
37 | | #if TARGET_RT_BIG_ENDIAN |
38 | | #define LBYTE 2 |
39 | | #define MBYTE 1 |
40 | | #define HBYTE 0 |
41 | | #else |
42 | | #define LBYTE 0 |
43 | | #define MBYTE 1 |
44 | | #define HBYTE 2 |
45 | | #endif |
46 | | |
47 | | /* |
48 | | There is no plain middle-side option ; instead there are various mixing |
49 | | modes including middle-side, each lossless, as embodied in the mix () |
50 | | and unmix () functions. These functions exploit a generalized middle-side |
51 | | transformation: |
52 | | |
53 | | u := [(rL + (m-r)R)/m] ; |
54 | | v := L - R ; |
55 | | |
56 | | where [ ] denotes integer floor. The (lossless) inverse is |
57 | | |
58 | | L = u + v - [rV/m] ; |
59 | | R = L - v ; |
60 | | */ |
61 | | |
62 | | // 16-bit routines |
63 | | |
/* Un-matrix one pair of 16 bit channels.
** u, v        : input predictor buffers (mid/side-style when mixres != 0, plain L/R otherwise)
** out         : interleaved output ; samples left-justified in 32 bit words (<< 16)
** stride      : number of interleaved output channels
** numSamples  : sample frames to process
** mixbits/mixres : matrix parameters ; mixres == 0 selects conventional stereo
*/
void
unmix16 (const int32_t * u, int32_t * v, int32_t * out, uint32_t stride, int32_t numSamples, int32_t mixbits, int32_t mixres)
{	int32_t j ;

	if (mixres != 0)
	{	/* Matrixed stereo : invert the generalized mid/side transform
		** (see the comment block above : L = u + v - [r*v/m], R = L - v).
		*/
		for (j = 0 ; j < numSamples ; j++)
		{	int32_t l, r ;

			l = u [j] + v [j] - ((mixres * v [j]) >> mixbits) ;
			r = l - v [j] ;

			out [0] = arith_shift_left (l, 16) ;
			out [1] = arith_shift_left (r, 16) ;
			out += stride ;
			} ;
		}
	else
	{	/* Conventional separated stereo.
		** Use arith_shift_left (not a raw <<) because left-shifting a
		** negative signed value is undefined behaviour in C.
		*/
		for (j = 0 ; j < numSamples ; j++)
		{	out [0] = arith_shift_left (u [j], 16) ;
			out [1] = arith_shift_left (v [j], 16) ;
			out += stride ;
			} ;
		} ;
}
95 | | |
96 | | // 20-bit routines |
97 | | // - the 20 bits of data are left-justified in 3 bytes of storage but right-aligned for input/output predictor buffers |
98 | | |
/* Un-matrix one pair of 20 bit channels.
** The 20 bit samples are left-justified within 24 bit output words,
** hence every output value is shifted left by 12.
*/
void
unmix20 (const int32_t * u, int32_t * v, int32_t * out, uint32_t stride, int32_t numSamples, int32_t mixbits, int32_t mixres)
{	int32_t k ;

	if (mixres == 0)
	{	/* Conventional separated stereo : interleave and left-justify. */
		for (k = 0 ; k < numSamples ; k++)
		{	out [0] = arith_shift_left (u [k], 12) ;
			out [1] = arith_shift_left (v [k], 12) ;
			out += stride ;
			} ;
		return ;
		} ;

	/* Matrixed stereo : invert the generalized mid/side transform. */
	for (k = 0 ; k < numSamples ; k++)
	{	int32_t left, right ;

		left = u [k] + v [k] - ((mixres * v [k]) >> mixbits) ;
		right = left - v [k] ;

		out [0] = arith_shift_left (left, 12) ;
		out [1] = arith_shift_left (right, 12) ;
		out += stride ;
		} ;
}
130 | | |
131 | | // 24-bit routines |
132 | | // - the 24 bits of data are right-justified in the input/output predictor buffers |
133 | | |
/* Un-matrix one pair of 24 bit channels.
** u, v        : input predictor buffers (right-justified 24 bit data)
** out         : interleaved output ; samples left-justified by one byte (<< 8)
** stride      : number of interleaved output channels
** numSamples  : sample frames to process
** mixbits/mixres : matrix parameters ; mixres == 0 selects conventional stereo
** shiftUV     : interleaved low-order bytes removed before prediction
** bytesShifted : how many low bytes per sample live in shiftUV (0 == none)
**
** All left shifts go through arith_shift_left because a raw << on a
** negative signed value is undefined behaviour in C.
*/
void
unmix24 (const int32_t * u, int32_t * v, int32_t * out, uint32_t stride, int32_t numSamples,
			int32_t mixbits, int32_t mixres, uint16_t * shiftUV, int32_t bytesShifted)
{	int32_t shift = bytesShifted * 8 ;
	int32_t l, r ;
	int32_t j, k ;

	if (mixres != 0)
	{	/* Matrixed stereo. */
		if (bytesShifted != 0)
		{	for (j = 0, k = 0 ; j < numSamples ; j++, k += 2)
			{	l = u [j] + v [j] - ((mixres * v [j]) >> mixbits) ;
				r = l - v [j] ;

				/* Re-attach the low-order bytes that were split off. */
				l = arith_shift_left (l, shift) | (uint32_t) shiftUV [k + 0] ;
				r = arith_shift_left (r, shift) | (uint32_t) shiftUV [k + 1] ;

				out [0] = arith_shift_left (l, 8) ;
				out [1] = arith_shift_left (r, 8) ;
				out += stride ;
				} ;
			}
		else
		{	for (j = 0 ; j < numSamples ; j++)
			{	l = u [j] + v [j] - ((mixres * v [j]) >> mixbits) ;
				r = l - v [j] ;

				out [0] = arith_shift_left (l, 8) ;
				out [1] = arith_shift_left (r, 8) ;
				out += stride ;
				} ;
			} ;
		}
	else
	{	/* Conventional separated stereo. */
		if (bytesShifted != 0)
		{	for (j = 0, k = 0 ; j < numSamples ; j++, k += 2)
			{	l = arith_shift_left (u [j], shift) | (uint32_t) shiftUV [k + 0] ;
				r = arith_shift_left (v [j], shift) | (uint32_t) shiftUV [k + 1] ;

				out [0] = arith_shift_left (l, 8) ;
				out [1] = arith_shift_left (r, 8) ;
				out += stride ;
				} ;
			}
		else
		{	for (j = 0 ; j < numSamples ; j++)
			{	out [0] = arith_shift_left (u [j], 8) ;
				out [1] = arith_shift_left (v [j], 8) ;
				out += stride ;
				} ;
			} ;
		} ;
}
202 | | |
203 | | // 32-bit routines |
204 | | // - note that these really expect the internal data width to be < 32 but the arrays are 32-bit |
205 | | // - otherwise, the calculations might overflow into the 33rd bit and be lost |
206 | | // - therefore, these routines deal with the specified "unused lower" bytes in the "shift" buffers |
207 | | |
/* Un-matrix one pair of 32 bit channels.
** The internal data width is really < 32 bits (see the note above about
** overflow into the 33rd bit), so low-order bytes are carried separately
** in shiftUV and re-attached here.
** u, v        : input predictor buffers
** out         : interleaved output buffer
** stride      : number of interleaved output channels
** numSamples  : sample frames to process
** mixbits/mixres : matrix parameters ; mixres == 0 selects conventional stereo
** shiftUV     : interleaved low-order bytes removed before prediction
** bytesShifted : how many low bytes per sample live in shiftUV (0 == none)
*/
void
unmix32 (const int32_t * u, int32_t * v, int32_t * out, uint32_t stride, int32_t numSamples,
			int32_t mixbits, int32_t mixres, uint16_t * shiftUV, int32_t bytesShifted)
{	int32_t shift = bytesShifted * 8 ;
	int32_t l, r ;
	int32_t j, k ;

	if (mixres != 0)
	{	/* Matrixed stereo with shift (bytesShifted is expected to be
		** non-zero here ; the original code carried an Assert for it).
		*/
		for (j = 0, k = 0 ; j < numSamples ; j++, k += 2)
		{	int32_t lt, rt ;

			lt = u [j] ;
			rt = v [j] ;

			l = lt + rt - ((mixres * rt) >> mixbits) ;
			r = l - rt ;

			out [0] = arith_shift_left (l, shift) | (uint32_t) shiftUV [k + 0] ;
			out [1] = arith_shift_left (r, shift) | (uint32_t) shiftUV [k + 1] ;
			out += stride ;
			} ;
		}
	else
	{	if (bytesShifted == 0)
		{	/* Interleaving without shift. */
			for (j = 0 ; j < numSamples ; j++)
			{	out [0] = u [j] ;
				out [1] = v [j] ;
				out += stride ;
				} ;
			}
		else
		{	/* Interleaving with shift. Use arith_shift_left (not a raw <<)
			** to avoid undefined behaviour on negative sample values.
			*/
			for (j = 0, k = 0 ; j < numSamples ; j++, k += 2)
			{	out [0] = arith_shift_left (u [j], shift) | (uint32_t) shiftUV [k + 0] ;
				out [1] = arith_shift_left (v [j], shift) | (uint32_t) shiftUV [k + 1] ;
				out += stride ;
				} ;
			} ;
		} ;
}
260 | | |
261 | | // 20/24-bit <-> 32-bit helper routines (not really matrixing but convenient to put here) |
262 | | |
/* Copy one channel of right-aligned predictor output into the interleaved
** output buffer, left-justifying each sample by one byte.
** Uses arith_shift_left (not a raw <<) because left-shifting a negative
** signed value is undefined behaviour in C ; this also matches the
** copyPredictorTo20/32 siblings.
*/
void
copyPredictorTo24 (const int32_t * in, int32_t * out, uint32_t stride, int32_t numSamples)
{	int32_t j ;

	for (j = 0 ; j < numSamples ; j++)
	{	out [0] = arith_shift_left (in [j], 8) ;
		out += stride ;
		} ;
}
274 | | |
/* Copy one channel of predictor output into a 24 bit output buffer,
** re-attaching the low-order bytes held in the shift buffer and then
** left-justifying by one byte.
** bytesShifted is expected to be non-zero here (the original code
** carried an Assert for it).
*/
void
copyPredictorTo24Shift (const int32_t * in, uint16_t * shift, int32_t * out, uint32_t stride, int32_t numSamples, int32_t bytesShifted)
{	int32_t shiftBits = bytesShifted * 8 ;
	int32_t * dst = out ;
	int32_t k ;

	for (k = 0 ; k < numSamples ; k++)
	{	int32_t sample ;

		sample = arith_shift_left (in [k], shiftBits) | (uint32_t) shift [k] ;
		dst [0] = arith_shift_left (sample, 8) ;
		dst += stride ;
		} ;
}
292 | | |
/* Copy one channel of predictor output into a 20-in-24 bit output buffer.
** 32 bit predictor values are right-aligned, but 20 bit output values
** must be left-justified within the 24 bit output words, hence << 12.
*/
void
copyPredictorTo20 (const int32_t * in, int32_t * out, uint32_t stride, int32_t numSamples)
{	int32_t i, k ;

	for (i = 0, k = 0 ; i < numSamples ; i++, k += stride)
		out [k] = arith_shift_left (in [i], 12) ;
}
306 | | |
/* Copy one channel of predictor output into a 32 bit output buffer,
** left-justifying each sample by one byte. (Per the original comment, a
** subroutine abstracting the "iPod can only output 16-bit data" problem.)
*/
void
copyPredictorTo32 (const int32_t * in, int32_t * out, uint32_t stride, int32_t numSamples)
{	int32_t count ;

	for (count = 0 ; count < numSamples ; count++)
	{	out [0] = arith_shift_left (in [count], 8) ;
		out += stride ;
		} ;
}
316 | | |
/* Copy one channel of predictor output into a 32 bit output buffer,
** re-attaching the low-order bytes held in the shift buffer.
** bytesShifted is expected to be non-zero here (the original code
** carried an Assert for it).
*/
void
copyPredictorTo32Shift (const int32_t * in, uint16_t * shift, int32_t * out, uint32_t stride, int32_t numSamples, int32_t bytesShifted)
{	uint32_t shiftBits = bytesShifted * 8 ;
	int32_t * dst = out ;
	int32_t k ;

	for (k = 0 ; k < numSamples ; k++)
	{	dst [0] = arith_shift_left (in [k], shiftBits) | (uint32_t) shift [k] ;
		dst += stride ;
		} ;
}
332 | 2.80k | } |