/src/ffmpeg/libavcodec/sbrdsp_fixed.c
Line | Count | Source |
1 | | /* |
2 | | * AAC Spectral Band Replication decoding functions |
3 | | * Copyright (c) 2008-2009 Robert Swain ( rob opendot cl ) |
4 | | * Copyright (c) 2009-2010 Alex Converse <alex.converse@gmail.com> |
5 | | * |
6 | | * This file is part of FFmpeg. |
7 | | * |
8 | | * FFmpeg is free software; you can redistribute it and/or |
9 | | * modify it under the terms of the GNU Lesser General Public |
10 | | * License as published by the Free Software Foundation; either |
11 | | * version 2.1 of the License, or (at your option) any later version. |
12 | | * |
13 | | * FFmpeg is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | | * Lesser General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public |
19 | | * License along with FFmpeg; if not, write to the Free Software |
20 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | | * |
22 | | * Note: Rounding-to-nearest used unless otherwise stated |
23 | | * |
24 | | */ |
25 | | |
26 | | #define USE_FIXED 1 |
27 | | |
28 | | #include "aac.h" |
29 | | #include "libavutil/attributes.h" |
30 | | #include "libavutil/intfloat.h" |
31 | | #include "sbrdsp.h" |
32 | | |
/**
 * Sum the squares of every real and imaginary component of x[0..n-1] and
 * hand the total to av_int2sf() as a 32-bit value plus an exponent.
 *
 * The loop consumes two complex samples per iteration, so x[i + 1] is read
 * for every even i < n.
 * NOTE(review): this presumably requires n to be even - confirm with callers.
 */
static SoftFloat sbr_sum_square_c(int (*x)[2], int n)
{
    SoftFloat ret;
    uint64_t accu = 0, round;
    uint64_t accu0 = 0, accu1 = 0, accu2 = 0, accu3 = 0;
    int i, nz, nz0;
    unsigned u;

    /* nz counts how many bits the running total accu has been shifted right. */
    nz = 0;
    for (i = 0; i < n; i += 2) {
        /* Four independent partial sums, one per component of the sample pair. */
        accu0 += (int64_t)x[i + 0][0] * x[i + 0][0];
        accu1 += (int64_t)x[i + 0][1] * x[i + 0][1];
        accu2 += (int64_t)x[i + 1][0] * x[i + 1][0];
        accu3 += (int64_t)x[i + 1][1] * x[i + 1][1];
        /*
         * Flush the partial sums into accu when one more square (at most
         * INT32_MIN * INT32_MIN) could overflow one of them, or on the final
         * iteration. The OR of the four sums is an upper bound on each.
         */
        if ((accu0|accu1|accu2|accu3) > UINT64_MAX - INT32_MIN*(int64_t)INT32_MIN || i+2>=n) {
            /* Bring the partial sums to the scale accu is already at. */
            accu0 >>= nz;
            accu1 >>= nz;
            accu2 >>= nz;
            accu3 >>= nz;
            /*
             * Halve everything until each partial sum is at most a quarter
             * of the room left in accu, so adding all four cannot wrap.
             */
            while ((accu0|accu1|accu2|accu3) > (UINT64_MAX - accu) >> 2) {
                accu0 >>= 1;
                accu1 >>= 1;
                accu2 >>= 1;
                accu3 >>= 1;
                accu  >>= 1;
                nz ++;
            }
            accu += accu0 + accu1 + accu2 + accu3;
            accu0 = accu1 = accu2 = accu3 = 0;
        }
    }

    /* Remember the scaling applied above before nz is reused below. */
    nz0 = 15 - nz;

    /*
     * Pick the right shift nz from the upper 32 bits of accu: 33 minus the
     * number of leading zero bits in that word, or 1 when the word is zero.
     */
    u = accu >> 32;
    if (u) {
        nz = 33;
        while (u < 0x80000000U) {
            u <<= 1;
            nz--;
        }
    } else
        nz = 1;

    /* Add half of the discarded range before shifting (round to nearest). */
    round = 1ULL << (nz-1);
    u = ((accu + round) >> nz);
    /* One further bit is dropped before the value goes to av_int2sf(). */
    u >>= 1;
    ret = av_int2sf(u, nz0 - nz);

    return ret;
}
84 | | |
/**
 * Flip the sign of the 32 odd-indexed entries of a 64-element buffer.
 *
 * @param x buffer of at least 64 ints; x[1], x[3], ..., x[63] are negated
 *          in place, even-indexed entries are left untouched.
 */
static void sbr_neg_odd_64_c(int *x)
{
    int pair;

    for (pair = 0; pair < 32; pair++) {
        int *odd = &x[2 * pair + 1];

        /* Negate through unsigned so the operation wraps instead of overflowing. */
        *odd = -(unsigned)*odd;
    }
}
91 | | |
/**
 * Build the shuffled upper half z[64..127] from the lower half z[0..63].
 *
 * z[64] and z[65] are copies of z[0] and z[1]. For k = 1..31 the pair
 * (z[64 + 2k], z[64 + 2k + 1]) becomes (-z[64 - k], z[k + 1]).
 *
 * @param z buffer of at least 128 ints; z[0..63] is read, z[64..127] is
 *          written.
 */
static void sbr_qmf_pre_shuffle_c(int *z)
{
    int k;

    z[64] = z[0];
    z[65] = z[1];
    for (k = 1; k < 32; k++) {
        /*
         * Negate through unsigned, as sbr_neg_odd_64_c() does: a plain
         * signed negation is undefined behaviour when the input is INT_MIN.
         */
        z[64 + 2 * k    ] = -(unsigned)z[64 - k];
        z[64 + 2 * k + 1] =            z[ k + 1];
    }
}
102 | | |
/**
 * Repack a 64-entry buffer into 32 two-component outputs.
 *
 * For k = 0..31: W[k][0] = -z[63 - k] and W[k][1] = z[k].
 *
 * @param W destination, 32 pairs
 * @param z source, at least 64 ints
 */
static void sbr_qmf_post_shuffle_c(int W[32][2], const int *z)
{
    int k;

    for (k = 0; k < 32; k++) {
        /*
         * Negate through unsigned, as sbr_neg_odd_64_c() does: a plain
         * signed negation is undefined behaviour when the input is INT_MIN.
         */
        W[k][0] = -(unsigned)z[63 - k];
        W[k][1] =            z[k];
    }
}
111 | | |
/**
 * De-interleave 64 inputs into two 32-entry halves, scaling by a rounded
 * right shift of 5 bits.
 *
 * Odd-indexed inputs, walked from src[63] downwards, fill v[0..31]; the
 * even-indexed inputs, walked from src[62] downwards, are negated and fill
 * v[63] down to v[32].
 *
 * @param v   destination, at least 64 ints
 * @param src source, at least 64 ints
 */
static void sbr_qmf_deint_neg_c(int *v, const int *src)
{
    int n;

    for (n = 0; n < 32; n++) {
        /* pair[1] is src[63 - 2n], pair[0] is src[62 - 2n]. */
        const int *pair = src + 62 - 2 * n;
        unsigned biased;

        /* The sums are formed in unsigned arithmetic so they wrap cleanly. */
        biased = 0x10U + pair[1];
        v[n] = (int)biased >> 5;

        biased = 0x10U - pair[0];
        v[63 - n] = (int)biased >> 5;
    }
}
120 | | |
121 | | static av_always_inline SoftFloat autocorr_calc(int64_t accu) |
122 | 36.1M | { |
123 | 36.1M | int nz, mant, expo; |
124 | 36.1M | unsigned round; |
125 | 36.1M | int i = (int)(accu >> 32); |
126 | 36.1M | if (i == 0) { |
127 | 20.2M | nz = 1; |
128 | 20.2M | } else { |
129 | 15.8M | nz = 0; |
130 | 232M | while (FFABS(i) < 0x40000000) { |
131 | 216M | i *= 2; |
132 | 216M | nz++; |
133 | 216M | } |
134 | 15.8M | nz = 32-nz; |
135 | 15.8M | } |
136 | | |
137 | 36.1M | round = 1U << (nz-1); |
138 | 36.1M | mant = (int)((accu + round) >> nz); |
139 | 36.1M | mant = (mant + 0x40LL)>>7; |
140 | 36.1M | mant *= 64; |
141 | 36.1M | expo = nz + 15; |
142 | 36.1M | return av_int2sf(mant, 30 - expo); |
143 | 36.1M | } |
144 | | |
145 | | static av_always_inline void autocorrelate(const int x[40][2], SoftFloat phi[3][2][2], int lag) |
146 | 13.5M | { |
147 | 13.5M | int i; |
148 | 13.5M | int64_t real_sum, imag_sum; |
149 | 13.5M | int64_t accu_re = 0, accu_im = 0; |
150 | | |
151 | 13.5M | if (lag) { |
152 | 343M | for (i = 1; i < 38; i++) { |
153 | 334M | accu_re += (uint64_t)x[i][0] * x[i+lag][0]; |
154 | 334M | accu_re += (uint64_t)x[i][1] * x[i+lag][1]; |
155 | 334M | accu_im += (uint64_t)x[i][0] * x[i+lag][1]; |
156 | 334M | accu_im -= (uint64_t)x[i][1] * x[i+lag][0]; |
157 | 334M | } |
158 | | |
159 | 9.02M | real_sum = accu_re; |
160 | 9.02M | imag_sum = accu_im; |
161 | | |
162 | 9.02M | accu_re += (uint64_t)x[ 0][0] * x[lag][0]; |
163 | 9.02M | accu_re += (uint64_t)x[ 0][1] * x[lag][1]; |
164 | 9.02M | accu_im += (uint64_t)x[ 0][0] * x[lag][1]; |
165 | 9.02M | accu_im -= (uint64_t)x[ 0][1] * x[lag][0]; |
166 | | |
167 | 9.02M | phi[2-lag][1][0] = autocorr_calc(accu_re); |
168 | 9.02M | phi[2-lag][1][1] = autocorr_calc(accu_im); |
169 | | |
170 | 9.02M | if (lag == 1) { |
171 | 4.51M | accu_re = real_sum; |
172 | 4.51M | accu_im = imag_sum; |
173 | 4.51M | accu_re += (uint64_t)x[38][0] * x[39][0]; |
174 | 4.51M | accu_re += (uint64_t)x[38][1] * x[39][1]; |
175 | 4.51M | accu_im += (uint64_t)x[38][0] * x[39][1]; |
176 | 4.51M | accu_im -= (uint64_t)x[38][1] * x[39][0]; |
177 | | |
178 | 4.51M | phi[0][0][0] = autocorr_calc(accu_re); |
179 | 4.51M | phi[0][0][1] = autocorr_calc(accu_im); |
180 | 4.51M | } |
181 | 9.02M | } else { |
182 | 171M | for (i = 1; i < 38; i++) { |
183 | 167M | accu_re += (uint64_t)x[i][0] * x[i][0]; |
184 | 167M | accu_re += (uint64_t)x[i][1] * x[i][1]; |
185 | 167M | } |
186 | 4.51M | real_sum = accu_re; |
187 | 4.51M | accu_re += (uint64_t)x[ 0][0] * x[ 0][0]; |
188 | 4.51M | accu_re += (uint64_t)x[ 0][1] * x[ 0][1]; |
189 | | |
190 | 4.51M | phi[2][1][0] = autocorr_calc(accu_re); |
191 | | |
192 | 4.51M | accu_re = real_sum; |
193 | 4.51M | accu_re += (uint64_t)x[38][0] * x[38][0]; |
194 | 4.51M | accu_re += (uint64_t)x[38][1] * x[38][1]; |
195 | | |
196 | 4.51M | phi[1][0][0] = autocorr_calc(accu_re); |
197 | 4.51M | } |
198 | 13.5M | } |
199 | | |
/**
 * Fill phi by running autocorrelate() on x once for each of the lags 0, 1
 * and 2.
 *
 * NOTE(review): the lag is passed as a literal to an always-inline helper,
 * presumably so that each call can be specialised for its lag - keep the
 * three explicit calls unless that is confirmed not to matter.
 */
static void sbr_autocorrelate_c(const int x[40][2], SoftFloat phi[3][2][2])
{
    autocorrelate(x, phi, 0);
    autocorrelate(x, phi, 1);
    autocorrelate(x, phi, 2);
}
206 | | |
/**
 * Generate X_high[start..end-1] from X_low with a two-tap complex filter.
 *
 * The tap applied to X_low[i - 1] is alpha0 * bw and the tap applied to
 * X_low[i - 2] is alpha1 * bw^2; each product is rounded and shifted right
 * by 31 bits. Per output sample the current input is weighted by 0x20000000,
 * both complex tap products are added, and the sum is rounded and shifted
 * right by 29 bits.
 *
 * X_low[i - 2] is read, so the caller must make indices start - 2 and up
 * valid.
 */
static void sbr_hf_gen_c(int (*X_high)[2], const int (*X_low)[2],
                         const int alpha0[2], const int alpha1[2],
                         int bw, int start, int end)
{
    const int64_t rnd31 = 0x40000000; /* half of 1 << 31 */
    const int64_t rnd29 = 0x10000000; /* half of 1 << 29 */
    int c1_re, c1_im;                 /* tap for X_low[i - 1] */
    int c2_re, c2_im;                 /* tap for X_low[i - 2] */
    int bw_sq;
    int n;

    c1_re = (int)(((int64_t)alpha0[0] * bw + rnd31) >> 31);
    c1_im = (int)(((int64_t)alpha0[1] * bw + rnd31) >> 31);

    bw_sq = (int)(((int64_t)bw * bw + rnd31) >> 31);

    c2_re = (int)(((int64_t)alpha1[0] * bw_sq + rnd31) >> 31);
    c2_im = (int)(((int64_t)alpha1[1] * bw_sq + rnd31) >> 31);

    for (n = start; n < end; n++) {
        const int *cur  = X_low[n];
        const int *lag1 = X_low[n - 1];
        const int *lag2 = X_low[n - 2];
        int64_t acc;

        /* Real part: the products of the imaginary components subtract. */
        acc  = (int64_t)cur[0]  * 0x20000000;
        acc += (int64_t)lag2[0] * c2_re;
        acc -= (int64_t)lag2[1] * c2_im;
        acc += (int64_t)lag1[0] * c1_re;
        acc -= (int64_t)lag1[1] * c1_im;
        X_high[n][0] = (int)((acc + rnd29) >> 29);

        /* Imaginary part: the cross terms add. */
        acc  = (int64_t)cur[1]  * 0x20000000;
        acc += (int64_t)lag2[1] * c2_re;
        acc += (int64_t)lag2[0] * c2_im;
        acc += (int64_t)lag1[1] * c1_re;
        acc += (int64_t)lag1[0] * c1_im;
        X_high[n][1] = (int)((acc + rnd29) >> 29);
    }
}
242 | | |
243 | | static void sbr_hf_g_filt_c(int (*Y)[2], const int (*X_high)[40][2], |
244 | | const SoftFloat *g_filt, int m_max, intptr_t ixh) |
245 | 8.10M | { |
246 | 8.10M | int m; |
247 | 8.10M | int64_t accu; |
248 | | |
249 | 187M | for (m = 0; m < m_max; m++) { |
250 | 179M | if (22 - g_filt[m].exp < 61) { |
251 | 179M | int64_t r = 1LL << (22-g_filt[m].exp); |
252 | 179M | accu = (int64_t)X_high[m][ixh][0] * ((g_filt[m].mant + 0x40)>>7); |
253 | 179M | Y[m][0] = (int)((accu + r) >> (23-g_filt[m].exp)); |
254 | | |
255 | 179M | accu = (int64_t)X_high[m][ixh][1] * ((g_filt[m].mant + 0x40)>>7); |
256 | 179M | Y[m][1] = (int)((accu + r) >> (23-g_filt[m].exp)); |
257 | 179M | } |
258 | 179M | } |
259 | 8.10M | } |
260 | | |
261 | | static av_always_inline int sbr_hf_apply_noise(int (*Y)[2], |
262 | | const SoftFloat *s_m, |
263 | | const SoftFloat *q_filt, |
264 | | int noise, |
265 | | int phi_sign0, |
266 | | int phi_sign1, |
267 | | int m_max) |
268 | 7.69M | { |
269 | 7.69M | int m; |
270 | | |
271 | 172M | for (m = 0; m < m_max; m++) { |
272 | 165M | unsigned y0 = Y[m][0]; |
273 | 165M | unsigned y1 = Y[m][1]; |
274 | 165M | noise = (noise + 1) & 0x1ff; |
275 | 165M | if (s_m[m].mant) { |
276 | 3.20M | int shift, round; |
277 | | |
278 | 3.20M | shift = 22 - s_m[m].exp; |
279 | 3.20M | if (shift < 1) { |
280 | 137k | av_log(NULL, AV_LOG_ERROR, "Overflow in sbr_hf_apply_noise, shift=%d\n", shift); |
281 | 137k | return AVERROR(ERANGE); |
282 | 3.07M | } else if (shift < 30) { |
283 | 2.96M | round = 1 << (shift-1); |
284 | 2.96M | y0 += (s_m[m].mant * phi_sign0 + round) >> shift; |
285 | 2.96M | y1 += (s_m[m].mant * phi_sign1 + round) >> shift; |
286 | 2.96M | } |
287 | 162M | } else { |
288 | 162M | int shift, round, tmp; |
289 | 162M | int64_t accu; |
290 | | |
291 | 162M | shift = 22 - q_filt[m].exp; |
292 | 162M | if (shift < 1) { |
293 | 289k | av_log(NULL, AV_LOG_ERROR, "Overflow in sbr_hf_apply_noise, shift=%d\n", shift); |
294 | 289k | return AVERROR(ERANGE); |
295 | 161M | } else if (shift < 30) { |
296 | 146M | round = 1 << (shift-1); |
297 | | |
298 | 146M | accu = (int64_t)q_filt[m].mant * ff_sbr_noise_table_fixed[noise][0]; |
299 | 146M | tmp = (int)((accu + 0x40000000) >> 31); |
300 | 146M | y0 += (tmp + round) >> shift; |
301 | | |
302 | 146M | accu = (int64_t)q_filt[m].mant * ff_sbr_noise_table_fixed[noise][1]; |
303 | 146M | tmp = (int)((accu + 0x40000000) >> 31); |
304 | 146M | y1 += (tmp + round) >> shift; |
305 | 146M | } |
306 | 162M | } |
307 | 164M | Y[m][0] = y0; |
308 | 164M | Y[m][1] = y1; |
309 | 164M | phi_sign1 = -phi_sign1; |
310 | 164M | } |
311 | 7.26M | return 0; |
312 | 7.69M | } |
313 | | |
314 | | #include "sbrdsp_template.c" |