/src/aac/libFDK/src/dct.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* ----------------------------------------------------------------------------- |
2 | | Software License for The Fraunhofer FDK AAC Codec Library for Android |
3 | | |
4 | | © Copyright 1995 - 2020 Fraunhofer-Gesellschaft zur Förderung der angewandten |
5 | | Forschung e.V. All rights reserved. |
6 | | |
7 | | 1. INTRODUCTION |
8 | | The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software |
9 | | that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding |
10 | | scheme for digital audio. This FDK AAC Codec software is intended to be used on |
11 | | a wide variety of Android devices. |
12 | | |
13 | | AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient |
14 | | general perceptual audio codecs. AAC-ELD is considered the best-performing |
15 | | full-bandwidth communications codec by independent studies and is widely |
16 | | deployed. AAC has been standardized by ISO and IEC as part of the MPEG |
17 | | specifications. |
18 | | |
19 | | Patent licenses for necessary patent claims for the FDK AAC Codec (including |
20 | | those of Fraunhofer) may be obtained through Via Licensing |
21 | | (www.vialicensing.com) or through the respective patent owners individually for |
22 | | the purpose of encoding or decoding bit streams in products that are compliant |
23 | | with the ISO/IEC MPEG audio standards. Please note that most manufacturers of |
24 | | Android devices already license these patent claims through Via Licensing or |
25 | | directly from the patent owners, and therefore FDK AAC Codec software may |
26 | | already be covered under those patent licenses when it is used for those |
27 | | licensed purposes only. |
28 | | |
29 | | Commercially-licensed AAC software libraries, including floating-point versions |
30 | | with enhanced sound quality, are also available from Fraunhofer. Users are |
31 | | encouraged to check the Fraunhofer website for additional applications |
32 | | information and documentation. |
33 | | |
34 | | 2. COPYRIGHT LICENSE |
35 | | |
36 | | Redistribution and use in source and binary forms, with or without modification, |
37 | | are permitted without payment of copyright license fees provided that you |
38 | | satisfy the following conditions: |
39 | | |
40 | | You must retain the complete text of this software license in redistributions of |
41 | | the FDK AAC Codec or your modifications thereto in source code form. |
42 | | |
43 | | You must retain the complete text of this software license in the documentation |
44 | | and/or other materials provided with redistributions of the FDK AAC Codec or |
45 | | your modifications thereto in binary form. You must make available free of |
46 | | charge copies of the complete source code of the FDK AAC Codec and your |
47 | | modifications thereto to recipients of copies in binary form. |
48 | | |
49 | | The name of Fraunhofer may not be used to endorse or promote products derived |
50 | | from this library without prior written permission. |
51 | | |
52 | | You may not charge copyright license fees for anyone to use, copy or distribute |
53 | | the FDK AAC Codec software or your modifications thereto. |
54 | | |
55 | | Your modified versions of the FDK AAC Codec must carry prominent notices stating |
56 | | that you changed the software and the date of any change. For modified versions |
57 | | of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android" |
58 | | must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK |
59 | | AAC Codec Library for Android." |
60 | | |
61 | | 3. NO PATENT LICENSE |
62 | | |
63 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without |
64 | | limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE. |
65 | | Fraunhofer provides no warranty of patent non-infringement with respect to this |
66 | | software. |
67 | | |
68 | | You may use this FDK AAC Codec software or modifications thereto only for |
69 | | purposes that are authorized by appropriate patent licenses. |
70 | | |
71 | | 4. DISCLAIMER |
72 | | |
73 | | This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright |
74 | | holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, |
75 | | including but not limited to the implied warranties of merchantability and |
76 | | fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
77 | | CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, |
78 | | or consequential damages, including but not limited to procurement of substitute |
79 | | goods or services; loss of use, data, or profits, or business interruption, |
80 | | however caused and on any theory of liability, whether in contract, strict |
81 | | liability, or tort (including negligence), arising in any way out of the use of |
82 | | this software, even if advised of the possibility of such damage. |
83 | | |
84 | | 5. CONTACT INFORMATION |
85 | | |
86 | | Fraunhofer Institute for Integrated Circuits IIS |
87 | | Attention: Audio and Multimedia Departments - FDK AAC LL |
88 | | Am Wolfsmantel 33 |
89 | | 91058 Erlangen, Germany |
90 | | |
91 | | www.iis.fraunhofer.de/amm |
92 | | amm-info@iis.fraunhofer.de |
93 | | ----------------------------------------------------------------------------- */ |
94 | | |
95 | | /******************* Library for basic calculation routines ******************** |
96 | | |
97 | | Author(s): |
98 | | |
99 | | Description: |
100 | | |
101 | | *******************************************************************************/ |
102 | | |
103 | | /*! |
104 | | \file dct.cpp |
105 | | \brief DCT Implementations |
106 | | Library functions to calculate standard DCTs. This will most likely be |
107 | | replaced by hand-optimized functions for the specific target processor. |
108 | | |
109 | | Three different implementations of the dct type II and the dct type III |
110 | | transforms are provided. |
111 | | |
112 | | By default implementations which are based on a single, standard complex |
113 | | FFT-kernel are used (dctII_f() and dctIII_f()). These are specifically helpful |
114 | | in cases where optimized FFT libraries are already available. The FFT used in |
115 | | these implementations is FFT rad2 from FDK_tools. |
116 | | |
117 | | Of course, one might also use DCT-libraries should they be available. The DCT |
118 | | and DST type IV implementations are only available in a version based on a |
119 | | complex FFT kernel. |
120 | | */ |
121 | | |
122 | | #include "dct.h" |
123 | | |
124 | | #include "FDK_tools_rom.h" |
125 | | #include "fft.h" |
126 | | |
127 | | void dct_getTables(const FIXP_WTP **ptwiddle, const FIXP_STP **sin_twiddle, |
128 | 85.7M | int *sin_step, int length) { |
129 | 85.7M | const FIXP_WTP *twiddle; |
130 | 85.7M | int ld2_length; |
131 | | |
132 | | /* Get ld2 of length - 2 + 1 |
133 | | -2: because first table entry is window of size 4 |
134 | | +1: because we already include +1 because of ceil(log2(length)) */ |
135 | 85.7M | ld2_length = DFRACT_BITS - 1 - fNormz((FIXP_DBL)length) - 1; |
136 | | |
137 | | /* Extract sort of "eigenvalue" (the 4 left most bits) of length. */ |
138 | 85.7M | switch ((length) >> (ld2_length - 1)) { |
139 | 68.4M | case 0x4: /* radix 2 */ |
140 | 68.4M | *sin_twiddle = SineTable1024; |
141 | 68.4M | *sin_step = 1 << (10 - ld2_length); |
142 | 68.4M | twiddle = windowSlopes[0][0][ld2_length - 1]; |
143 | 68.4M | break; |
144 | 472k | case 0x7: /* 10 ms */ |
145 | 472k | *sin_twiddle = SineTable480; |
146 | 472k | *sin_step = 1 << (8 - ld2_length); |
147 | 472k | twiddle = windowSlopes[0][1][ld2_length]; |
148 | 472k | break; |
149 | 16.5M | case 0x6: /* 3/4 of radix 2 */ |
150 | 16.5M | *sin_twiddle = SineTable384; |
151 | 16.5M | *sin_step = 1 << (8 - ld2_length); |
152 | 16.5M | twiddle = windowSlopes[0][2][ld2_length]; |
153 | 16.5M | break; |
154 | 228k | case 0x5: /* 5/16 of radix 2*/ |
155 | 228k | *sin_twiddle = SineTable80; |
156 | 228k | *sin_step = 1 << (6 - ld2_length); |
157 | 228k | twiddle = windowSlopes[0][3][ld2_length]; |
158 | 228k | break; |
159 | 0 | default: |
160 | 0 | *sin_twiddle = NULL; |
161 | 0 | *sin_step = 0; |
162 | 0 | twiddle = NULL; |
163 | 0 | break; |
164 | 85.7M | } |
165 | | |
166 | 85.7M | if (ptwiddle != NULL) { |
167 | 77.3M | FDK_ASSERT(twiddle != NULL); |
168 | 77.3M | *ptwiddle = twiddle; |
169 | 77.3M | } |
170 | | |
171 | 85.7M | FDK_ASSERT(*sin_step > 0); |
172 | 85.7M | } |
173 | | |
174 | | #if !defined(FUNCTION_dct_III) |
175 | | void dct_III(FIXP_DBL *pDat, /*!< pointer to input/output */ |
176 | | FIXP_DBL *tmp, /*!< pointer to temporal working buffer */ |
177 | | int L, /*!< lenght of transform */ |
178 | 1.19M | int *pDat_e) { |
179 | 1.19M | const FIXP_WTP *sin_twiddle; |
180 | 1.19M | int i; |
181 | 1.19M | FIXP_DBL xr, accu1, accu2; |
182 | 1.19M | int inc, index; |
183 | 1.19M | int M = L >> 1; |
184 | | |
185 | 1.19M | FDK_ASSERT(L % 4 == 0); |
186 | 1.19M | dct_getTables(NULL, &sin_twiddle, &inc, L); |
187 | 1.19M | inc >>= 1; |
188 | | |
189 | 1.19M | FIXP_DBL *pTmp_0 = &tmp[2]; |
190 | 1.19M | FIXP_DBL *pTmp_1 = &tmp[(M - 1) * 2]; |
191 | | |
192 | 1.19M | index = 4 * inc; |
193 | | |
194 | | /* This loop performs multiplication for index i (i*inc) */ |
195 | 9.54M | for (i = 1; i<M>> 1; i++, pTmp_0 += 2, pTmp_1 -= 2) { |
196 | 8.34M | FIXP_DBL accu3, accu4, accu5, accu6; |
197 | | |
198 | 8.34M | cplxMultDiv2(&accu2, &accu1, pDat[L - i], pDat[i], sin_twiddle[i * inc]); |
199 | 8.34M | cplxMultDiv2(&accu4, &accu3, pDat[M + i], pDat[M - i], |
200 | 8.34M | sin_twiddle[(M - i) * inc]); |
201 | 8.34M | accu3 >>= 1; |
202 | 8.34M | accu4 >>= 1; |
203 | | |
204 | | /* This method is better for ARM926, that uses operand2 shifted right by 1 |
205 | | * always */ |
206 | 8.34M | if (2 * i < (M / 2)) { |
207 | 3.57M | cplxMultDiv2(&accu6, &accu5, (accu3 - (accu1 >> 1)), |
208 | 3.57M | ((accu2 >> 1) + accu4), sin_twiddle[index]); |
209 | 4.77M | } else { |
210 | 4.77M | cplxMultDiv2(&accu6, &accu5, ((accu2 >> 1) + accu4), |
211 | 4.77M | (accu3 - (accu1 >> 1)), sin_twiddle[index]); |
212 | 4.77M | accu6 = -accu6; |
213 | 4.77M | } |
214 | 8.34M | xr = (accu1 >> 1) + accu3; |
215 | 8.34M | pTmp_0[0] = (xr >> 1) - accu5; |
216 | 8.34M | pTmp_1[0] = (xr >> 1) + accu5; |
217 | | |
218 | 8.34M | xr = (accu2 >> 1) - accu4; |
219 | 8.34M | pTmp_0[1] = (xr >> 1) - accu6; |
220 | 8.34M | pTmp_1[1] = -((xr >> 1) + accu6); |
221 | | |
222 | | /* Create index helper variables for (4*i)*inc indexed equivalent values of |
223 | | * short tables. */ |
224 | 8.34M | if (2 * i < ((M / 2) - 1)) { |
225 | 3.57M | index += 4 * inc; |
226 | 4.77M | } else if (2 * i >= ((M / 2))) { |
227 | 4.77M | index -= 4 * inc; |
228 | 4.77M | } |
229 | 8.34M | } |
230 | | |
231 | 1.19M | xr = fMultDiv2(pDat[M], sin_twiddle[M * inc].v.re); /* cos((PI/(2*L))*M); */ |
232 | 1.19M | tmp[0] = ((pDat[0] >> 1) + xr) >> 1; |
233 | 1.19M | tmp[1] = ((pDat[0] >> 1) - xr) >> 1; |
234 | | |
235 | 1.19M | cplxMultDiv2(&accu2, &accu1, pDat[L - (M / 2)], pDat[M / 2], |
236 | 1.19M | sin_twiddle[M * inc / 2]); |
237 | 1.19M | tmp[M] = accu1 >> 1; |
238 | 1.19M | tmp[M + 1] = accu2 >> 1; |
239 | | |
240 | | /* dit_fft expects 1 bit scaled input values */ |
241 | 1.19M | fft(M, tmp, pDat_e); |
242 | | |
243 | | /* ARM926: 12 cycles per 2-iteration, no overhead code by compiler */ |
244 | 1.19M | pTmp_1 = &tmp[L]; |
245 | 10.7M | for (i = M >> 1; i--;) { |
246 | 9.54M | FIXP_DBL tmp1, tmp2, tmp3, tmp4; |
247 | 9.54M | tmp1 = *tmp++; |
248 | 9.54M | tmp2 = *tmp++; |
249 | 9.54M | tmp3 = *--pTmp_1; |
250 | 9.54M | tmp4 = *--pTmp_1; |
251 | 9.54M | *pDat++ = tmp1; |
252 | 9.54M | *pDat++ = tmp3; |
253 | 9.54M | *pDat++ = tmp2; |
254 | 9.54M | *pDat++ = tmp4; |
255 | 9.54M | } |
256 | | |
257 | 1.19M | *pDat_e += 2; |
258 | 1.19M | } |
259 | | |
260 | | void dst_III(FIXP_DBL *pDat, /*!< pointer to input/output */ |
261 | | FIXP_DBL *tmp, /*!< pointer to temporal working buffer */ |
262 | | int L, /*!< lenght of transform */ |
263 | 0 | int *pDat_e) { |
264 | 0 | int L2 = L >> 1; |
265 | 0 | int i; |
266 | 0 | FIXP_DBL t; |
267 | | |
268 | | /* note: DCT III is reused here, direct DST III implementation might be more |
269 | | * efficient */ |
270 | | |
271 | | /* mirror input */ |
272 | 0 | for (i = 0; i < L2; i++) { |
273 | 0 | t = pDat[i]; |
274 | 0 | pDat[i] = pDat[L - 1 - i]; |
275 | 0 | pDat[L - 1 - i] = t; |
276 | 0 | } |
277 | | |
278 | | /* DCT-III */ |
279 | 0 | dct_III(pDat, tmp, L, pDat_e); |
280 | | |
281 | | /* flip signs at odd indices */ |
282 | 0 | for (i = 1; i < L; i += 2) pDat[i] = -pDat[i]; |
283 | 0 | } |
284 | | |
285 | | #endif |
286 | | |
287 | | #if !defined(FUNCTION_dct_II) |
288 | | void dct_II( |
289 | | FIXP_DBL *pDat, /*!< pointer to input/output */ |
290 | | FIXP_DBL *tmp, /*!< pointer to temporal working buffer */ |
291 | | int L, /*!< lenght of transform (has to be a multiple of 8 (or 4 in case |
292 | | DCT_II_L_MULTIPLE_OF_4_SUPPORT is defined) */ |
293 | 7.21M | int *pDat_e) { |
294 | 7.21M | const FIXP_WTP *sin_twiddle; |
295 | 7.21M | FIXP_DBL accu1, accu2; |
296 | 7.21M | FIXP_DBL *pTmp_0, *pTmp_1; |
297 | | |
298 | 7.21M | int i; |
299 | 7.21M | int inc, index = 0; |
300 | 7.21M | int M = L >> 1; |
301 | | |
302 | 7.21M | FDK_ASSERT(L % 4 == 0); |
303 | 7.21M | dct_getTables(NULL, &sin_twiddle, &inc, L); |
304 | 7.21M | inc >>= 1; |
305 | | |
306 | 7.21M | { |
307 | 82.8M | for (i = 0; i < M; i++) { |
308 | 75.5M | tmp[i] = pDat[2 * i] >> 2; |
309 | 75.5M | tmp[L - 1 - i] = pDat[2 * i + 1] >> 2; |
310 | 75.5M | } |
311 | 7.21M | } |
312 | | |
313 | 7.21M | fft(M, tmp, pDat_e); |
314 | | |
315 | 7.21M | pTmp_0 = &tmp[2]; |
316 | 7.21M | pTmp_1 = &tmp[(M - 1) * 2]; |
317 | | |
318 | 7.21M | index = inc * 4; |
319 | | |
320 | 37.7M | for (i = 1; i<M>> 1; i++, pTmp_0 += 2, pTmp_1 -= 2) { |
321 | 30.5M | FIXP_DBL a1, a2; |
322 | 30.5M | FIXP_DBL accu3, accu4; |
323 | | |
324 | 30.5M | a1 = ((pTmp_0[1] >> 1) + (pTmp_1[1] >> 1)); |
325 | 30.5M | a2 = ((pTmp_1[0] >> 1) - (pTmp_0[0] >> 1)); |
326 | | |
327 | 30.5M | if (2 * i < (M / 2)) { |
328 | 13.5M | cplxMultDiv2(&accu1, &accu2, a2, a1, sin_twiddle[index]); |
329 | 17.0M | } else { |
330 | 17.0M | cplxMultDiv2(&accu1, &accu2, a1, a2, sin_twiddle[index]); |
331 | 17.0M | accu1 = -accu1; |
332 | 17.0M | } |
333 | 30.5M | accu1 <<= 1; |
334 | 30.5M | accu2 <<= 1; |
335 | | |
336 | 30.5M | a1 = ((pTmp_0[0] >> 1) + (pTmp_1[0] >> 1)); |
337 | 30.5M | a2 = ((pTmp_0[1] >> 1) - (pTmp_1[1] >> 1)); |
338 | | |
339 | 30.5M | cplxMult(&accu3, &accu4, (accu1 + a2), (a1 + accu2), sin_twiddle[i * inc]); |
340 | 30.5M | pDat[L - i] = -accu3; |
341 | 30.5M | pDat[i] = accu4; |
342 | | |
343 | 30.5M | cplxMult(&accu3, &accu4, (accu1 - a2), (a1 - accu2), |
344 | 30.5M | sin_twiddle[(M - i) * inc]); |
345 | 30.5M | pDat[M + i] = -accu3; |
346 | 30.5M | pDat[M - i] = accu4; |
347 | | |
348 | | /* Create index helper variables for (4*i)*inc indexed equivalent values of |
349 | | * short tables. */ |
350 | 30.5M | if (2 * i < ((M / 2) - 1)) { |
351 | 9.84M | index += 4 * inc; |
352 | 20.7M | } else if (2 * i >= ((M / 2))) { |
353 | 17.0M | index -= 4 * inc; |
354 | 17.0M | } |
355 | 30.5M | } |
356 | | |
357 | 7.21M | cplxMult(&accu1, &accu2, tmp[M], tmp[M + 1], sin_twiddle[(M / 2) * inc]); |
358 | 7.21M | pDat[L - (M / 2)] = accu2; |
359 | 7.21M | pDat[M / 2] = accu1; |
360 | | |
361 | 7.21M | pDat[0] = tmp[0] + tmp[1]; |
362 | 7.21M | pDat[M] = fMult(tmp[0] - tmp[1], |
363 | 7.21M | sin_twiddle[M * inc].v.re); /* cos((PI/(2*L))*M); */ |
364 | | |
365 | 7.21M | *pDat_e += 2; |
366 | 7.21M | } |
367 | | #endif |
368 | | |
369 | | #if !defined(FUNCTION_dct_IV) |
370 | | |
371 | 42.0M | void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) { |
372 | 42.0M | int sin_step = 0; |
373 | 42.0M | int M = L >> 1; |
374 | | |
375 | 42.0M | const FIXP_WTP *twiddle; |
376 | 42.0M | const FIXP_STP *sin_twiddle; |
377 | | |
378 | 42.0M | FDK_ASSERT(L >= 4); |
379 | | |
380 | 42.0M | FDK_ASSERT(L >= 4); |
381 | | |
382 | 42.0M | dct_getTables(&twiddle, &sin_twiddle, &sin_step, L); |
383 | | |
384 | 42.0M | { |
385 | 42.0M | FIXP_DBL *RESTRICT pDat_0 = &pDat[0]; |
386 | 42.0M | FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2]; |
387 | 42.0M | int i; |
388 | | |
389 | | /* 29 cycles on ARM926 */ |
390 | 624M | for (i = 0; i < M - 1; i += 2, pDat_0 += 2, pDat_1 -= 2) { |
391 | 582M | FIXP_DBL accu1, accu2, accu3, accu4; |
392 | | |
393 | 582M | accu1 = pDat_1[1]; |
394 | 582M | accu2 = pDat_0[0]; |
395 | 582M | accu3 = pDat_0[1]; |
396 | 582M | accu4 = pDat_1[0]; |
397 | | |
398 | 582M | cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]); |
399 | 582M | cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i + 1]); |
400 | | |
401 | 582M | pDat_0[0] = accu2 >> 1; |
402 | 582M | pDat_0[1] = accu1 >> 1; |
403 | 582M | pDat_1[0] = accu4 >> 1; |
404 | 582M | pDat_1[1] = -(accu3 >> 1); |
405 | 582M | } |
406 | 42.0M | if (M & 1) { |
407 | 0 | FIXP_DBL accu1, accu2; |
408 | |
|
409 | 0 | accu1 = pDat_1[1]; |
410 | 0 | accu2 = pDat_0[0]; |
411 | |
|
412 | 0 | cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]); |
413 | |
|
414 | 0 | pDat_0[0] = accu2 >> 1; |
415 | 0 | pDat_0[1] = accu1 >> 1; |
416 | 0 | } |
417 | 42.0M | } |
418 | | |
419 | 42.0M | fft(M, pDat, pDat_e); |
420 | | |
421 | 42.0M | { |
422 | 42.0M | FIXP_DBL *RESTRICT pDat_0 = &pDat[0]; |
423 | 42.0M | FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2]; |
424 | 42.0M | FIXP_DBL accu1, accu2, accu3, accu4; |
425 | 42.0M | int idx, i; |
426 | | |
427 | | /* Sin and Cos values are 0.0f and 1.0f */ |
428 | 42.0M | accu1 = pDat_1[0]; |
429 | 42.0M | accu2 = pDat_1[1]; |
430 | | |
431 | 42.0M | pDat_1[1] = -pDat_0[1]; |
432 | | |
433 | | /* 28 cycles for ARM926 */ |
434 | 582M | for (idx = sin_step, i = 1; i<(M + 1)>> 1; i++, idx += sin_step) { |
435 | 540M | FIXP_STP twd = sin_twiddle[idx]; |
436 | 540M | cplxMult(&accu3, &accu4, accu1, accu2, twd); |
437 | 540M | pDat_0[1] = accu3; |
438 | 540M | pDat_1[0] = accu4; |
439 | | |
440 | 540M | pDat_0 += 2; |
441 | 540M | pDat_1 -= 2; |
442 | | |
443 | 540M | cplxMult(&accu3, &accu4, pDat_0[1], pDat_0[0], twd); |
444 | | |
445 | 540M | accu1 = pDat_1[0]; |
446 | 540M | accu2 = pDat_1[1]; |
447 | | |
448 | 540M | pDat_1[1] = -accu3; |
449 | 540M | pDat_0[0] = accu4; |
450 | 540M | } |
451 | | |
452 | 42.0M | if ((M & 1) == 0) { |
453 | | /* Last Sin and Cos value pair are the same */ |
454 | 42.0M | accu1 = fMult(accu1, WTC(0x5a82799a)); |
455 | 42.0M | accu2 = fMult(accu2, WTC(0x5a82799a)); |
456 | | |
457 | 42.0M | pDat_1[0] = accu1 + accu2; |
458 | 42.0M | pDat_0[1] = accu1 - accu2; |
459 | 42.0M | } |
460 | 42.0M | } |
461 | | |
462 | | /* Add twiddeling scale. */ |
463 | 42.0M | *pDat_e += 2; |
464 | 42.0M | } |
465 | | #endif /* !defined(FUNCTION_dct_IV) */ |
466 | | |
467 | | #if !defined(FUNCTION_dst_IV) |
468 | 35.3M | void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) { |
469 | 35.3M | int sin_step = 0; |
470 | 35.3M | int M = L >> 1; |
471 | | |
472 | 35.3M | const FIXP_WTP *twiddle; |
473 | 35.3M | const FIXP_STP *sin_twiddle; |
474 | | |
475 | 35.3M | FDK_ASSERT(L >= 4); |
476 | | |
477 | 35.3M | FDK_ASSERT(L >= 4); |
478 | | |
479 | 35.3M | dct_getTables(&twiddle, &sin_twiddle, &sin_step, L); |
480 | | |
481 | 35.3M | { |
482 | 35.3M | FIXP_DBL *RESTRICT pDat_0 = &pDat[0]; |
483 | 35.3M | FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2]; |
484 | 35.3M | int i; |
485 | | |
486 | | /* 34 cycles on ARM926 */ |
487 | 432M | for (i = 0; i < M - 1; i += 2, pDat_0 += 2, pDat_1 -= 2) { |
488 | 397M | FIXP_DBL accu1, accu2, accu3, accu4; |
489 | | |
490 | 397M | accu1 = pDat_1[1] >> 1; |
491 | 397M | accu2 = -(pDat_0[0] >> 1); |
492 | 397M | accu3 = pDat_0[1] >> 1; |
493 | 397M | accu4 = -(pDat_1[0] >> 1); |
494 | | |
495 | 397M | cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]); |
496 | 397M | cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i + 1]); |
497 | | |
498 | 397M | pDat_0[0] = accu2; |
499 | 397M | pDat_0[1] = accu1; |
500 | 397M | pDat_1[0] = accu4; |
501 | 397M | pDat_1[1] = -accu3; |
502 | 397M | } |
503 | 35.3M | if (M & 1) { |
504 | 0 | FIXP_DBL accu1, accu2; |
505 | |
|
506 | 0 | accu1 = pDat_1[1]; |
507 | 0 | accu2 = -pDat_0[0]; |
508 | |
|
509 | 0 | cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]); |
510 | |
|
511 | 0 | pDat_0[0] = accu2 >> 1; |
512 | 0 | pDat_0[1] = accu1 >> 1; |
513 | 0 | } |
514 | 35.3M | } |
515 | | |
516 | 35.3M | fft(M, pDat, pDat_e); |
517 | | |
518 | 35.3M | { |
519 | 35.3M | FIXP_DBL *RESTRICT pDat_0; |
520 | 35.3M | FIXP_DBL *RESTRICT pDat_1; |
521 | 35.3M | FIXP_DBL accu1, accu2, accu3, accu4; |
522 | 35.3M | int idx, i; |
523 | | |
524 | 35.3M | pDat_0 = &pDat[0]; |
525 | 35.3M | pDat_1 = &pDat[L - 2]; |
526 | | |
527 | | /* Sin and Cos values are 0.0f and 1.0f */ |
528 | 35.3M | accu1 = pDat_1[0]; |
529 | 35.3M | accu2 = pDat_1[1]; |
530 | | |
531 | 35.3M | pDat_1[1] = -pDat_0[0]; |
532 | 35.3M | pDat_0[0] = pDat_0[1]; |
533 | | |
534 | 397M | for (idx = sin_step, i = 1; i<(M + 1)>> 1; i++, idx += sin_step) { |
535 | 361M | FIXP_STP twd = sin_twiddle[idx]; |
536 | | |
537 | 361M | cplxMult(&accu3, &accu4, accu1, accu2, twd); |
538 | 361M | pDat_1[0] = -accu3; |
539 | 361M | pDat_0[1] = -accu4; |
540 | | |
541 | 361M | pDat_0 += 2; |
542 | 361M | pDat_1 -= 2; |
543 | | |
544 | 361M | cplxMult(&accu3, &accu4, pDat_0[1], pDat_0[0], twd); |
545 | | |
546 | 361M | accu1 = pDat_1[0]; |
547 | 361M | accu2 = pDat_1[1]; |
548 | | |
549 | 361M | pDat_0[0] = accu3; |
550 | 361M | pDat_1[1] = -accu4; |
551 | 361M | } |
552 | | |
553 | 35.3M | if ((M & 1) == 0) { |
554 | | /* Last Sin and Cos value pair are the same */ |
555 | 35.3M | accu1 = fMult(accu1, WTC(0x5a82799a)); |
556 | 35.3M | accu2 = fMult(accu2, WTC(0x5a82799a)); |
557 | | |
558 | 35.3M | pDat_0[1] = -accu1 - accu2; |
559 | 35.3M | pDat_1[0] = accu2 - accu1; |
560 | 35.3M | } |
561 | 35.3M | } |
562 | | |
563 | | /* Add twiddeling scale. */ |
564 | 35.3M | *pDat_e += 2; |
565 | 35.3M | } |
566 | | #endif /* !defined(FUNCTION_dst_IV) */ |