Coverage Report

Created: 2025-10-13 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxaac/decoder/ixheaacd_aac_imdct.c
Line
Count
Source
1
/******************************************************************************
2
 *                                                                            *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
#include "ixheaacd_sbr_common.h"
21
#include "ixheaac_type_def.h"
22
23
#include "ixheaac_constants.h"
24
#include "ixheaac_basic_ops32.h"
25
#include "ixheaac_basic_ops16.h"
26
#include "ixheaac_basic_ops40.h"
27
#include "ixheaac_basic_ops.h"
28
29
#include "ixheaacd_defines.h"
30
#include "ixheaacd_common_rom.h"
31
#include "ixheaacd_basic_funcs.h"
32
#include "ixheaacd_aac_rom.h"
33
#include "ixheaacd_aac_imdct.h"
34
#include "ixheaacd_intrinsics.h"
35
36
#include "ixheaac_basic_op.h"
37
#include "ixheaacd_function_selector.h"
38
39
#include "ixheaacd_audioobjtypes.h"
40
#include "ixheaacd_tns.h"
41
42
/*
 * DIG_REV(i, m, j)
 *
 * Reverses the order of the sixteen 2-bit digits of the 32-bit value `i`
 * (adjacent 2-bit groups, then nibbles, bytes and half-words are swapped),
 * shifts the result right by `m` bits and stores it in `j`.
 *
 * `i` and `m` are evaluated once each; `j` must be an lvalue.
 */
#define DIG_REV(i, m, j)                                              \
  do {                                                                \
    unsigned dig_rev_acc = (i);                                       \
    dig_rev_acc = ((dig_rev_acc & 0x33333333u) << 2) |                \
                  ((dig_rev_acc & ~0x33333333u) >> 2);                \
    dig_rev_acc = ((dig_rev_acc & 0x0F0F0F0Fu) << 4) |                \
                  ((dig_rev_acc & ~0x0F0F0F0Fu) >> 4);                \
    dig_rev_acc = ((dig_rev_acc & 0x00FF00FFu) << 8) |                \
                  ((dig_rev_acc & ~0x00FF00FFu) >> 8);                \
    dig_rev_acc = ((dig_rev_acc & 0x0000FFFFu) << 16) |               \
                  ((dig_rev_acc & ~0x0000FFFFu) >> 16);               \
    (j) = dig_rev_acc >> (m);                                         \
  } while (0)
51
52
/*
 * MPYHIRC(x, y)
 *
 * Multiplies the upper 16 bits of x (taken as signed) by y, where y is split
 * into an unsigned low half and a signed high half:
 *   ((hi16(x) * ulo16(y) + 0x4000) >> 15) + ((hi16(x) * shi16(y)) << 1)
 * The 0x4000 term rounds the low-half product before the shift by 15.
 *
 * Both arguments are fully parenthesized so that expressions such as
 * `a | b` expand correctly. Each argument is evaluated twice, so do not pass
 * expressions with side effects.
 */
#define MPYHIRC(x, y)                                                        \
  (((WORD32)((WORD16)((x) >> 16) * (UWORD16)((y) & 0x0000FFFF) + 0x4000) >>  \
    15) +                                                                    \
   ((WORD32)((WORD16)((x) >> 16) * (WORD16)((y) >> 16)) << 1))
57
58
/*
 * MPYLUHS(x, y)
 *
 * Product of the low 16 bits of x (unsigned) and the high 16 bits of y
 * (signed), returned as WORD32.
 *
 * Arguments are parenthesized so that compound expressions (e.g. `a | b`)
 * expand with the intended precedence. Each argument is evaluated once.
 */
#define MPYLUHS(x, y) \
  ((WORD32)((UWORD16)((x) & 0x0000FFFF) * (WORD16)((y) >> 16)))
60
61
/*
 * MPYLIRC(x, y)
 *
 * Multiplies the low 16 bits of x (taken as signed) by y, where y is split
 * into an unsigned low half and a signed high half:
 *   ((lo16(x) * ulo16(y) + 0x4000) >> 15) + ((lo16(x) * shi16(y)) << 1)
 * The 0x4000 term rounds the low-half product before the shift by 15.
 *
 * Every use of an argument is parenthesized. Each argument is evaluated
 * twice, so do not pass expressions with side effects.
 */
#define MPYLIRC(x, y)                                                     \
  (((WORD32)((WORD16)(x) * (UWORD16)((y) & 0x0000FFFF) + 0x4000) >> 15) + \
   ((WORD32)((WORD16)(x) * (WORD16)((y) >> 16)) << 1))
64
65
/* Four-entry lookup table; its consumers are outside this part of the file. */
WORD32 rev_dig[] = { 0, 8, 2, 10 };
66
67
138k
/* Named sizes used by the transform code in this file. */
#define MDCT_LEN 480
68
#define FFT15X2 30 /* 2 * FFT15 */
69
48.5k
#define MDCT_LEN_BY2 240 /* MDCT_LEN / 2 */
70
27.3M
#define FFT5 5
71
2.27M
#define FFT16 16
72
10.4M
#define FFT4 4
73
16.6M
#define FFT3 3
74
14.3M
#define FFT15 15
75
2.77M
#define FFT16X2 32 /* 2 * FFT16 */
76
219k
#define MDCT_LEN_960 960
77
78
/* File-scope scratch array of FFT15X2 (30) words; non-static, so it is
 * shared mutable state visible to other translation units. */
WORD32 ixheaacd_fft5out[FFT15X2];
79
80
286M
/*
 * Multiplies a by the signed low 16 bits of b and returns bits [47:16] of
 * the 64-bit product, truncated to WORD32.
 *
 * a: 32-bit multiplicand.
 * b: only its low 16 bits are used, interpreted as a signed 16-bit value.
 */
static PLATFORM_INLINE WORD32 ixheaacd_mult32x16lin32(WORD32 a, WORD32 b) {
  /* Sign-extend the low half of b without shifting a set bit into the sign
   * position: (b & 0xFFFF) << 16 is undefined for a signed int when bit 15
   * is set. */
  const WORD32 b_low = ((b & 0xFFFF) ^ 0x8000) - 0x8000;
  const WORD64 product = (WORD64)a * (WORD64)b_low;

  return (WORD32)(product >> 16);
}
87
88
/*
 * Multiply-accumulate: returns a + ixheaacd_mult32x16lin32(b, c).
 * The addition is a plain (non-saturating) WORD32 add.
 */
static PLATFORM_INLINE WORD32 ixheaacd_mac32x16lin32(WORD32 a, WORD32 b,
                                                     WORD32 c) {
  return a + ixheaacd_mult32x16lin32(b, c);
}
94
95
56.2M
/*
 * Multiplies a by the signed low 16 bits of b and returns the full product
 * saturated to the WORD32 range [MIN_32, MAX_32]. Unlike
 * ixheaacd_mult32x16lin32, the product is not shifted right by 16.
 *
 * a: 32-bit multiplicand.
 * b: only its low 16 bits are used, interpreted as a signed 16-bit value.
 */
static PLATFORM_INLINE WORD32 ixheaacd_mult32x16lin32_sat(WORD32 a, WORD32 b) {
  /* Sign-extend the low half of b without shifting a set bit into the sign
   * position: (b & 0xFFFF) << 16 is undefined for a signed int when bit 15
   * is set. */
  const WORD32 b_low = ((b & 0xFFFF) ^ 0x8000) - 0x8000;
  const WORD64 product = (WORD64)a * (WORD64)b_low;

  if (product < (WORD64)MIN_32) {
    return MIN_32;
  }
  if (product > (WORD64)MAX_32) {
    return MAX_32;
  }
  return (WORD32)product;
}
107
108
95.9k
/* Returns neg_expo + 2, converted back to WORD16. */
WORD16 ixheaacd_neg_expo_inc_dec(WORD16 neg_expo) {
  const WORD16 bumped = (WORD16)(neg_expo + 2);
  return bumped;
}
109
110
0
/* Returns neg_expo + 3, converted back to WORD16. */
WORD16 ixheaacd_neg_expo_inc_arm(WORD16 neg_expo) {
  const WORD16 bumped = (WORD16)(neg_expo + 3);
  return bumped;
}
111
112
VOID ixheaacd_pretwiddle_compute_960_dec(
113
    WORD32 *spec_data1, WORD32 *spec_data2, WORD32 *out_ptr,
114
    ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints4,
115
0
    WORD32 neg_expo) {
116
117
0
  WORD32 i;
118
0
  WORD32 tempr, tempi;
119
120
0
  WORD16 c, c1, s, s1;
121
0
  WORD32 *out_ptr1 = out_ptr + ((npoints4 << 2) - 1);
122
0
  const WORD16 *cos_sin_ptr = ptr_imdct_tables->cosine_array_240;
123
124
0
  for (i = 0; i < npoints4; i++) {
125
0
    c = *cos_sin_ptr++;
126
0
    s = *cos_sin_ptr++;
127
128
0
    tempr = *spec_data1++;
129
0
    tempi = *spec_data2--;
130
131
0
    *out_ptr =
132
0
        ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr, c), tempi, s);
133
134
135
0
    *out_ptr = ixheaac_shl32(*out_ptr, neg_expo);
136
0
    out_ptr++;
137
138
0
    *out_ptr = ixheaac_sub32(ixheaac_mult32x16in32(tempi, c),
139
0
                              ixheaac_mult32x16in32(tempr, s));
140
141
0
    *out_ptr = ixheaac_shl32(*out_ptr, neg_expo);
142
0
    out_ptr++;
143
144
0
    c1 = *cos_sin_ptr++;
145
0
    s1 = *cos_sin_ptr++;
146
147
0
    tempi = *spec_data1++;
148
0
    tempr = *spec_data2--;
149
150
151
0
    *out_ptr1 = ixheaac_sub32(ixheaac_mult32x16in32(tempi, c1),
152
0
                               ixheaac_mult32x16in32(tempr, s1));
153
154
0
    *out_ptr1 = ixheaac_shl32(*out_ptr1, neg_expo);
155
0
    out_ptr1--;
156
157
0
    *out_ptr1 =
158
0
        ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr, c1), tempi, s1);
159
160
0
    *out_ptr1 = ixheaac_shl32(*out_ptr1, neg_expo);
161
0
    out_ptr1--;
162
0
  }
163
0
}
164
165
VOID ixheaacd_pretwiddle_compute_dec(
166
    WORD32 *spec_data1, WORD32 *spec_data2, WORD32 *out_ptr,
167
    ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints4,
168
215k
    WORD32 neg_expo) {
169
215k
  WORD32 i;
170
215k
  WORD32 tempr, tempi;
171
215k
  WORD32 tempr1, tempi1;
172
215k
  WORD32 npoints2 = npoints4 * 2;
173
215k
  WORD32 *out_ptr1 = out_ptr + (npoints2 << 1) - 1;
174
215k
  const WORD16 *cos_sin_ptr = ptr_imdct_tables->cosine_array_2048_256;
175
176
215k
  WORD16 cos = 0, cos1 = 0, sin = 0, sin1 = 0;
177
215k
  if (neg_expo < 0) {
178
89.5k
    neg_expo = -neg_expo;
179
89.5k
    if (npoints4 == 256) {
180
41.5k
      cos = *cos_sin_ptr++;
181
41.5k
      sin = *cos_sin_ptr++;
182
48.0k
    } else if (npoints4 == 32) {
183
48.0k
      cos = *cos_sin_ptr++;
184
48.0k
      sin = *cos_sin_ptr;
185
48.0k
      cos_sin_ptr += 15;
186
48.0k
    }
187
89.5k
    tempr = *spec_data1++;
188
89.5k
    tempi = *spec_data2--;
189
190
89.5k
    *out_ptr =
191
89.5k
        ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr, cos), tempi, sin);
192
193
89.5k
    *out_ptr = ixheaac_shl32(*out_ptr, neg_expo);
194
89.5k
    out_ptr++;
195
196
89.5k
    *out_ptr = ixheaac_sub32(ixheaac_mult32x16in32(tempi, cos),
197
89.5k
                              ixheaac_mult32x16in32(tempr, sin));
198
199
89.5k
    *out_ptr = ixheaac_shl32(*out_ptr, neg_expo);
200
89.5k
    out_ptr++;
201
202
12.1M
    for (i = 0; i < npoints4 - 1; i++) {
203
12.0M
      if (npoints4 == 256) {
204
10.5M
        sin = *cos_sin_ptr++;
205
10.5M
        cos = *cos_sin_ptr++;
206
10.5M
      } else if (npoints4 == 32) {
207
1.48M
        sin = *cos_sin_ptr++;
208
1.48M
        cos = *cos_sin_ptr;
209
1.48M
        cos_sin_ptr += 15;
210
1.48M
      }
211
212
12.0M
      tempi1 = *spec_data1++;
213
12.0M
      tempr = *spec_data1++;
214
12.0M
      tempr1 = *spec_data2--;
215
12.0M
      tempi = *spec_data2--;
216
217
12.0M
      *out_ptr1 = ixheaac_sub32(ixheaac_mult32x16in32(tempi1, cos),
218
12.0M
                                 ixheaac_mult32x16in32(tempr1, sin));
219
220
12.0M
      *out_ptr1 = ixheaac_shl32(*out_ptr1, neg_expo);
221
12.0M
      out_ptr1--;
222
223
12.0M
      *out_ptr1 = ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr1, cos),
224
12.0M
                                        tempi1, sin);
225
12.0M
      *out_ptr1 = ixheaac_shl32(*out_ptr1, neg_expo);
226
12.0M
      out_ptr1--;
227
228
12.0M
      *out_ptr =
229
12.0M
          ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr, sin), tempi, cos);
230
12.0M
      *out_ptr = ixheaac_shl32(*out_ptr, neg_expo);
231
12.0M
      out_ptr++;
232
233
12.0M
      *out_ptr = ixheaac_sub32(ixheaac_mult32x16in32(tempi, sin),
234
12.0M
                                ixheaac_mult32x16in32(tempr, cos));
235
12.0M
      *out_ptr = ixheaac_shl32(*out_ptr, neg_expo);
236
12.0M
      out_ptr++;
237
12.0M
    }
238
89.5k
    cos1 = *cos_sin_ptr++;
239
89.5k
    sin1 = *cos_sin_ptr;
240
241
89.5k
    tempr1 = *spec_data2;
242
89.5k
    tempi1 = *spec_data1;
243
244
89.5k
    *out_ptr1 = ixheaac_sub32(ixheaac_mult32x16in32(tempi1, cos1),
245
89.5k
                               ixheaac_mult32x16in32(tempr1, sin1));
246
89.5k
    *out_ptr1 = ixheaac_shl32(*out_ptr1, neg_expo);
247
89.5k
    out_ptr1--;
248
249
89.5k
    *out_ptr1 = ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr1, cos1),
250
89.5k
                                      tempi1, sin1);
251
89.5k
    *out_ptr1 = ixheaac_shl32(*out_ptr1, neg_expo);
252
89.5k
    out_ptr1--;
253
254
126k
  } else {
255
126k
    if (npoints4 == 256) {
256
55.4k
      cos = *cos_sin_ptr++;
257
55.4k
      sin = *cos_sin_ptr++;
258
259
70.5k
    } else if (npoints4 == 32) {
260
70.5k
      cos = *cos_sin_ptr++;
261
70.5k
      sin = *cos_sin_ptr;
262
70.5k
      cos_sin_ptr += 15;
263
70.5k
    }
264
126k
    tempr = *spec_data1++;
265
126k
    tempi = *spec_data2--;
266
267
126k
    *out_ptr =
268
126k
        ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr, cos), tempi, sin);
269
126k
    *out_ptr = ixheaac_shr32(*out_ptr, neg_expo);
270
126k
    out_ptr++;
271
272
126k
    *out_ptr = ixheaac_sub32(ixheaac_mult32x16in32(tempi, cos),
273
126k
                              ixheaac_mult32x16in32(tempr, sin));
274
275
126k
    *out_ptr = ixheaac_shr32(*out_ptr, neg_expo);
276
126k
    out_ptr++;
277
278
16.4M
    for (i = 0; i < npoints4 - 1; i++) {
279
16.3M
      if (npoints4 == 256) {
280
14.1M
        sin = *cos_sin_ptr++;
281
14.1M
        cos = *cos_sin_ptr++;
282
14.1M
      } else if (npoints4 == 32) {
283
2.18M
        sin = *cos_sin_ptr++;
284
2.18M
        cos = *cos_sin_ptr;
285
2.18M
        cos_sin_ptr += 15;
286
2.18M
      }
287
288
16.3M
      tempi1 = *spec_data1++;
289
16.3M
      tempr = *spec_data1++;
290
16.3M
      tempr1 = *spec_data2--;
291
16.3M
      tempi = *spec_data2--;
292
293
16.3M
      *out_ptr1 = ixheaac_sub32(ixheaac_mult32x16in32(tempi1, cos),
294
16.3M
                                 ixheaac_mult32x16in32(tempr1, sin));
295
16.3M
      *out_ptr1 = ixheaac_shr32(*out_ptr1, neg_expo);
296
16.3M
      out_ptr1--;
297
298
16.3M
      *out_ptr1 = ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr1, cos),
299
16.3M
                                        tempi1, sin);
300
16.3M
      *out_ptr1 = ixheaac_shr32(*out_ptr1, neg_expo);
301
16.3M
      out_ptr1--;
302
303
16.3M
      *out_ptr =
304
16.3M
          ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr, sin), tempi, cos);
305
16.3M
      *out_ptr = ixheaac_shr32(*out_ptr, neg_expo);
306
16.3M
      out_ptr++;
307
308
16.3M
      *out_ptr = ixheaac_sub32(ixheaac_mult32x16in32(tempi, sin),
309
16.3M
                                ixheaac_mult32x16in32(tempr, cos));
310
16.3M
      *out_ptr = ixheaac_shr32(*out_ptr, neg_expo);
311
16.3M
      out_ptr++;
312
16.3M
    }
313
126k
    cos1 = *cos_sin_ptr++;
314
126k
    sin1 = *cos_sin_ptr;
315
316
126k
    tempr1 = *spec_data2;
317
126k
    tempi1 = *spec_data1;
318
319
126k
    *out_ptr1 = ixheaac_sub32(ixheaac_mult32x16in32(tempi1, cos1),
320
126k
                               ixheaac_mult32x16in32(tempr1, sin1));
321
126k
    *out_ptr1 = ixheaac_shr32(*out_ptr1, neg_expo);
322
126k
    out_ptr1--;
323
324
126k
    *out_ptr1 = ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr1, cos1),
325
126k
                                      tempi1, sin1);
326
126k
    *out_ptr1 = ixheaac_shr32(*out_ptr1, neg_expo);
327
126k
    out_ptr1--;
328
126k
  }
329
215k
}
330
331
VOID ixheaacd_post_twiddle_dec(WORD32 out_ptr[], WORD32 spec_data[],
332
                               ia_aac_dec_imdct_tables_struct *ptr_imdct_tables,
333
160k
                               WORD npoints) {
334
160k
  WORD i;
335
160k
  WORD16 cos, cos1, sin, sin1;
336
160k
  WORD32 *spec_data1 = spec_data + npoints - 1;
337
160k
  WORD32 *out_ptr1 = out_ptr + npoints - 1;
338
160k
  WORD16 adjust = 50, adjust1 = -50;
339
160k
  const WORD16 *cos_sin_ptr = ptr_imdct_tables->cosine_array_2048_256;
340
341
160k
  if (npoints == 1024) {
342
42.0k
    WORD32 tempr, tempi, outi, outr, temp1, temp2;
343
42.0k
    tempr = *spec_data++;
344
42.0k
    tempi = *spec_data++;
345
346
42.0k
    cos = *cos_sin_ptr;
347
42.0k
    cos_sin_ptr++;
348
42.0k
    sin = *cos_sin_ptr;
349
42.0k
    cos_sin_ptr++;
350
351
42.0k
    outi = ixheaac_sub32(ixheaac_mult32x16in32(tempr, sin),
352
42.0k
                          ixheaac_mult32x16in32(tempi, cos));
353
42.0k
    outr =
354
42.0k
        ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr, cos), tempi, sin);
355
356
42.0k
    temp1 = ixheaac_mult32x16in32(outi, adjust1);
357
42.0k
    temp2 = ixheaac_mult32x16in32(outr, adjust);
358
359
42.0k
    outr = outr + temp1;
360
42.0k
    outi = outi + temp2;
361
42.0k
    *out_ptr1-- = outi;
362
42.0k
    *out_ptr++ = outr;
363
364
10.7M
    for (i = 0; i < (npoints / 2 - 2); i++) {
365
10.7M
      sin = *cos_sin_ptr++;
366
10.7M
      cos = *cos_sin_ptr++;
367
368
10.7M
      tempi = *spec_data1--;
369
10.7M
      tempr = *spec_data1--;
370
371
10.7M
      outi = ixheaac_sub32(ixheaac_mult32x16in32(tempr, sin),
372
10.7M
                            ixheaac_mult32x16in32(tempi, cos));
373
10.7M
      outr =
374
10.7M
          ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr, cos), tempi, sin);
375
376
10.7M
      temp1 = ixheaac_mult32x16in32(outi, adjust1);
377
10.7M
      temp2 = ixheaac_mult32x16in32(outr, adjust);
378
379
10.7M
      outr = outr + temp1;
380
10.7M
      outi = outi + temp2;
381
382
10.7M
      *out_ptr++ = outi;
383
10.7M
      *out_ptr1-- = outr;
384
385
10.7M
      i++;
386
10.7M
      tempr = *spec_data++;
387
10.7M
      tempi = *spec_data++;
388
389
10.7M
      outi = ixheaac_sub32(ixheaac_mult32x16in32(tempr, cos),
390
10.7M
                            ixheaac_mult32x16in32(tempi, sin));
391
10.7M
      outr =
392
10.7M
          ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr, sin), tempi, cos);
393
394
10.7M
      temp1 = ixheaac_mult32x16in32(outi, adjust1);
395
10.7M
      temp2 = ixheaac_mult32x16in32(outr, adjust);
396
397
10.7M
      outr = outr + temp1;
398
10.7M
      outi = outi + temp2;
399
400
10.7M
      *out_ptr1-- = outi;
401
10.7M
      *out_ptr++ = outr;
402
10.7M
    }
403
42.0k
    cos1 = *cos_sin_ptr++;
404
42.0k
    sin1 = *cos_sin_ptr;
405
406
42.0k
    tempi = *spec_data1--;
407
42.0k
    tempr = *spec_data1--;
408
409
42.0k
    outi = ixheaac_sub32(ixheaac_mult32x16in32(tempr, sin1),
410
42.0k
                          ixheaac_mult32x16in32(tempi, cos1));
411
42.0k
    outr =
412
42.0k
        ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr, cos1), tempi, sin1);
413
414
42.0k
    temp1 = ixheaac_mult32x16in32(outi, adjust1);
415
42.0k
    temp2 = ixheaac_mult32x16in32(outr, adjust);
416
417
42.0k
    outr = outr + temp1;
418
42.0k
    outi = outi + temp2;
419
420
42.0k
    *out_ptr++ = outi;
421
42.0k
    *out_ptr1-- = outr;
422
118k
  } else if (npoints == 128) {
423
118k
    WORD32 tempr, tempi, outi, outr, temp1, temp2;
424
118k
    tempr = *spec_data++;
425
118k
    tempi = *spec_data++;
426
427
118k
    cos = *cos_sin_ptr++;
428
118k
    sin = *cos_sin_ptr;
429
118k
    cos_sin_ptr += 15;
430
431
118k
    outi = ixheaac_sub32(ixheaac_mult32x16in32(tempr, sin),
432
118k
                          ixheaac_mult32x16in32(tempi, cos));
433
118k
    outr =
434
118k
        ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr, cos), tempi, sin);
435
436
118k
    temp1 = ixheaac_mult32x16in32(outi, -(201 << 1));
437
118k
    temp2 = ixheaac_mult32x16in32(outr, 201 << 1);
438
439
118k
    outr = outr + temp1;
440
118k
    outi = outi + temp2;
441
118k
    *out_ptr1-- = outi;
442
118k
    *out_ptr++ = outr;
443
444
3.79M
    for (i = 0; i < (npoints / 2 - 2); i++) {
445
3.67M
      sin = *cos_sin_ptr++;
446
3.67M
      cos = *cos_sin_ptr;
447
3.67M
      cos_sin_ptr += 15;
448
449
3.67M
      tempi = *spec_data1--;
450
3.67M
      tempr = *spec_data1--;
451
452
3.67M
      outi = ixheaac_sub32(ixheaac_mult32x16in32(tempr, sin),
453
3.67M
                            ixheaac_mult32x16in32(tempi, cos));
454
3.67M
      outr =
455
3.67M
          ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr, cos), tempi, sin);
456
457
3.67M
      temp1 = ixheaac_mult32x16in32(outi, -(201 << 1));
458
3.67M
      temp2 = ixheaac_mult32x16in32(outr, 201 << 1);
459
460
3.67M
      outr = outr + temp1;
461
3.67M
      outi = outi + temp2;
462
463
3.67M
      *out_ptr++ = outi;
464
3.67M
      *out_ptr1-- = outr;
465
466
3.67M
      i++;
467
3.67M
      tempr = *spec_data++;
468
3.67M
      tempi = *spec_data++;
469
470
3.67M
      outi = ixheaac_sub32(ixheaac_mult32x16in32(tempr, cos),
471
3.67M
                            ixheaac_mult32x16in32(tempi, sin));
472
3.67M
      outr =
473
3.67M
          ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr, sin), tempi, cos);
474
475
3.67M
      temp1 = ixheaac_mult32x16in32(outi, -(201 << 1));
476
3.67M
      temp2 = ixheaac_mult32x16in32(outr, 201 << 1);
477
478
3.67M
      outr = outr + temp1;
479
3.67M
      outi = outi + temp2;
480
481
3.67M
      *out_ptr1-- = outi;
482
3.67M
      *out_ptr++ = outr;
483
3.67M
    }
484
118k
    cos1 = *cos_sin_ptr++;
485
118k
    sin1 = *cos_sin_ptr;
486
487
118k
    tempi = *spec_data1--;
488
118k
    tempr = *spec_data1--;
489
490
118k
    outi = ixheaac_sub32(ixheaac_mult32x16in32(tempr, sin1),
491
118k
                          ixheaac_mult32x16in32(tempi, cos1));
492
118k
    outr =
493
118k
        ixheaac_mac32x16in32(ixheaac_mult32x16in32(tempr, cos1), tempi, sin1);
494
495
118k
    temp1 = ixheaac_mult32x16in32(outi, -(201 << 1));
496
118k
    temp2 = ixheaac_mult32x16in32(outr, 201 << 1);
497
498
118k
    outr = outr + temp1;
499
118k
    outi = outi + temp2;
500
501
118k
    *out_ptr++ = outi;
502
118k
    *out_ptr1-- = outr;
503
118k
  }
504
160k
}
505
506
VOID ixheaacd_post_twid_overlap_add_dec(
507
    WORD32 pcm_out[], WORD32 spec_data[],
508
    ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints,
509
    WORD32 *ptr_overlap_buf, WORD16 q_shift, const WORD16 *window,
510
54.9k
    WORD16 ch_fac) {
511
54.9k
  WORD i;
512
54.9k
  WORD16 cos, cos1, sin, sin1;
513
54.9k
  WORD32 size = npoints / 2;
514
54.9k
  WORD32 *pcmout1 = pcm_out + (ch_fac * size);
515
54.9k
  const WORD16 *cos_sin_ptr = ptr_imdct_tables->cosine_array_2048_256;
516
517
54.9k
  pcm_out = pcmout1 - ch_fac;
518
54.9k
  spec_data += size;
519
520
54.9k
  if (q_shift > 0) {
521
23.7k
    WORD32 tempr, tempi, outr, outi, win1, accu, temp1, temp2;
522
23.7k
    WORD16 adjust, adjust1;
523
23.7k
    WORD32 overlap_data;
524
525
23.7k
    tempr = *(spec_data - size);
526
23.7k
    tempi = *(spec_data - size + 1);
527
23.7k
    adjust = 50;
528
23.7k
    adjust1 = -50;
529
23.7k
    cos = *cos_sin_ptr++;
530
23.7k
    sin = *cos_sin_ptr++;
531
23.7k
    outi = ixheaac_sub32(ixheaac_mult32x16in32(tempr, sin),
532
23.7k
                          ixheaac_mult32x16in32(tempi, cos));
533
23.7k
    outr = ixheaac_add32(ixheaac_mult32x16in32(tempr, cos),
534
23.7k
                          ixheaac_mult32x16in32(tempi, sin));
535
536
23.7k
    overlap_data = *ptr_overlap_buf;
537
538
23.7k
    temp1 = ixheaac_mult32x16in32(outi, adjust1);
539
23.7k
    temp2 = ixheaac_mult32x16in32(outr, adjust);
540
541
23.7k
    outr = outr + temp1;
542
23.7k
    outi = outi + temp2;
543
544
23.7k
    *ptr_overlap_buf++ = ixheaac_shr32_sat(outr, 16 - q_shift);
545
546
23.7k
    win1 = *((WORD32 *)window + size - 1);
547
23.7k
    accu = ixheaac_sub32_sat(
548
23.7k
        ixheaac_shl32_sat(ixheaacd_mult32x16lin32(outi, win1), q_shift),
549
23.7k
        ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1 >> 16)));
550
551
23.7k
    *pcm_out = accu;
552
553
23.7k
    pcm_out -= ch_fac;
554
23.7k
    accu = ixheaac_sub32_sat(
555
23.7k
        ixheaac_shl32_sat(
556
23.7k
            ixheaac_mult32x16hin32(ixheaac_negate32_sat(outi), win1),
557
23.7k
            q_shift),
558
23.7k
        ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1)));
559
560
23.7k
    *pcmout1 = accu;
561
562
23.7k
    pcmout1 += ch_fac;
563
564
6.06M
    for (i = size - 2; i != 0;) {
565
6.04M
      sin = *cos_sin_ptr++;
566
6.04M
      cos = *cos_sin_ptr++;
567
568
6.04M
      tempr = *(spec_data + i);
569
6.04M
      tempi = *(spec_data + i + 1);
570
571
6.04M
      outr = ixheaac_add32(ixheaac_mult32x16in32(tempr, cos),
572
6.04M
                            ixheaac_mult32x16in32(tempi, sin));
573
6.04M
      outi = ixheaac_sub32(ixheaac_mult32x16in32(tempr, sin),
574
6.04M
                            ixheaac_mult32x16in32(tempi, cos));
575
576
6.04M
      temp1 = ixheaac_mult32x16in32(outi, adjust1);
577
6.04M
      temp2 = ixheaac_mult32x16in32(outr, adjust);
578
579
6.04M
      outr = outr + temp1;
580
6.04M
      outi = outi + temp2;
581
582
6.04M
      overlap_data = *ptr_overlap_buf;
583
584
6.04M
      *ptr_overlap_buf++ = ixheaac_shr32_sat(outi, 16 - q_shift);
585
586
6.04M
      win1 = *((WORD32 *)window + i);
587
6.04M
      accu = ixheaac_sub32_sat(
588
6.04M
          ixheaac_shl32_sat(ixheaacd_mult32x16lin32(outr, win1), q_shift),
589
6.04M
          ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1 >> 16)));
590
591
6.04M
      *pcm_out = accu;
592
6.04M
      pcm_out -= ch_fac;
593
6.04M
      accu = ixheaac_sub32_sat(
594
6.04M
          ixheaac_shl32_sat(
595
6.04M
              ixheaac_mult32x16hin32(ixheaac_negate32_sat(outr), win1),
596
6.04M
              q_shift),
597
6.04M
          ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)win1));
598
599
6.04M
      *pcmout1 = accu;
600
6.04M
      pcmout1 += ch_fac;
601
602
6.04M
      tempr = *(spec_data - i);
603
6.04M
      tempi = *(spec_data - i + 1);
604
605
6.04M
      i -= 2;
606
607
6.04M
      outi = ixheaac_sub32(ixheaac_mult32x16in32(tempr, cos),
608
6.04M
                            ixheaac_mult32x16in32(tempi, sin));
609
6.04M
      outr = ixheaac_add32(ixheaac_mult32x16in32(tempr, sin),
610
6.04M
                            ixheaac_mult32x16in32(tempi, cos));
611
612
6.04M
      overlap_data = *ptr_overlap_buf;
613
614
6.04M
      temp1 = ixheaac_mult32x16in32(outi, adjust1);
615
616
6.04M
      temp2 = ixheaac_mult32x16in32(outr, adjust);
617
618
6.04M
      outr = outr + temp1;
619
6.04M
      outi = outi + temp2;
620
621
6.04M
      *ptr_overlap_buf++ = ixheaac_shr32_sat(outr, 16 - q_shift);
622
623
6.04M
      win1 = *((WORD32 *)window + i + 1);
624
6.04M
      accu = ixheaac_sub32_sat(
625
6.04M
          ixheaac_shl32_sat(ixheaacd_mult32x16lin32(outi, win1), q_shift),
626
6.04M
          ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1 >> 16)));
627
628
6.04M
      *pcm_out = accu;
629
6.04M
      pcm_out -= ch_fac;
630
6.04M
      accu = ixheaac_sub32_sat(
631
6.04M
          ixheaac_shl32_sat(
632
6.04M
              ixheaac_mult32x16hin32(ixheaac_negate32_sat(outi), win1),
633
6.04M
              q_shift),
634
6.04M
          ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1)));
635
6.04M
      *pcmout1 = accu;
636
6.04M
      pcmout1 += ch_fac;
637
6.04M
    }
638
23.7k
    cos1 = *cos_sin_ptr++;
639
23.7k
    sin1 = *cos_sin_ptr;
640
641
23.7k
    tempr = *(spec_data + i);
642
23.7k
    tempi = *(spec_data + i + 1);
643
644
23.7k
    outr = ixheaac_add32(ixheaac_mult32x16in32(tempr, cos1),
645
23.7k
                          ixheaac_mult32x16in32(tempi, sin1));
646
23.7k
    outi = ixheaac_sub32(ixheaac_mult32x16in32(tempr, sin1),
647
23.7k
                          ixheaac_mult32x16in32(tempi, cos1));
648
649
23.7k
    temp1 = ixheaac_mult32x16in32(outi, adjust1);
650
651
23.7k
    temp2 = ixheaac_mult32x16in32(outr, adjust);
652
653
23.7k
    outr = outr + temp1;
654
23.7k
    outi = outi + temp2;
655
656
23.7k
    overlap_data = *ptr_overlap_buf;
657
658
23.7k
    *ptr_overlap_buf++ = ixheaac_shr32_sat(outi, 16 - q_shift);
659
23.7k
    win1 = *((WORD32 *)window + i);
660
23.7k
    accu = ixheaac_sub32_sat(
661
23.7k
        ixheaac_shl32_sat(ixheaacd_mult32x16lin32(outr, win1), q_shift),
662
23.7k
        ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1 >> 16)));
663
664
23.7k
    *pcm_out = accu;
665
23.7k
    pcm_out -= ch_fac;
666
23.7k
    accu = ixheaac_sub32_sat(
667
23.7k
        ixheaac_shl32_sat(
668
23.7k
            ixheaac_mult32x16hin32(ixheaac_negate32_sat(outr), win1),
669
23.7k
            q_shift),
670
23.7k
        ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)win1));
671
672
23.7k
    *pcmout1 = accu;
673
23.7k
    pcmout1 += ch_fac;
674
31.2k
  } else {
675
31.2k
    q_shift = -q_shift;
676
31.2k
    {
677
31.2k
      WORD32 tempr, tempi, temp1, temp2, outr, outi, win1, accu;
678
31.2k
      WORD16 adjust, adjust1;
679
31.2k
      WORD16 overlap_data;
680
31.2k
      tempr = *(spec_data - size);
681
31.2k
      tempi = *(spec_data - size + 1);
682
683
31.2k
      adjust = 50;
684
31.2k
      adjust1 = -50;
685
31.2k
      cos = *cos_sin_ptr++;
686
31.2k
      sin = *cos_sin_ptr++;
687
688
31.2k
      outi = ixheaac_sub32(ixheaac_mult32x16in32(tempr, sin),
689
31.2k
                            ixheaac_mult32x16in32(tempi, cos));
690
31.2k
      outr = ixheaac_add32(ixheaac_mult32x16in32(tempr, cos),
691
31.2k
                            ixheaac_mult32x16in32(tempi, sin));
692
693
31.2k
      overlap_data = *ptr_overlap_buf;
694
695
31.2k
      temp1 = ixheaac_mult32x16in32(outi, adjust1);
696
31.2k
      temp2 = ixheaac_mult32x16in32(outr, adjust);
697
698
31.2k
      outr = outr + temp1;
699
31.2k
      outi = outi + temp2;
700
701
31.2k
      *ptr_overlap_buf++ = ixheaac_shr32_sat(outr, 16 + q_shift);
702
703
31.2k
      win1 = *((WORD32 *)window + size - 1);
704
31.2k
      accu = ixheaac_sub32_sat(
705
31.2k
          ixheaac_shr32(ixheaacd_mult32x16lin32(outi, win1), q_shift),
706
31.2k
          ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1 >> 16)));
707
708
31.2k
      *pcm_out = accu;
709
710
31.2k
      pcm_out -= ch_fac;
711
31.2k
      accu = ixheaac_sub32_sat(
712
31.2k
          ixheaac_shr32(
713
31.2k
              ixheaac_mult32x16hin32(ixheaac_negate32_sat(outi), win1),
714
31.2k
              q_shift),
715
31.2k
          ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1)));
716
717
31.2k
      *pcmout1 = accu;
718
31.2k
      pcmout1 += ch_fac;
719
720
7.99M
      for (i = size - 2; i != 0;) {
721
7.96M
        sin = *cos_sin_ptr++;
722
7.96M
        cos = *cos_sin_ptr++;
723
724
7.96M
        tempr = *(spec_data + i);
725
7.96M
        tempi = *(spec_data + i + 1);
726
727
7.96M
        outr = ixheaac_add32(ixheaac_mult32x16in32(tempr, cos),
728
7.96M
                              ixheaac_mult32x16in32(tempi, sin));
729
7.96M
        outi = ixheaac_sub32(ixheaac_mult32x16in32(tempr, sin),
730
7.96M
                              ixheaac_mult32x16in32(tempi, cos));
731
732
7.96M
        overlap_data = *ptr_overlap_buf;
733
734
7.96M
        temp1 = ixheaac_mult32x16in32(outi, adjust1);
735
736
7.96M
        temp2 = ixheaac_mult32x16in32(outr, adjust);
737
7.96M
        outr = outr + temp1;
738
7.96M
        outi = outi + temp2;
739
7.96M
        *ptr_overlap_buf++ = ixheaac_shr32_sat(outi, 16 + q_shift);
740
741
7.96M
        win1 = *((WORD32 *)window + i);
742
7.96M
        accu = ixheaac_sub32_sat(
743
7.96M
            ixheaac_shr32(ixheaacd_mult32x16lin32(outr, win1), q_shift),
744
7.96M
            ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1 >> 16)));
745
746
7.96M
        *pcm_out = accu;
747
7.96M
        pcm_out -= ch_fac;
748
749
7.96M
        accu = ixheaac_sub32_sat(
750
7.96M
            ixheaac_shr32(
751
7.96M
                ixheaac_mult32x16hin32(ixheaac_negate32_sat(outr), win1),
752
7.96M
                q_shift),
753
7.96M
            ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)win1));
754
755
7.96M
        *pcmout1 = accu;
756
7.96M
        pcmout1 += ch_fac;
757
758
7.96M
        tempr = *(spec_data - i);
759
7.96M
        tempi = *(spec_data - i + 1);
760
7.96M
        i -= 2;
761
762
7.96M
        outi = ixheaac_sub32(ixheaac_mult32x16in32(tempr, cos),
763
7.96M
                              ixheaac_mult32x16in32(tempi, sin));
764
7.96M
        outr = ixheaac_add32(ixheaac_mult32x16in32(tempr, sin),
765
7.96M
                              ixheaac_mult32x16in32(tempi, cos));
766
767
7.96M
        overlap_data = *ptr_overlap_buf;
768
769
7.96M
        temp1 = ixheaac_mult32x16in32(outi, adjust1);
770
7.96M
        temp2 = ixheaac_mult32x16in32(outr, adjust);
771
772
7.96M
        outr = outr + temp1;
773
7.96M
        outi = outi + temp2;
774
775
7.96M
        *ptr_overlap_buf++ = ixheaac_shr32_sat(outr, 16 + q_shift);
776
777
7.96M
        win1 = *((WORD32 *)window + i + 1);
778
7.96M
        accu = ixheaac_sub32_sat(
779
7.96M
            ixheaac_shr32(ixheaacd_mult32x16lin32(outi, win1), q_shift),
780
7.96M
            ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1 >> 16)));
781
782
7.96M
        *pcm_out = accu;
783
7.96M
        pcm_out -= ch_fac;
784
785
7.96M
        accu = ixheaac_sub32_sat(
786
7.96M
            ixheaac_shr32(
787
7.96M
                ixheaac_mult32x16hin32(ixheaac_negate32_sat(outi), win1),
788
7.96M
                q_shift),
789
7.96M
            ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1)));
790
791
7.96M
        *pcmout1 = accu;
792
7.96M
        pcmout1 += ch_fac;
793
7.96M
      }
794
31.2k
      cos1 = *cos_sin_ptr++;
795
31.2k
      sin1 = *cos_sin_ptr++;
796
797
31.2k
      tempr = *(spec_data + i);
798
31.2k
      tempi = *(spec_data + i + 1);
799
800
31.2k
      outr = ixheaac_add32(ixheaac_mult32x16in32(tempr, cos1),
801
31.2k
                            ixheaac_mult32x16in32(tempi, sin1));
802
31.2k
      outi = ixheaac_sub32(ixheaac_mult32x16in32(tempr, sin1),
803
31.2k
                            ixheaac_mult32x16in32(tempi, cos1));
804
805
31.2k
      overlap_data = *ptr_overlap_buf;
806
807
31.2k
      temp1 = ixheaac_mult32x16in32(outi, adjust1);
808
809
31.2k
      temp2 = ixheaac_mult32x16in32(outr, adjust);
810
811
31.2k
      outr = outr + temp1;
812
31.2k
      outi = outi + temp2;
813
814
31.2k
      *ptr_overlap_buf++ = ixheaac_shr32_sat(outi, 16 + q_shift);
815
816
31.2k
      win1 = *((WORD32 *)window + i);
817
31.2k
      accu = ixheaac_sub32_sat(
818
31.2k
          ixheaac_shr32(ixheaacd_mult32x16lin32(outr, win1), q_shift),
819
31.2k
          ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1 >> 16)));
820
821
31.2k
      *pcm_out = accu;
822
31.2k
      pcm_out -= ch_fac;
823
31.2k
      accu = ixheaac_sub32_sat(
824
31.2k
          ixheaac_shr32(
825
31.2k
              ixheaac_mult32x16hin32(ixheaac_negate32_sat(outr), win1),
826
31.2k
              q_shift),
827
31.2k
          ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)win1));
828
31.2k
      *pcmout1 = accu;
829
31.2k
      pcmout1 += ch_fac;
830
31.2k
    }
831
31.2k
  }
832
54.9k
}
833
834
/*
 * ixheaacd_imdct_using_fft_dec
 *
 * Fixed-point FFT built from hand-unrolled 8-input butterflies.
 *
 *   ptr_imdct_tables  supplies fft_twiddle and the two digit-reversal tables
 *                     (dig_rev_table8_long / dig_rev_table8_short).
 *   npoints           transform length; every loop below steps through the
 *                     buffers as interleaved (re, im) WORD32 pairs.  The loops
 *                     terminate on "!=" tests, so npoints has to be a positive
 *                     multiple of the step sizes used (8 in the first pass).
 *   ptr_x             input buffer; only read, and only by the first pass.
 *   ptr_y             output buffer; written by the first pass and then
 *                     updated in place by the remaining passes.
 *
 * Structure:
 *   1. first pass   - digit-reversed gather from ptr_x, butterflies without
 *                     twiddle multiplies, sequential store to ptr_y;
 *   2. middle passes - (n_stages - 2) in-place passes over ptr_y;
 *   3. last pass    - one more in-place pass over ptr_y.
 *
 * NOTE(review): the butterflies use plain "+", "-" and "<<" on WORD32 values,
 * with no saturation visible here - presumably the caller guarantees headroom;
 * confirm before changing scaling.
 */
VOID ixheaacd_imdct_using_fft_dec(
835
    ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD32 npoints,
836
    WORD32 *ptr_x, WORD32 *ptr_y)
837
838
215k
{
839
215k
  WORD32 i, j, k, k1, n_stages;
840
215k
  WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i,
841
215k
      x7r, x7i;
842
215k
  WORD32 del, nodespacing, in_loop_cnt, tmp, twiddle_val, *ptr_tmp;
843
215k
  const WORD32 *ptr_twiddle;
844
215k
  WORD8 *ptr_dig_rev_table;
845
215k
  /* Stage count is derived from the normalisation shift of npoints.
   * Presumably ixheaac_norm32() returns the number of redundant sign bits,
   * which would make this log8(npoints) - TODO confirm. */
  n_stages = ixheaac_norm32(npoints);
846
847
215k
  n_stages = (30 - n_stages) / 3;
848
849
215k
  ptr_tmp = ptr_y;
850
851
215k
  ptr_twiddle = ptr_imdct_tables->fft_twiddle;
852
215k
  /* The "long" digit-reversal table is used only when 2 * npoints == 1024;
   * every other length falls back to the "short" table. */
  ptr_dig_rev_table = ((npoints << 1) == 1024)
853
215k
                          ? ptr_imdct_tables->dig_rev_table8_long
854
215k
                          : ptr_imdct_tables->dig_rev_table8_short;
855
856
7.37M
  /* First pass: npoints / 8 iterations.  Each one gathers eight complex
   * inputs from ptr_x, spaced (npoints >> 2) WORD32s apart and starting at
   * the digit-reversed offset, and stores 16 consecutive WORD32s to ptr_y. */
  for (i = npoints; i != 0; i -= 8) {
857
7.15M
    WORD32 *data = ptr_x;
858
7.15M
    /* Table entry counts complex elements; << 1 turns it into a WORD32 offset. */
    data = data + (*ptr_dig_rev_table++ << 1);
859
860
7.15M
    x0r = *data;
861
7.15M
    x0i = *(data + 1);
862
7.15M
    data += (npoints >> 1);
863
864
7.15M
    x2r = *data;
865
7.15M
    x2i = *(data + 1);
866
7.15M
    data += (npoints >> 1);
867
868
7.15M
    x4r = *data;
869
7.15M
    x4i = *(data + 1);
870
7.15M
    data += (npoints >> 1);
871
872
7.15M
    x6r = *data;
873
7.15M
    x6i = *(data + 1);
874
7.15M
    /* Step back to the first odd-indexed input (start + (npoints >> 2)). */
    data -= 5 * (npoints >> 2);
875
876
7.15M
    /* Sum/difference pairs: "a = a + b; b = a - 2b" yields (a + b, a - b)
     * while reusing the two variables. */
    x0r = x0r + x4r;
877
7.15M
    x0i = x0i + x4i;
878
7.15M
    x4r = x0r - (x4r << 1);
879
7.15M
    x4i = x0i - (x4i << 1);
880
881
7.15M
    x2r = x2r + x6r;
882
7.15M
    x2i = x2i + x6i;
883
7.15M
    x6r = x2r - (x6r << 1);
884
7.15M
    x6i = x2i - (x6i << 1);
885
886
7.15M
    x0r = x0r + x2r;
887
7.15M
    x0i = x0i + x2i;
888
7.15M
    x2r = x0r - (x2r << 1);
889
7.15M
    x2i = x0i - (x2i << 1);
890
891
7.15M
    x4r = x4r + x6i;
892
7.15M
    x4i = x4i - x6r;
893
7.15M
    tmp = x6r;
894
7.15M
    x6r = x4r - (x6i << 1);
895
7.15M
    x6i = x4i + (tmp << 1);
896
897
7.15M
    x1r = *data;
898
7.15M
    x1i = *(data + 1);
899
7.15M
    data += (npoints >> 1);
900
901
7.15M
    x3r = *data;
902
7.15M
    x3i = *(data + 1);
903
7.15M
    data += (npoints >> 1);
904
905
7.15M
    x5r = *data;
906
7.15M
    x5i = *(data + 1);
907
7.15M
    data += (npoints >> 1);
908
909
7.15M
    x7r = *data;
910
7.15M
    x7i = *(data + 1);
911
7.15M
    /* Returns data to its starting position for this iteration. */
    data -= 7 * (npoints >> 2);
912
913
7.15M
    x1r = x1r + x5r;
914
7.15M
    x1i = x1i + x5i;
915
7.15M
    x5r = x1r - (x5r << 1);
916
7.15M
    x5i = x1i - (x5i << 1);
917
918
7.15M
    x3r = x3r + x7r;
919
7.15M
    x3i = x3i + x7i;
920
7.15M
    x7r = x3r - (x7r << 1);
921
7.15M
    x7i = x3i - (x7i << 1);
922
923
7.15M
    x1r = x1r + x3r;
924
7.15M
    x1i = x1i + x3i;
925
7.15M
    x3r = x1r - (x3r << 1);
926
7.15M
    x3i = x1i - (x3i << 1);
927
928
7.15M
    x5r = x5r + x5i;
929
7.15M
    x5i = x5r - (x5i << 1);
930
931
7.15M
    x7r = x7r + x7i;
932
7.15M
    x7i = x7r - (x7i << 1);
933
934
7.15M
    x7i = x5r - x7i;
935
7.15M
    x5r = x7i - (x5r << 1);
936
937
7.15M
    x5i = x7r - x5i;
938
7.15M
    x7r = x5i - (x7r << 1);
939
940
7.15M
    x7i = x7i << 1;
941
7.15M
    x5r = x5r << 1;
942
7.15M
    x5i = x5i << 1;
943
7.15M
    x7r = x7r << 1;
944
945
7.15M
    x0r = x0r + x1r;
946
7.15M
    x0i = x0i + x1i;
947
7.15M
    x1r = x0r - (x1r << 1);
948
7.15M
    x1i = x0i - (x1i << 1);
949
950
7.15M
    x2r = x2r + x3i;
951
7.15M
    tmp = x2r - (x3i << 1);
952
7.15M
    x2i = x2i - x3r;
953
7.15M
    x3i = x2i + (x3r << 1);
954
955
7.15M
    /* Four results go to WORD32 offsets 0, 4, 8 and 12 of this 16-word group. */
    *ptr_tmp = x0r;
956
7.15M
    *(ptr_tmp + 1) = x0i;
957
7.15M
    ptr_tmp += 4;
958
959
7.15M
    *ptr_tmp = x2r;
960
7.15M
    *(ptr_tmp + 1) = x2i;
961
7.15M
    ptr_tmp += 4;
962
963
7.15M
    *ptr_tmp = x1r;
964
7.15M
    *(ptr_tmp + 1) = x1i;
965
7.15M
    ptr_tmp += 4;
966
967
7.15M
    *ptr_tmp = tmp;
968
7.15M
    *(ptr_tmp + 1) = x3i;
969
7.15M
    /* Rewind to offset 2 so the remaining four fill offsets 2, 6, 10, 14. */
    ptr_tmp -= 10;
970
971
7.15M
    /* 0x5A82 = 23170, i.e. approximately 2^15 / sqrt(2). */
    tmp = 0x5A82;
972
973
7.15M
    x7i = x4r + (ixheaacd_mult32x16lin32(x7i, tmp));
974
7.15M
    x4r = x7i - (x4r << 1);
975
976
7.15M
    x7r = x4i + (ixheaacd_mult32x16lin32(x7r, tmp));
977
7.15M
    x4i = x7r - (x4i << 1);
978
979
7.15M
    x5i = x6r + (ixheaacd_mult32x16lin32(x5i, tmp));
980
7.15M
    x6r = x5i - (x6r << 1);
981
982
7.15M
    x5r = x6i + (ixheaacd_mult32x16lin32(x5r, tmp));
983
7.15M
    x6i = x5r - (x6i << 1);
984
985
7.15M
    *ptr_tmp = x7i;
986
7.15M
    *(ptr_tmp + 1) = x7r;
987
7.15M
    ptr_tmp += 4;
988
989
7.15M
    *ptr_tmp = x5i;
990
7.15M
    *(ptr_tmp + 1) = x5r;
991
7.15M
    ptr_tmp += 4;
992
993
7.15M
    *ptr_tmp = -x4r;
994
7.15M
    *(ptr_tmp + 1) = -x4i;
995
7.15M
    ptr_tmp += 4;
996
997
7.15M
    *ptr_tmp = -x6r;
998
7.15M
    *(ptr_tmp + 1) = -x6i;
999
7.15M
    /* Offset 14 + 2 = 16: start of the next output group. */
    ptr_tmp += 2;
1000
7.15M
  }
1001
1002
215k
  del = 8;
1003
1004
215k
  nodespacing = 64;
1005
215k
  in_loop_cnt = npoints >> 6;
1006
1007
312k
  /* Middle passes: executed (n_stages - 2) times, in place on ptr_y.
   * After each pass del grows by 8x while nodespacing and in_loop_cnt
   * shrink by 8x. */
  for (k1 = n_stages - 2; k1 > 0; k1--) {
1008
97.0k
    WORD32 *data = ptr_y;
1009
97.0k
    const WORD32 *twiddles;
1010
1011
873k
    /* Butterflies that start at the first element of every 8 * del group;
     * this loop performs no twiddle multiplication. */
    for (i = 0; i != npoints; i += 8 * del) {
1012
776k
      data = ptr_y + (i << 1);
1013
776k
      x0r = *data;
1014
776k
      x0i = *(data + 1);
1015
776k
      data += (del << 2);
1016
1017
776k
      x2r = *data;
1018
776k
      x2i = *(data + 1);
1019
776k
      data += (del << 2);
1020
1021
776k
      x4r = *data;
1022
776k
      x4i = *(data + 1);
1023
776k
      data += (del << 2);
1024
1025
776k
      x6r = *data;
1026
776k
      x6i = *(data + 1);
1027
776k
      data -= 5 * (del << 1);
1028
1029
776k
      x0r = x0r + x4r;
1030
776k
      x0i = x0i + x4i;
1031
776k
      x4r = x0r - (x4r << 1);
1032
776k
      x4i = x0i - (x4i << 1);
1033
1034
776k
      x2r = x2r + x6r;
1035
776k
      x2i = x2i + x6i;
1036
776k
      x6r = x2r - (x6r << 1);
1037
776k
      x6i = x2i - (x6i << 1);
1038
1039
776k
      x0r = x0r + x2r;
1040
776k
      x0i = x0i + x2i;
1041
776k
      x2r = x0r - (x2r << 1);
1042
776k
      x2i = x0i - (x2i << 1);
1043
1044
776k
      x4r = x4r + x6i;
1045
776k
      x4i = x4i - x6r;
1046
776k
      tmp = x6r;
1047
776k
      x6r = x4r - (x6i << 1);
1048
776k
      x6i = x4i + (tmp << 1);
1049
1050
776k
      x1r = *data;
1051
776k
      x1i = *(data + 1);
1052
776k
      data += (del << 2);
1053
1054
776k
      x3r = *data;
1055
776k
      x3i = *(data + 1);
1056
776k
      data += (del << 2);
1057
1058
776k
      x5r = *data;
1059
776k
      x5i = *(data + 1);
1060
776k
      data += (del << 2);
1061
1062
776k
      x7r = *data;
1063
776k
      x7i = *(data + 1);
1064
776k
      data -= 7 * (del << 1);
1065
1066
776k
      x1r = x1r + x5r;
1067
776k
      x1i = x1i + x5i;
1068
776k
      x5r = x1r - (x5r << 1);
1069
776k
      x5i = x1i - (x5i << 1);
1070
1071
776k
      x3r = x3r + x7r;
1072
776k
      x3i = x3i + x7i;
1073
776k
      x7r = x3r - (x7r << 1);
1074
776k
      x7i = x3i - (x7i << 1);
1075
1076
776k
      x1r = x1r + x3r;
1077
776k
      x1i = x1i + x3i;
1078
776k
      x3r = x1r - (x3r << 1);
1079
776k
      x3i = x1i - (x3i << 1);
1080
1081
776k
      x5r = x5r + x5i;
1082
776k
      x5i = x5r - (x5i << 1);
1083
1084
776k
      x7r = x7r + x7i;
1085
776k
      x7i = x7r - (x7i << 1);
1086
1087
776k
      x7i = x5r - x7i;
1088
776k
      x5r = x7i - (x5r << 1);
1089
1090
776k
      x5i = x7r - x5i;
1091
776k
      x7r = x5i - (x7r << 1);
1092
1093
776k
      x7i = x7i << 1;
1094
776k
      x5r = x5r << 1;
1095
776k
      x5i = x5i << 1;
1096
776k
      x7r = x7r << 1;
1097
1098
776k
      x0r = x0r + x1r;
1099
776k
      x0i = x0i + x1i;
1100
776k
      x1r = x0r - (x1r << 1);
1101
776k
      x1i = x0i - (x1i << 1);
1102
1103
776k
      x2r = x2r + x3i;
1104
776k
      tmp = x2r - (x3i << 1);
1105
776k
      x2i = x2i - x3r;
1106
776k
      x3i = x2i + (x3r << 1);
1107
1108
776k
      /* Results are written back over the slots the inputs were read from. */
      *data = x0r;
1109
776k
      *(data + 1) = x0i;
1110
776k
      data += (del << 2);
1111
1112
776k
      *data = x2r;
1113
776k
      *(data + 1) = x2i;
1114
776k
      data += (del << 2);
1115
1116
776k
      *data = x1r;
1117
776k
      *(data + 1) = x1i;
1118
776k
      data += (del << 2);
1119
1120
776k
      *data = tmp;
1121
776k
      *(data + 1) = x3i;
1122
776k
      data -= 5 * (del << 1);
1123
1124
776k
      tmp = 0x5A82;
1125
1126
776k
      x7i = x4r + (ixheaacd_mult32x16lin32(x7i, tmp));
1127
776k
      x4r = x7i - (x4r << 1);
1128
776k
      x7r = x4i + (ixheaacd_mult32x16lin32(x7r, tmp));
1129
776k
      x4i = x7r - (x4i << 1);
1130
1131
776k
      x5i = x6r + (ixheaacd_mult32x16lin32(x5i, tmp));
1132
776k
      x6r = x5i - (x6r << 1);
1133
776k
      x5r = x6i + (ixheaacd_mult32x16lin32(x5r, tmp));
1134
776k
      x6i = x5r - (x6i << 1);
1135
1136
776k
      *data = x7i;
1137
776k
      *(data + 1) = x7r;
1138
776k
      data += (del << 2);
1139
1140
776k
      *data = x5i;
1141
776k
      *(data + 1) = x5r;
1142
776k
      data += (del << 2);
1143
1144
776k
      *data = -x4r;
1145
776k
      *(data + 1) = -x4i;
1146
776k
      data += (del << 2);
1147
1148
776k
      *data = -x6r;
1149
776k
      *(data + 1) = -x6i;
1150
1151
776k
      data -= 7 * (del << 1);
1152
776k
    }
1153
1154
97.0k
    twiddles = ptr_twiddle;
1155
97.0k
    data = ptr_y;
1156
1157
776k
    /* Remaining butterflies of this pass (j starts at nodespacing, not 0):
     * seven of the eight inputs are rotated by twiddle factors first. */
    for (j = nodespacing; j < nodespacing * del; j += nodespacing) {
1158
679k
      data = data + 2;
1159
1160
6.11M
      for (k = in_loop_cnt; k != 0; k--) {
1161
5.43M
        data += (del << 2);
1162
5.43M
        x2r = *data;
1163
5.43M
        x2i = *(data + 1);
1164
1165
5.43M
        data += (del << 2);
1166
5.43M
        x4r = *data;
1167
5.43M
        x4i = *(data + 1);
1168
1169
5.43M
        data += (del << 2);
1170
5.43M
        x6r = *data;
1171
5.43M
        x6i = *(data + 1);
1172
1173
5.43M
        data -= 6 * (del << 1);
1174
1175
5.43M
        twiddles += (j >> 2);
1176
1177
5.43M
        twiddle_val = *(twiddles);
1178
1179
5.43M
        /* Complex rotation by twiddle_val.  Presumably the low and high
         * 16-bit halves of twiddle_val hold the two rotation factors, going
         * by the lin32 / hin32 helper names - TODO confirm. */
        tmp = (ixheaacd_mult32x16lin32(x2r, twiddle_val) -
1180
5.43M
               ixheaac_mult32x16hin32(x2i, twiddle_val));
1181
5.43M
        x2i = (ixheaacd_mac32x16lin32(
1182
5.43M
                  ixheaac_mult32x16hin32(x2r, twiddle_val), x2i,
1183
5.43M
                  twiddle_val))
1184
5.43M
              << 1;
1185
5.43M
        x2r = tmp << 1;
1186
1187
5.43M
        twiddles += (j >> 2);
1188
5.43M
        twiddle_val = *(twiddles);
1189
1190
5.43M
        tmp = (ixheaacd_mult32x16lin32(x4r, twiddle_val) -
1191
5.43M
               ixheaac_mult32x16hin32(x4i, twiddle_val));
1192
5.43M
        x4i = (ixheaacd_mac32x16lin32(
1193
5.43M
                  ixheaac_mult32x16hin32(x4r, twiddle_val), x4i,
1194
5.43M
                  twiddle_val))
1195
5.43M
              << 1;
1196
5.43M
        x4r = tmp << 1;
1197
1198
5.43M
        twiddles += (j >> 2);
1199
5.43M
        twiddle_val = *(twiddles);
1200
1201
5.43M
        tmp = (ixheaacd_mult32x16lin32(x6r, twiddle_val) -
1202
5.43M
               ixheaac_mult32x16hin32(x6i, twiddle_val));
1203
5.43M
        x6i = (ixheaacd_mac32x16lin32(
1204
5.43M
                  ixheaac_mult32x16hin32(x6r, twiddle_val), x6i,
1205
5.43M
                  twiddle_val))
1206
5.43M
              << 1;
1207
5.43M
        x6r = tmp << 1;
1208
1209
5.43M
        /* x0 is used as read, without a twiddle multiply. */
        x0r = *data;
1210
5.43M
        x0i = *(data + 1);
1211
5.43M
        data += (del << 1);
1212
1213
5.43M
        x0r = x0r + x4r;
1214
5.43M
        x0i = x0i + x4i;
1215
5.43M
        x4r = x0r - (x4r << 1);
1216
5.43M
        x4i = x0i - (x4i << 1);
1217
1218
5.43M
        x2r = x2r + x6r;
1219
5.43M
        x2i = x2i + x6i;
1220
5.43M
        x6r = x2r - (x6r << 1);
1221
5.43M
        x6i = x2i - (x6i << 1);
1222
1223
5.43M
        x0r = x0r + x2r;
1224
5.43M
        x0i = x0i + x2i;
1225
5.43M
        x2r = x0r - (x2r << 1);
1226
5.43M
        x2i = x0i - (x2i << 1);
1227
1228
5.43M
        x4r = x4r + x6i;
1229
5.43M
        x4i = x4i - x6r;
1230
5.43M
        tmp = x6r;
1231
5.43M
        x6r = x4r - (x6i << 1);
1232
5.43M
        x6i = x4i + (tmp << 1);
1233
1234
5.43M
        x1r = *data;
1235
5.43M
        x1i = *(data + 1);
1236
5.43M
        data += (del << 2);
1237
1238
5.43M
        twiddles -= 5 * (j >> 3);
1239
5.43M
        twiddle_val = *(twiddles);
1240
1241
5.43M
        tmp = (ixheaacd_mult32x16lin32(x1r, twiddle_val) -
1242
5.43M
               ixheaac_mult32x16hin32(x1i, twiddle_val));
1243
5.43M
        x1i = (ixheaacd_mac32x16lin32(
1244
5.43M
                  ixheaac_mult32x16hin32(x1r, twiddle_val), x1i,
1245
5.43M
                  twiddle_val))
1246
5.43M
              << 1;
1247
5.43M
        x1r = tmp << 1;
1248
1249
5.43M
        x3r = *data;
1250
5.43M
        x3i = *(data + 1);
1251
5.43M
        data += (del << 2);
1252
1253
5.43M
        twiddles += (j >> 2);
1254
5.43M
        twiddle_val = *(twiddles);
1255
1256
5.43M
        /* Unlike x1/x2/x4/x6, the x3/x5/x7 products are not doubled here;
         * the "<< 1" / "<< 2" terms in the combine step below account for it. */
        tmp = (ixheaacd_mult32x16lin32(x3r, twiddle_val) -
1257
5.43M
               ixheaac_mult32x16hin32(x3i, twiddle_val));
1258
5.43M
        x3i = (ixheaacd_mac32x16lin32(
1259
5.43M
            ixheaac_mult32x16hin32(x3r, twiddle_val), x3i, twiddle_val));
1260
5.43M
        x3r = tmp;
1261
1262
5.43M
        x5r = *data;
1263
5.43M
        x5i = *(data + 1);
1264
5.43M
        data += (del << 2);
1265
1266
5.43M
        twiddles += (j >> 2);
1267
5.43M
        twiddle_val = *(twiddles);
1268
1269
5.43M
        tmp = (ixheaacd_mult32x16lin32(x5r, twiddle_val) -
1270
5.43M
               ixheaac_mult32x16hin32(x5i, twiddle_val));
1271
5.43M
        x5i = (ixheaacd_mac32x16lin32(
1272
5.43M
            ixheaac_mult32x16hin32(x5r, twiddle_val), x5i, twiddle_val));
1273
5.43M
        x5r = tmp;
1274
1275
5.43M
        x7r = *data;
1276
5.43M
        x7i = *(data + 1);
1277
5.43M
        data -= 7 * (del << 1);
1278
1279
5.43M
        twiddles += (j >> 2);
1280
5.43M
        twiddle_val = *(twiddles);
1281
5.43M
        twiddles -= 7 * (j >> 3);
1282
1283
5.43M
        tmp = (ixheaacd_mult32x16lin32(x7r, twiddle_val) -
1284
5.43M
               ixheaac_mult32x16hin32(x7i, twiddle_val));
1285
5.43M
        x7i = (ixheaacd_mac32x16lin32(
1286
5.43M
            ixheaac_mult32x16hin32(x7r, twiddle_val), x7i, twiddle_val));
1287
5.43M
        x7r = tmp;
1288
1289
5.43M
        x1r = x1r + (x5r << 1);
1290
5.43M
        x1i = x1i + (x5i << 1);
1291
5.43M
        x5r = x1r - (x5r << 2);
1292
5.43M
        x5i = x1i - (x5i << 2);
1293
1294
5.43M
        x3r = x3r + x7r;
1295
5.43M
        x3i = x3i + x7i;
1296
5.43M
        x7r = x3r - (x7r << 1);
1297
5.43M
        x7i = x3i - (x7i << 1);
1298
1299
5.43M
        x1r = x1r + (x3r << 1);
1300
5.43M
        x1i = x1i + (x3i << 1);
1301
5.43M
        x3r = x1r - (x3r << 2);
1302
5.43M
        x3i = x1i - (x3i << 2);
1303
1304
5.43M
        x5r = x5r + x5i;
1305
5.43M
        x5i = x5r - (x5i << 1);
1306
1307
5.43M
        x7r = x7r + x7i;
1308
5.43M
        x7i = x7r - (x7i << 1);
1309
1310
5.43M
        x7i = x5r - (x7i << 1);
1311
5.43M
        x5r = x7i - (x5r << 1);
1312
1313
5.43M
        x5i = (x7r << 1) - x5i;
1314
5.43M
        x7r = x5i - (x7r << 2);
1315
1316
5.43M
        x7i = x7i << 1;
1317
5.43M
        x5r = x5r << 1;
1318
5.43M
        x5i = x5i << 1;
1319
5.43M
        x7r = x7r << 1;
1320
1321
5.43M
        x0r = x0r + x1r;
1322
5.43M
        x0i = x0i + x1i;
1323
5.43M
        x1r = x0r - (x1r << 1);
1324
5.43M
        x1i = x0i - (x1i << 1);
1325
1326
5.43M
        x2r = x2r + x3i;
1327
5.43M
        tmp = x2r - (x3i << 1);
1328
5.43M
        x2i = x2i - x3r;
1329
5.43M
        x3i = x2i + (x3r << 1);
1330
1331
5.43M
        *data = x0r;
1332
5.43M
        *(data + 1) = x0i;
1333
5.43M
        data += (del << 2);
1334
1335
5.43M
        *data = x2r;
1336
5.43M
        *(data + 1) = x2i;
1337
5.43M
        data += (del << 2);
1338
1339
5.43M
        *data = x1r;
1340
5.43M
        *(data + 1) = x1i;
1341
5.43M
        data += (del << 2);
1342
1343
5.43M
        *data = tmp;
1344
5.43M
        *(data + 1) = x3i;
1345
5.43M
        data -= 5 * (del << 1);
1346
1347
5.43M
        tmp = 0x5A82;
1348
1349
5.43M
        x7i = x4r + (ixheaacd_mult32x16lin32(x7i, tmp));
1350
5.43M
        x4r = x7i - (x4r << 1);
1351
1352
5.43M
        x7r = x4i + (ixheaacd_mult32x16lin32(x7r, tmp));
1353
5.43M
        x4i = x7r - (x4i << 1);
1354
1355
5.43M
        x5i = x6r + (ixheaacd_mult32x16lin32(x5i, tmp));
1356
5.43M
        x6r = x5i - (x6r << 1);
1357
1358
5.43M
        x5r = x6i + (ixheaacd_mult32x16lin32(x5r, tmp));
1359
5.43M
        x6i = x5r - (x6i << 1);
1360
1361
5.43M
        *data = x7i;
1362
5.43M
        *(data + 1) = x7r;
1363
5.43M
        data += (del << 2);
1364
1365
5.43M
        *data = x5i;
1366
5.43M
        *(data + 1) = x5r;
1367
5.43M
        data += (del << 2);
1368
1369
5.43M
        *data = -x4r;
1370
5.43M
        *(data + 1) = -x4i;
1371
5.43M
        data += (del << 2);
1372
1373
5.43M
        *data = -x6r;
1374
5.43M
        *(data + 1) = -x6i;
1375
1376
5.43M
        data -= 7 * (del << 1);
1377
5.43M
        /* Advance by 16 * del WORD32s to the same slot of the next group. */
        data += (del << 4);
1378
5.43M
      }
1379
679k
      /* Undo the k-loop's advance before moving on to the next j. */
      data -= npoints << 1;
1380
679k
    }
1381
97.0k
    nodespacing >>= 3;
1382
97.0k
    del <<= 3;
1383
97.0k
    in_loop_cnt >>= 3;
1384
97.0k
  }
1385
1386
215k
  /* Last pass: same twiddled butterfly as above, but j starts at 0 and
   * there is no inner k loop (one butterfly per j). */
  {
1387
215k
    WORD32 *data = ptr_y;
1388
215k
    const WORD32 *twiddles;
1389
215k
    twiddles = ptr_twiddle;
1390
215k
    data = ptr_y;
1391
215k
    /* Pre-decrement so the "data + 2" at the top of the loop starts at ptr_y. */
    data = data - 2;
1392
1393
7.37M
    for (j = 0; j < nodespacing * del; j += nodespacing) {
1394
7.15M
      data = data + 2;
1395
1396
7.15M
      {
1397
7.15M
        data += (del << 2);
1398
7.15M
        x2r = *data;
1399
7.15M
        x2i = *(data + 1);
1400
1401
7.15M
        data += (del << 2);
1402
7.15M
        x4r = *data;
1403
7.15M
        x4i = *(data + 1);
1404
1405
7.15M
        data += (del << 2);
1406
7.15M
        x6r = *data;
1407
7.15M
        x6i = *(data + 1);
1408
1409
7.15M
        data -= 6 * (del << 1);
1410
1411
7.15M
        twiddles += (j >> 2);
1412
1413
7.15M
        twiddle_val = *(twiddles);
1414
1415
7.15M
        tmp = (ixheaacd_mult32x16lin32(x2r, twiddle_val) -
1416
7.15M
               ixheaac_mult32x16hin32(x2i, twiddle_val));
1417
7.15M
        x2i = (ixheaacd_mac32x16lin32(
1418
7.15M
                  ixheaac_mult32x16hin32(x2r, twiddle_val), x2i,
1419
7.15M
                  twiddle_val))
1420
7.15M
              << 1;
1421
7.15M
        x2r = tmp << 1;
1422
1423
7.15M
        twiddles += (j >> 2);
1424
7.15M
        twiddle_val = *(twiddles);
1425
1426
7.15M
        tmp = (ixheaacd_mult32x16lin32(x4r, twiddle_val) -
1427
7.15M
               ixheaac_mult32x16hin32(x4i, twiddle_val));
1428
7.15M
        x4i = (ixheaacd_mac32x16lin32(
1429
7.15M
                  ixheaac_mult32x16hin32(x4r, twiddle_val), x4i,
1430
7.15M
                  twiddle_val))
1431
7.15M
              << 1;
1432
7.15M
        x4r = tmp << 1;
1433
1434
7.15M
        twiddles += (j >> 2);
1435
7.15M
        twiddle_val = *(twiddles);
1436
1437
7.15M
        tmp = (ixheaacd_mult32x16lin32(x6r, twiddle_val) -
1438
7.15M
               ixheaac_mult32x16hin32(x6i, twiddle_val));
1439
7.15M
        x6i = (ixheaacd_mac32x16lin32(
1440
7.15M
                  ixheaac_mult32x16hin32(x6r, twiddle_val), x6i,
1441
7.15M
                  twiddle_val))
1442
7.15M
              << 1;
1443
7.15M
        x6r = tmp << 1;
1444
1445
7.15M
        x0r = *data;
1446
7.15M
        x0i = *(data + 1);
1447
7.15M
        data += (del << 1);
1448
1449
7.15M
        x0r = x0r + x4r;
1450
7.15M
        x0i = x0i + x4i;
1451
7.15M
        x4r = x0r - (x4r << 1);
1452
7.15M
        x4i = x0i - (x4i << 1);
1453
1454
7.15M
        x2r = x2r + x6r;
1455
7.15M
        x2i = x2i + x6i;
1456
7.15M
        x6r = x2r - (x6r << 1);
1457
7.15M
        x6i = x2i - (x6i << 1);
1458
1459
7.15M
        x0r = x0r + x2r;
1460
7.15M
        x0i = x0i + x2i;
1461
7.15M
        x2r = x0r - (x2r << 1);
1462
7.15M
        x2i = x0i - (x2i << 1);
1463
1464
7.15M
        x4r = x4r + x6i;
1465
7.15M
        x4i = x4i - x6r;
1466
7.15M
        tmp = x6r;
1467
7.15M
        x6r = x4r - (x6i << 1);
1468
7.15M
        x6i = x4i + (tmp << 1);
1469
1470
7.15M
        x1r = *data;
1471
7.15M
        x1i = *(data + 1);
1472
7.15M
        data += (del << 2);
1473
1474
7.15M
        twiddles -= 5 * (j >> 3);
1475
7.15M
        twiddle_val = *(twiddles);
1476
1477
7.15M
        tmp = (ixheaacd_mult32x16lin32(x1r, twiddle_val) -
1478
7.15M
               ixheaac_mult32x16hin32(x1i, twiddle_val));
1479
7.15M
        x1i = (ixheaacd_mac32x16lin32(
1480
7.15M
                  ixheaac_mult32x16hin32(x1r, twiddle_val), x1i,
1481
7.15M
                  twiddle_val))
1482
7.15M
              << 1;
1483
7.15M
        x1r = tmp << 1;
1484
1485
7.15M
        x3r = *data;
1486
7.15M
        x3i = *(data + 1);
1487
7.15M
        data += (del << 2);
1488
1489
7.15M
        twiddles += (j >> 2);
1490
7.15M
        twiddle_val = *(twiddles);
1491
1492
7.15M
        tmp = (ixheaacd_mult32x16lin32(x3r, twiddle_val) -
1493
7.15M
               ixheaac_mult32x16hin32(x3i, twiddle_val));
1494
7.15M
        x3i = (ixheaacd_mac32x16lin32(
1495
7.15M
            ixheaac_mult32x16hin32(x3r, twiddle_val), x3i, twiddle_val));
1496
7.15M
        x3r = tmp;
1497
1498
7.15M
        x5r = *data;
1499
7.15M
        x5i = *(data + 1);
1500
7.15M
        data += (del << 2);
1501
1502
7.15M
        twiddles += (j >> 2);
1503
7.15M
        twiddle_val = *(twiddles);
1504
1505
7.15M
        tmp = (ixheaacd_mult32x16lin32(x5r, twiddle_val) -
1506
7.15M
               ixheaac_mult32x16hin32(x5i, twiddle_val));
1507
7.15M
        x5i = (ixheaacd_mac32x16lin32(
1508
7.15M
            ixheaac_mult32x16hin32(x5r, twiddle_val), x5i, twiddle_val));
1509
7.15M
        x5r = tmp;
1510
1511
7.15M
        x7r = *data;
1512
7.15M
        x7i = *(data + 1);
1513
7.15M
        data -= 7 * (del << 1);
1514
1515
7.15M
        twiddles += (j >> 2);
1516
7.15M
        twiddle_val = *(twiddles);
1517
7.15M
        twiddles -= 7 * (j >> 3);
1518
1519
7.15M
        tmp = (ixheaacd_mult32x16lin32(x7r, twiddle_val) -
1520
7.15M
               ixheaac_mult32x16hin32(x7i, twiddle_val));
1521
7.15M
        x7i = (ixheaacd_mac32x16lin32(
1522
7.15M
            ixheaac_mult32x16hin32(x7r, twiddle_val), x7i, twiddle_val));
1523
7.15M
        x7r = tmp;
1524
1525
7.15M
        x1r = x1r + (x5r << 1);
1526
7.15M
        x1i = x1i + (x5i << 1);
1527
7.15M
        x5r = x1r - (x5r << 2);
1528
7.15M
        x5i = x1i - (x5i << 2);
1529
1530
7.15M
        x3r = x3r + x7r;
1531
7.15M
        x3i = x3i + x7i;
1532
7.15M
        x7r = x3r - (x7r << 1);
1533
7.15M
        x7i = x3i - (x7i << 1);
1534
1535
7.15M
        x1r = x1r + (x3r << 1);
1536
7.15M
        x1i = x1i + (x3i << 1);
1537
7.15M
        x3r = x1r - (x3r << 2);
1538
7.15M
        x3i = x1i - (x3i << 2);
1539
1540
7.15M
        x5r = x5r + x5i;
1541
7.15M
        x5i = x5r - (x5i << 1);
1542
1543
7.15M
        x7r = x7r + x7i;
1544
7.15M
        x7i = x7r - (x7i << 1);
1545
1546
7.15M
        x7i = x5r - (x7i << 1);
1547
7.15M
        x5r = x7i - (x5r << 1);
1548
1549
7.15M
        x5i = (x7r << 1) - x5i;
1550
7.15M
        x7r = x5i - (x7r << 2);
1551
1552
7.15M
        x7i = x7i << 1;
1553
7.15M
        x5r = x5r << 1;
1554
7.15M
        x5i = x5i << 1;
1555
7.15M
        x7r = x7r << 1;
1556
1557
7.15M
        x0r = x0r + x1r;
1558
7.15M
        x0i = x0i + x1i;
1559
7.15M
        x1r = x0r - (x1r << 1);
1560
7.15M
        x1i = x0i - (x1i << 1);
1561
1562
7.15M
        x2r = x2r + x3i;
1563
7.15M
        tmp = x2r - (x3i << 1);
1564
7.15M
        x2i = x2i - x3r;
1565
7.15M
        x3i = x2i + (x3r << 1);
1566
1567
7.15M
        *data = x0r;
1568
7.15M
        *(data + 1) = x0i;
1569
7.15M
        data += (del << 2);
1570
1571
7.15M
        *data = x2r;
1572
7.15M
        *(data + 1) = x2i;
1573
7.15M
        data += (del << 2);
1574
1575
7.15M
        *data = x1r;
1576
7.15M
        *(data + 1) = x1i;
1577
7.15M
        data += (del << 2);
1578
1579
7.15M
        *data = tmp;
1580
7.15M
        *(data + 1) = x3i;
1581
7.15M
        data -= 5 * (del << 1);
1582
1583
7.15M
        tmp = 0x5A82;
1584
1585
7.15M
        x7i = x4r + (ixheaacd_mult32x16lin32(x7i, tmp));
1586
7.15M
        x4r = x7i - (x4r << 1);
1587
1588
7.15M
        x7r = x4i + (ixheaacd_mult32x16lin32(x7r, tmp));
1589
7.15M
        x4i = x7r - (x4i << 1);
1590
1591
7.15M
        x5i = x6r + (ixheaacd_mult32x16lin32(x5i, tmp));
1592
7.15M
        x6r = x5i - (x6r << 1);
1593
1594
7.15M
        x5r = x6i + (ixheaacd_mult32x16lin32(x5r, tmp));
1595
7.15M
        x6i = x5r - (x6i << 1);
1596
1597
7.15M
        *data = x7i;
1598
7.15M
        *(data + 1) = x7r;
1599
7.15M
        data += (del << 2);
1600
1601
7.15M
        *data = x5i;
1602
7.15M
        *(data + 1) = x5r;
1603
7.15M
        data += (del << 2);
1604
1605
7.15M
        *data = -x4r;
1606
7.15M
        *(data + 1) = -x4i;
1607
7.15M
        data += (del << 2);
1608
1609
7.15M
        *data = -x6r;
1610
7.15M
        *(data + 1) = -x6i;
1611
1612
7.15M
        data -= 7 * (del << 1);
1613
7.15M
        data += (del << 4);
1614
7.15M
      }
1615
7.15M
      data -= npoints << 1;
1616
7.15M
    }
1617
1618
215k
    /* NOTE(review): nothing in this function reads these three values after
     * this point. */
    nodespacing >>= 3;
1619
215k
    del <<= 3;
1620
215k
    in_loop_cnt >>= 3;
1621
215k
  }
1622
215k
}
1623
1624
VOID ixheaacd_inverse_transform_960(
1625
    WORD32 spec_data[], WORD32 scratch[],
1626
    ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD32 expo,
1627
297k
    WORD32 *imdct_scale) {
1628
1629
297k
  WORD32 n;
1630
297k
  WORD32 Nd2;
1631
297k
  WORD16 const_mltfac;
1632
297k
  WORD32 neg_expo;
1633
1634
297k
  WORD32 i;
1635
1636
297k
  n = 120;
1637
297k
  Nd2 = n >> 1;
1638
297k
  neg_expo = 4;
1639
1640
297k
  ixheaacd_pre_twiddle_120(spec_data, scratch, n, ptr_imdct_tables->cosine_array_240,
1641
297k
                           neg_expo - expo);
1642
1643
297k
  ixheaacd_fft_120(ptr_imdct_tables, Nd2, spec_data, scratch);
1644
1645
297k
  neg_expo += 2;
1646
297k
  *imdct_scale = neg_expo + 1;
1647
1648
297k
  ixheaacd_post_twiddle_120(spec_data, scratch, ptr_imdct_tables->cosine_array_240,
1649
297k
                            n);
1650
297k
  const_mltfac = 17476;
1651
35.9M
  for (i = 0; i < 120; i++)
1652
35.6M
  {
1653
35.6M
    spec_data[i] = ixheaac_mult32x16in32_shl(spec_data[i], const_mltfac);
1654
35.6M
  }
1655
297k
}
1656
1657
WORD32 ixheaacd_inverse_transform(
1658
    WORD32 spec_data[], WORD32 scratch[],
1659
    ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD32 expo,
1660
215k
    WORD32 npoints) {
1661
215k
  (*ixheaacd_pretwiddle_compute)(spec_data, spec_data + npoints - 1, scratch,
1662
215k
                                 ptr_imdct_tables, (npoints >> 2), expo);
1663
1664
215k
  (*ixheaacd_imdct_using_fft)(ptr_imdct_tables, npoints >> 1, scratch,
1665
215k
                              spec_data);
1666
1667
215k
  expo += 2;
1668
1669
215k
  return expo;
1670
215k
}
1671
1672
VOID ixheaacd_mdct_960(WORD32 *inp, WORD32 *scratch, WORD32 *mdct_scale,
1673
                       WORD32 mdct_flag,
1674
43.9k
                       ia_aac_dec_imdct_tables_struct *imdct_tables_ptr) {
1675
43.9k
  WORD32 expo, neg_expo = 0, k;
1676
1677
43.9k
  WORD16 const_mltfac = 17476;
1678
1679
43.9k
  expo = (*ixheaacd_calc_max_spectral_line)(inp, MDCT_LEN_960) - 1;
1680
43.9k
  ;
1681
1682
43.9k
  memcpy(scratch, inp, sizeof(WORD32) * MDCT_LEN_960);
1683
1684
43.9k
  neg_expo = 7 - expo;
1685
1686
43.9k
  ixheaacd_pre_twiddle_960(inp, scratch, MDCT_LEN_960, imdct_tables_ptr->cosine_array_1920,
1687
43.9k
                           neg_expo);
1688
1689
43.9k
  ixheaacd_fft_960(inp, scratch, imdct_tables_ptr);
1690
1691
43.9k
  ixheaacd_post_twiddle_960(inp, scratch, imdct_tables_ptr->cosine_array_1920,
1692
43.9k
                            MDCT_LEN_960);
1693
1694
43.9k
  if (0 == mdct_flag) {
1695
43.9k
    WORD32 *data = inp;
1696
1697
21.1M
    for (k = MDCT_LEN_960 - 1; k >= 0; k -= 2) {
1698
21.0M
      *data = ixheaac_mult32x16in32_shl(*data, const_mltfac);
1699
21.0M
      data++;
1700
21.0M
      *data = ixheaac_mult32x16in32_shl(*data, const_mltfac);
1701
21.0M
      data++;
1702
21.0M
    }
1703
43.9k
  }
1704
43.9k
  *mdct_scale = neg_expo + 1 + 1 + 1;
1705
43.9k
}
1706
1707
VOID ixheaacd_mdct_480_ld(WORD32 *inp, WORD32 *scratch, WORD32 *mdct_scale,
1708
                          WORD32 mdct_flag,
1709
                          ia_aac_dec_imdct_tables_struct *imdct_tables_ptr,
1710
48.5k
                          WORD32 object_type) {
1711
48.5k
  WORD32 expo, neg_expo = 0, k;
1712
1713
48.5k
  WORD32 const_mltfac = 1145324612;
1714
1715
48.5k
  expo = (*ixheaacd_calc_max_spectral_line)(inp, MDCT_LEN) - 1;
1716
48.5k
  ;
1717
1718
48.5k
  memcpy(scratch, inp, sizeof(WORD32) * MDCT_LEN);
1719
1720
48.5k
  neg_expo = 7 - expo;
1721
1722
48.5k
  ixheaacd_pre_twiddle(inp, scratch, 480, imdct_tables_ptr->cosine_array_960,
1723
48.5k
                       neg_expo);
1724
1725
48.5k
  ixheaacd_fft_480_ld(inp, scratch, imdct_tables_ptr);
1726
1727
48.5k
  if (object_type == AOT_ER_AAC_LD) {
1728
17.6k
    ixheaacd_post_twiddle_ld(inp, scratch, imdct_tables_ptr->cosine_array_960,
1729
17.6k
                             480);
1730
30.8k
  } else if (object_type == AOT_ER_AAC_ELD) {
1731
30.8k
    ixheaacd_post_twiddle_eld(inp + (480), scratch,
1732
30.8k
                              imdct_tables_ptr->cosine_array_960, 480);
1733
30.8k
  }
1734
1735
48.5k
  if (0 == mdct_flag) {
1736
41.1k
    WORD32 *data = inp;
1737
1738
41.1k
    if (object_type != AOT_ER_AAC_ELD) {
1739
2.46M
      for (k = MDCT_LEN - 1; k >= 0; k -= 2) {
1740
2.45M
        *data = ixheaac_mult32_shl(*data, const_mltfac);
1741
2.45M
        data++;
1742
2.45M
        *data = ixheaac_mult32_shl(*data, const_mltfac);
1743
2.45M
        data++;
1744
2.45M
      }
1745
10.2k
      neg_expo += 1;
1746
30.8k
    } else {
1747
30.8k
      data = inp + 480;
1748
14.8M
      for (k = (MDCT_LEN << 1) - 1; k >= 0; k -= 2) {
1749
14.8M
        *data = ixheaac_mult32_shl(*data, const_mltfac);
1750
14.8M
        data++;
1751
14.8M
        *data = ixheaac_mult32_shl(*data, const_mltfac);
1752
14.8M
        data++;
1753
14.8M
      }
1754
30.8k
      neg_expo += 1;
1755
30.8k
    }
1756
41.1k
  }
1757
1758
48.5k
  *mdct_scale = neg_expo + 3;
1759
48.5k
}
1760
1761
VOID ixheaacd_inverse_transform_512(
1762
    WORD32 data[], WORD32 temp[], WORD32 *imdct_scale, WORD32 *cos_sin_ptr,
1763
95.9k
    ia_aac_dec_imdct_tables_struct *imdct_tables_ptr, WORD32 object_type) {
1764
95.9k
  WORD32 n;
1765
95.9k
  WORD32 npoints_2;
1766
95.9k
  WORD16 expo, neg_expo;
1767
1768
95.9k
  n = 512;
1769
1770
95.9k
  npoints_2 = n >> 1;
1771
1772
95.9k
  expo = (*ixheaacd_calc_max_spectral_line)(data, n) - 1;
1773
1774
95.9k
  memcpy(temp, data, sizeof(WORD32) * n);
1775
1776
95.9k
  neg_expo = 7 - expo;
1777
1778
95.9k
  ixheaacd_pre_twiddle(data, temp, n, cos_sin_ptr, neg_expo);
1779
1780
95.9k
  (*ixheaacd_fft32x32_ld)(imdct_tables_ptr, npoints_2, data, temp);
1781
1782
95.9k
  neg_expo = (*ixheaacd_neg_expo_inc)(neg_expo);
1783
1784
95.9k
  *imdct_scale = neg_expo + 1;
1785
1786
95.9k
  if (object_type == AOT_ER_AAC_ELD)
1787
32.0k
    ixheaacd_post_twiddle_eld((data + n), temp, cos_sin_ptr, n);
1788
63.9k
  else
1789
63.9k
    ixheaacd_post_twiddle_ld((data), temp, cos_sin_ptr, n);
1790
95.9k
}
1791
1792
VOID ixheaacd_fft_960(WORD32 *inp, WORD32 *op,
1793
43.9k
                      ia_aac_dec_imdct_tables_struct *imdct_tables_ptr) {
1794
43.9k
  WORD32 i;
1795
43.9k
  WORD32 *buf1, *buf2;
1796
43.9k
  WORD16 *re_arr_tab_sml_480_ptr;
1797
1798
43.9k
  (*ixheaacd_aac_ld_dec_rearrange_960)(inp, op, 480,
1799
43.9k
                                       imdct_tables_ptr->re_arr_tab_32);
1800
1801
43.9k
  buf1 = op;
1802
43.9k
  buf2 = inp;
1803
1804
702k
  for (i = 0; i < FFT15; i++) {
1805
658k
    ixheaacd_fft_32_points(imdct_tables_ptr->w_32,
1806
658k
                           32, buf1, buf2);
1807
1808
658k
    buf1 += (FFT16X2 * 2);
1809
658k
    buf2 += (FFT16X2 * 2);
1810
658k
  }
1811
1812
43.9k
  re_arr_tab_sml_480_ptr = imdct_tables_ptr->re_arr_tab_sml_480;
1813
43.9k
  buf1 = inp;
1814
1815
1.44M
  for (i = 0; i < FFT16 * 2; i++) {
1816
1.40M
    ixheaacd_ld_dec_fft_15_opt(buf1, op,
1817
1.40M
                               ixheaacd_fft5out, re_arr_tab_sml_480_ptr);
1818
1.40M
    buf1 += 2;
1819
1.40M
    re_arr_tab_sml_480_ptr += FFT15;
1820
1.40M
  }
1821
43.9k
}
1822
1823
/*
 * FFT kernel called by ixheaacd_fft_960 with npoints == 32.
 *
 * Stage 1 runs two in-place butterfly passes over ptr_x (stride 32, then 8).
 * Each pass consumes six WORD16 twiddles per butterfly from ptr_w.
 * Stage 2 combines the result into ptr_y at positions taken from rev_dig[].
 *
 * ptr_w    twiddle table (sin at even offsets, cos at odd offsets)
 * npoints  loop bound for stage 1 and pointer offsets for stage 2
 * ptr_x    in/out work buffer, modified in place by stage 1
 * ptr_y    output buffer written by stage 2
 */
VOID ixheaacd_fft_32_points(WORD16 *ptr_w, WORD32 npoints,
                            WORD32* ptr_x, WORD32* ptr_y) {
  WORD32 i;
  WORD32 stride = 32;
  WORD32 tw_offset = 0;
  WORD32 fft_jmp = 192;
  WORD32 *src_a, *src_b;
  WORD32 *y0, *y1, *y2, *y3;
  WORD32 consumed, half_len;

  /* ---- Stage 1: in-place butterfly passes ---- */
  while (stride > 2) {
    WORD32 off_q1, off_q2, off_q3;
    WORD32 tw_idx = 0;
    WORD32 *px = ptr_x;
    WORD16 *tw;

    fft_jmp >>= 2;

    off_q1 = stride >> 1;
    off_q2 = stride;
    off_q3 = stride + (stride >> 1);

    tw = ptr_w + tw_offset;
    tw_offset += fft_jmp;

    for (i = 0; i < npoints; i += 4) {
      WORD32 *bfly = px;
      WORD16 sin1 = tw[tw_idx + 0], cos1 = tw[tw_idx + 1];
      WORD16 sin2 = tw[tw_idx + 2], cos2 = tw[tw_idx + 3];
      WORD16 sin3 = tw[tw_idx + 4], cos3 = tw[tw_idx + 5];
      WORD32 a_re, a_im, b_re, b_im, c_re, c_im, d_re, d_im;
      WORD32 sum_ac_re, sum_ac_im, dif_ac_re, dif_ac_im;
      WORD32 sum_bd_re, sum_bd_im, dif_bd_re, dif_bd_im;
      WORD32 xt0, yt0, xt1, yt1, xt2, yt2;

      /* All eight inputs are loaded before anything is stored. */
      a_re = bfly[0];
      a_im = bfly[1];
      c_re = bfly[off_q2];
      c_im = bfly[off_q2 + 1];
      d_re = bfly[off_q3];
      d_im = bfly[off_q3 + 1];
      b_re = bfly[off_q1];
      b_im = bfly[off_q1 + 1];

      sum_ac_re = ixheaac_add32_sat(a_re, c_re);
      sum_ac_im = ixheaac_add32_sat(a_im, c_im);
      dif_ac_re = ixheaac_sub32_sat(a_re, c_re);
      dif_ac_im = ixheaac_sub32_sat(a_im, c_im);
      sum_bd_re = ixheaac_add32_sat(b_re, d_re);
      sum_bd_im = ixheaac_add32_sat(b_im, d_im);
      dif_bd_re = ixheaac_sub32_sat(b_re, d_re);
      dif_bd_im = ixheaac_sub32_sat(b_im, d_im);

      bfly[0] = ixheaac_add32_sat(sum_ac_re, sum_bd_re);
      bfly[1] = ixheaac_add32_sat(sum_ac_im, sum_bd_im);
      xt0 = ixheaac_sub32_sat(sum_ac_re, sum_bd_re);
      yt0 = ixheaac_sub32_sat(sum_ac_im, sum_bd_im);
      xt1 = ixheaac_add32_sat(dif_ac_re, dif_bd_im);
      yt2 = ixheaac_add32_sat(dif_ac_im, dif_bd_re);
      xt2 = ixheaac_sub32_sat(dif_ac_re, dif_bd_im);
      yt1 = ixheaac_sub32_sat(dif_ac_im, dif_bd_re);

      bfly[off_q1] =
          ixheaac_add32_sat(MPYLIRC(sin1, yt1), MPYLIRC(cos1, xt1));
      bfly[off_q1 + 1] =
          ixheaac_sub32_sat(MPYLIRC(cos1, yt1), MPYLIRC(sin1, xt1));

      bfly[off_q2] =
          ixheaac_add32_sat(MPYLIRC(sin2, yt0), MPYLIRC(cos2, xt0));
      bfly[off_q2 + 1] =
          ixheaac_sub32_sat(MPYLIRC(cos2, yt0), MPYLIRC(sin2, xt0));
      /* Result is not read afterwards; kept as in the existing code. */
      yt0 = MPYLIRC(sin2, yt0);

      bfly[off_q3] =
          ixheaac_add32_sat(MPYLIRC(sin3, yt2), MPYLIRC(cos3, xt2));
      bfly[off_q3 + 1] =
          ixheaac_sub32_sat(MPYLIRC(cos3, yt2), MPYLIRC(sin3, xt2));
      /* Result is not read afterwards; kept as in the existing code. */
      yt2 = MPYLIRC(sin3, yt2);

      /* Advance; once the pass's twiddles are used up, skip ahead in the
       * data by fft_jmp words and restart the twiddle index. */
      tw_idx += 6;
      px += 2;
      if (tw_idx == fft_jmp) {
        px += fft_jmp;
        tw_idx = 0;
      }
    }

    stride >>= 2;
  }

  /* ---- Stage 2: final combine into ptr_y ---- */
  y0 = ptr_y;
  y2 = ptr_y + (int)npoints;
  src_a = ptr_x;
  src_b = ptr_x + (int)(npoints >> 1);

  y1 = y0 + (int)(npoints >> 2);
  y3 = y2 + (int)(npoints >> 2);

  half_len = npoints >> 1;
  consumed = 0;

  for (i = 0; i < 4; i++) {
    int re_idx, im_idx;
    WORD32 rev = rev_dig[i];

    re_idx = rev << 1;
    im_idx = re_idx + 1;

    y0[re_idx] = ixheaac_add32_sat(src_a[0], src_a[2]);
    y2[re_idx] = ixheaac_sub32_sat(src_a[0], src_a[2]);
    y0[im_idx] = ixheaac_add32_sat(src_a[1], src_a[3]);
    y2[im_idx] = ixheaac_sub32_sat(src_a[1], src_a[3]);
    y1[re_idx] = ixheaac_add32_sat(src_a[4], src_a[6]);
    y3[re_idx] = ixheaac_sub32_sat(src_a[4], src_a[6]);
    y1[im_idx] = ixheaac_add32_sat(src_a[5], src_a[7]);
    y3[im_idx] = ixheaac_sub32_sat(src_a[5], src_a[7]);
    src_a += 8;

    re_idx += 2;
    im_idx += 2;

    y0[re_idx] = ixheaac_add32_sat(src_b[0], src_b[2]);
    y2[re_idx] = ixheaac_sub32_sat(src_b[0], src_b[2]);
    y0[im_idx] = ixheaac_add32_sat(src_b[1], src_b[3]);
    y2[im_idx] = ixheaac_sub32_sat(src_b[1], src_b[3]);
    y1[re_idx] = ixheaac_add32_sat(src_b[4], src_b[6]);
    y3[re_idx] = ixheaac_sub32_sat(src_b[4], src_b[6]);
    y1[im_idx] = ixheaac_add32_sat(src_b[5], src_b[7]);
    y3[im_idx] = ixheaac_sub32_sat(src_b[5], src_b[7]);
    src_b += 8;

    /* After half_len words, both readers jump over the other's half. */
    consumed += 8;
    if (consumed == half_len) {
      consumed += half_len;
      src_a += (int)npoints >> 1;
      src_b += (int)npoints >> 1;
    }
  }
}
1964
1965
4.50M
/*
 * Gather two-word pairs from ip into op in table order.
 *
 * For each of the mdct_len_2 table entries t, the pair ip[2t], ip[2t + 1]
 * is appended to op. Nothing is written when mdct_len_2 <= 0.
 */
VOID ixheaacd_dec_rearrange_short(WORD32 *ip, WORD32 *op, WORD32 mdct_len_2, WORD16 *re_arr_tab) {
  WORD32 remaining;
  WORD32 *dst = op;
  const WORD16 *entry = re_arr_tab;

  for (remaining = mdct_len_2; remaining > 0; remaining--) {
    const WORD32 *pair = ip + (*entry++ << 1);

    *dst++ = pair[0];
    *dst++ = pair[1];
  }
}
1974
1975
VOID ixheaacd_ld_dec_fft_15_opt(WORD32 *inp, WORD32 *op, WORD32 *fft3out,
1976
1.40M
                                WORD16 *ptr_re_arr_tab_sml_240) {
1977
1.40M
  WORD32 i, n, idx;
1978
1.40M
  WORD32 *buf1, *buf2, *buf1a;
1979
1.40M
  WORD32 add_r, sub_r;
1980
1.40M
  WORD32 add_i, sub_i;
1981
1.40M
  WORD32 x_01_r, x_01_i, temp;
1982
1.40M
  WORD32 p1, p2, p3, p4;
1983
1984
1.40M
  WORD32 sinmu = 1859775393;
1985
1.40M
  WORD32 c_51 = 2042378317;
1986
1.40M
  WORD32 c_52 = -1652318768;
1987
1.40M
  WORD32 c_53 = -780119100;
1988
1.40M
  WORD32 c_54 = 1200479854;
1989
1.40M
  WORD32 c_55 = -1342177280;
1990
1991
1.40M
  WORD32 r1, r2, r3, r4;
1992
1.40M
  WORD32 s1, s2, s3, s4, t, temp1, temp2;
1993
1.40M
  WORD32 *fft3outptr = fft3out;
1994
1995
1.40M
  WORD32 xr_0, xr_1, xr_2;
1996
1.40M
  WORD32 xi_0, xi_1, xi_2;
1997
1998
1.40M
  buf2 = fft3out;
1999
1.40M
  buf1 = buf1a = fft3out;
2000
1.40M
  n = 0;
2001
2002
1.40M
  {
2003
1.40M
    *buf1++ = inp[0];
2004
1.40M
    *buf1++ = inp[1];
2005
2006
1.40M
    *buf1++ = inp[192];
2007
1.40M
    *buf1++ = inp[193];
2008
2009
1.40M
    *buf1++ = inp[384];
2010
1.40M
    *buf1++ = inp[385];
2011
2012
1.40M
    *buf1++ = inp[576];
2013
1.40M
    *buf1++ = inp[577];
2014
2015
1.40M
    *buf1++ = inp[768];
2016
1.40M
    *buf1++ = inp[769];
2017
2018
1.40M
    r1 = ixheaac_add32_sat(buf1a[2], buf1a[8]);
2019
1.40M
    r4 = ixheaac_sub32_sat(buf1a[2], buf1a[8]);
2020
1.40M
    r3 = ixheaac_add32_sat(buf1a[4], buf1a[6]);
2021
1.40M
    r2 = ixheaac_sub32_sat(buf1a[4], buf1a[6]);
2022
2023
1.40M
    t = ixheaac_mult32_shl(ixheaac_sub32_sat(r1, r3), c_54);
2024
2025
1.40M
    r1 = ixheaac_add32_sat(r1, r3);
2026
2027
1.40M
    temp1 = ixheaac_add32_sat(buf1a[0], r1);
2028
2029
1.40M
    r1 = ixheaac_add32_sat(temp1, (ixheaac_mult32_shl(r1, c_55) << 1));
2030
2031
1.40M
    r3 = ixheaac_sub32_sat(r1, t);
2032
1.40M
    r1 = ixheaac_add32_sat(r1, t);
2033
2034
1.40M
    t = ixheaac_mult32_shl(ixheaac_add32_sat(r4, r2), c_51);
2035
1.40M
    r4 = ixheaac_add32_sat(t, (ixheaac_mult32_shl(r4, c_52) << 1));
2036
1.40M
    r2 = ixheaac_add32_sat(t, ixheaac_mult32_shl(r2, c_53));
2037
2038
1.40M
    s1 = ixheaac_add32_sat(buf1a[3], buf1a[9]);
2039
1.40M
    s4 = ixheaac_sub32_sat(buf1a[3], buf1a[9]);
2040
1.40M
    s3 = ixheaac_add32_sat(buf1a[5], buf1a[7]);
2041
1.40M
    s2 = ixheaac_sub32_sat(buf1a[5], buf1a[7]);
2042
2043
1.40M
    t = ixheaac_mult32_shl(ixheaac_sub32_sat(s1, s3), c_54);
2044
2045
1.40M
    s1 = ixheaac_add32_sat(s1, s3);
2046
2047
1.40M
    temp2 = ixheaac_add32_sat(buf1a[1], s1);
2048
2049
2050
1.40M
    s1 = ixheaac_add32_sat(temp2, (ixheaac_mult32_shl(s1, c_55) << 1));
2051
2052
1.40M
    s3 = ixheaac_sub32_sat(s1, t);
2053
1.40M
    s1 = ixheaac_add32_sat(s1, t);
2054
2055
1.40M
    t = ixheaac_mult32_shl(ixheaac_add32_sat(s4, s2), c_51);
2056
1.40M
    s4 = ixheaac_add32_sat(t, (ixheaac_mult32_shl(s4, c_52) << 1));
2057
1.40M
    s2 = ixheaac_add32_sat(t, (ixheaac_mult32_shl(s2, c_53)));
2058
2059
1.40M
    *buf2++ = temp1;
2060
1.40M
    *buf2++ = temp2;
2061
1.40M
    *buf2++ = ixheaac_add32_sat(r1, s2);
2062
1.40M
    *buf2++ = ixheaac_sub32_sat(s1, r2);
2063
1.40M
    *buf2++ = ixheaac_sub32_sat(r3, s4);
2064
1.40M
    *buf2++ = ixheaac_add32_sat(s3, r4);
2065
1.40M
    *buf2++ = ixheaac_add32_sat(r3, s4);
2066
1.40M
    *buf2++ = ixheaac_sub32_sat(s3, r4);
2067
1.40M
    *buf2++ = ixheaac_sub32_sat(r1, s2);
2068
1.40M
    *buf2++ = ixheaac_add32_sat(s1, r2);
2069
1.40M
    buf1a = buf1;
2070
2071
1.40M
    *buf1++ = inp[320];
2072
1.40M
    *buf1++ = inp[321];
2073
2074
1.40M
    *buf1++ = inp[512];
2075
1.40M
    *buf1++ = inp[513];
2076
2077
1.40M
    *buf1++ = inp[704];
2078
1.40M
    *buf1++ = inp[705];
2079
2080
1.40M
    *buf1++ = inp[896];
2081
1.40M
    *buf1++ = inp[897];
2082
2083
1.40M
    *buf1++ = inp[128];
2084
1.40M
    *buf1++ = inp[129];
2085
2086
1.40M
    r1 = ixheaac_add32_sat(buf1a[2], buf1a[8]);
2087
1.40M
    r4 = ixheaac_sub32_sat(buf1a[2], buf1a[8]);
2088
1.40M
    r3 = ixheaac_add32_sat(buf1a[4], buf1a[6]);
2089
1.40M
    r2 = ixheaac_sub32_sat(buf1a[4], buf1a[6]);
2090
2091
1.40M
    t = ixheaac_mult32_shl(ixheaac_sub32_sat(r1, r3), c_54);
2092
2093
1.40M
    r1 = ixheaac_add32_sat(r1, r3);
2094
2095
1.40M
    temp1 = ixheaac_add32_sat(buf1a[0], r1);
2096
2097
1.40M
    r1 = ixheaac_add32_sat(temp1, (ixheaac_mult32_shl(r1, c_55) << 1));
2098
2099
1.40M
    r3 = ixheaac_sub32_sat(r1, t);
2100
1.40M
    r1 = ixheaac_add32_sat(r1, t);
2101
2102
1.40M
    t = ixheaac_mult32_shl(ixheaac_add32_sat(r4, r2), c_51);
2103
1.40M
    r4 = ixheaac_add32_sat(t, (ixheaac_mult32_shl(r4, c_52) << 1));
2104
1.40M
    r2 = ixheaac_add32_sat(t, ixheaac_mult32_shl(r2, c_53));
2105
2106
1.40M
    s1 = ixheaac_add32_sat(buf1a[3], buf1a[9]);
2107
1.40M
    s4 = ixheaac_sub32_sat(buf1a[3], buf1a[9]);
2108
1.40M
    s3 = ixheaac_add32_sat(buf1a[5], buf1a[7]);
2109
1.40M
    s2 = ixheaac_sub32_sat(buf1a[5], buf1a[7]);
2110
2111
1.40M
    t = ixheaac_mult32_shl(ixheaac_sub32_sat(s1, s3), c_54);
2112
2113
1.40M
    s1 = ixheaac_add32_sat(s1, s3);
2114
2115
1.40M
    temp2 = ixheaac_add32_sat(buf1a[1], s1);
2116
2117
1.40M
    s1 = ixheaac_add32_sat(temp2, (ixheaac_mult32_shl(s1, c_55) << 1));
2118
2119
1.40M
    s3 = ixheaac_sub32_sat(s1, t);
2120
1.40M
    s1 = ixheaac_add32_sat(s1, t);
2121
2122
1.40M
    t = ixheaac_mult32_shl(ixheaac_add32_sat(s4, s2), c_51);
2123
1.40M
    s4 = ixheaac_add32_sat(t, (ixheaac_mult32_shl(s4, c_52) << 1));
2124
1.40M
    s2 = ixheaac_add32_sat(t, ixheaac_mult32_shl(s2, c_53));
2125
2126
1.40M
    *buf2++ = temp1;
2127
1.40M
    *buf2++ = temp2;
2128
1.40M
    *buf2++ = ixheaac_add32_sat(r1, s2);
2129
1.40M
    *buf2++ = ixheaac_sub32_sat(s1, r2);
2130
1.40M
    *buf2++ = ixheaac_sub32_sat(r3, s4);
2131
1.40M
    *buf2++ = ixheaac_add32_sat(s3, r4);
2132
1.40M
    *buf2++ = ixheaac_add32_sat(r3, s4);
2133
1.40M
    *buf2++ = ixheaac_sub32_sat(s3, r4);
2134
1.40M
    *buf2++ = ixheaac_sub32_sat(r1, s2);
2135
1.40M
    *buf2++ = ixheaac_add32_sat(s1, r2);
2136
1.40M
    buf1a = buf1;
2137
2138
1.40M
    *buf1++ = inp[640];
2139
1.40M
    *buf1++ = inp[641];
2140
2141
1.40M
    *buf1++ = inp[832];
2142
1.40M
    *buf1++ = inp[833];
2143
2144
1.40M
    *buf1++ = inp[64];
2145
1.40M
    *buf1++ = inp[65];
2146
2147
1.40M
    *buf1++ = inp[256];
2148
1.40M
    *buf1++ = inp[257];
2149
2150
1.40M
    *buf1++ = inp[448];
2151
1.40M
    *buf1++ = inp[449];
2152
2153
1.40M
    r1 = ixheaac_add32_sat(buf1a[2], buf1a[8]);
2154
1.40M
    r4 = ixheaac_sub32_sat(buf1a[2], buf1a[8]);
2155
1.40M
    r3 = ixheaac_add32_sat(buf1a[4], buf1a[6]);
2156
1.40M
    r2 = ixheaac_sub32_sat(buf1a[4], buf1a[6]);
2157
2158
1.40M
    t = ixheaac_mult32_shl(ixheaac_sub32_sat(r1, r3), c_54);
2159
2160
1.40M
    r1 = ixheaac_add32_sat(r1, r3);
2161
2162
1.40M
    temp1 = ixheaac_add32_sat(buf1a[0], r1);
2163
2164
1.40M
    r1 = ixheaac_add32_sat(temp1, (ixheaac_mult32_shl(r1, c_55) << 1));
2165
2166
1.40M
    r3 = ixheaac_sub32_sat(r1, t);
2167
1.40M
    r1 = ixheaac_add32_sat(r1, t);
2168
2169
1.40M
    t = ixheaac_mult32_shl(ixheaac_add32_sat(r4, r2), c_51);
2170
1.40M
    r4 = ixheaac_add32_sat(t, ixheaac_mult32_shl(r4, c_52) << 1);
2171
1.40M
    r2 = ixheaac_add32_sat(t, ixheaac_mult32_shl(r2, c_53));
2172
2173
1.40M
    s1 = ixheaac_add32_sat(buf1a[3], buf1a[9]);
2174
1.40M
    s4 = ixheaac_sub32_sat(buf1a[3], buf1a[9]);
2175
1.40M
    s3 = ixheaac_add32_sat(buf1a[5], buf1a[7]);
2176
1.40M
    s2 = ixheaac_sub32_sat(buf1a[5], buf1a[7]);
2177
2178
1.40M
    t = ixheaac_mult32_shl(ixheaac_sub32_sat(s1, s3), c_54);
2179
2180
1.40M
    s1 = ixheaac_add32_sat(s1, s3);
2181
2182
1.40M
    temp2 = ixheaac_add32_sat(buf1a[1], s1);
2183
2184
1.40M
    s1 = ixheaac_add32_sat(temp2, (ixheaac_mult32_shl(s1, c_55) << 1));
2185
2186
1.40M
    s3 = ixheaac_sub32_sat(s1, t);
2187
1.40M
    s1 = ixheaac_add32_sat(s1, t);
2188
2189
1.40M
    t = ixheaac_mult32_shl(ixheaac_add32_sat(s4, s2), c_51);
2190
1.40M
    s4 = ixheaac_add32_sat(t, (ixheaac_mult32_shl(s4, c_52) << 1));
2191
1.40M
    s2 = ixheaac_add32_sat(t, ixheaac_mult32_shl(s2, c_53));
2192
2193
1.40M
    *buf2++ = temp1;
2194
1.40M
    *buf2++ = temp2;
2195
1.40M
    *buf2++ = ixheaac_add32_sat(r1, s2);
2196
1.40M
    *buf2++ = ixheaac_sub32_sat(s1, r2);
2197
1.40M
    *buf2++ = ixheaac_sub32_sat(r3, s4);
2198
1.40M
    *buf2++ = ixheaac_add32_sat(s3, r4);
2199
1.40M
    *buf2++ = ixheaac_add32_sat(r3, s4);
2200
1.40M
    *buf2++ = ixheaac_sub32_sat(s3, r4);
2201
1.40M
    *buf2++ = ixheaac_sub32_sat(r1, s2);
2202
1.40M
    *buf2++ = ixheaac_add32_sat(s1, r2);
2203
1.40M
    buf1a = buf1;
2204
1.40M
  }
2205
2206
1.40M
  n = 0;
2207
8.43M
  for (i = 0; i < FFT5; i++) {
2208
7.02M
    xr_0 = fft3outptr[0];
2209
7.02M
    xi_0 = fft3outptr[1];
2210
2211
7.02M
    xr_1 = fft3outptr[10];
2212
7.02M
    xi_1 = fft3outptr[11];
2213
2214
7.02M
    xr_2 = fft3outptr[20];
2215
7.02M
    xi_2 = fft3outptr[21];
2216
2217
7.02M
    x_01_r = ixheaac_add32_sat(xr_0, xr_1);
2218
7.02M
    x_01_i = ixheaac_add32_sat(xi_0, xi_1);
2219
2220
7.02M
    add_r = ixheaac_add32_sat(xr_1, xr_2);
2221
7.02M
    add_i = ixheaac_add32_sat(xi_1, xi_2);
2222
2223
7.02M
    sub_r = ixheaac_sub32_sat(xr_1, xr_2);
2224
7.02M
    sub_i = ixheaac_sub32_sat(xi_1, xi_2);
2225
2226
7.02M
    p1 = add_r >> 1;
2227
2228
7.02M
    p2 = ixheaac_mult32_shl(sub_i, sinmu);
2229
7.02M
    p3 = ixheaac_mult32_shl(sub_r, sinmu);
2230
2231
7.02M
    p4 = add_i >> 1;
2232
2233
7.02M
    temp = ixheaac_sub32_sat(xr_0, p1);
2234
7.02M
    temp1 = ixheaac_add32_sat(xi_0, p3);
2235
7.02M
    temp2 = ixheaac_sub32_sat(xi_0, p3);
2236
2237
7.02M
    idx = ptr_re_arr_tab_sml_240[n++] << 1;
2238
7.02M
    op[idx] = ixheaac_add32_sat(x_01_r, xr_2);
2239
7.02M
    op[idx + 1] = ixheaac_add32_sat(x_01_i, xi_2);
2240
2241
7.02M
    idx = ptr_re_arr_tab_sml_240[n++] << 1;
2242
7.02M
    op[idx] = ixheaac_add32_sat(temp, p2);
2243
7.02M
    op[idx + 1] = ixheaac_sub32_sat(temp2, p4);
2244
2245
7.02M
    idx = ptr_re_arr_tab_sml_240[n++] << 1;
2246
7.02M
    op[idx] = ixheaac_sub32_sat(temp, p2);
2247
7.02M
    op[idx + 1] = ixheaac_sub32_sat(temp1, p4);
2248
7.02M
    fft3outptr += 2;
2249
7.02M
  }
2250
1.40M
  return;
2251
1.40M
}
2252
2253
/*
 * Composite FFT: FFT15 inline 4-point transforms followed by FFT4 calls of
 * ixheaacd_fft_960_15, with a table-driven rearrangement of 60 pairs
 * before, between and after the two stages.
 *
 * imdct_tables_ptr supplies re_arr_tab_4, re_arr_tab_15_4, re_arr_tab_120
 * npoints          not consulted; the stage sizes are fixed
 * ptr_x            input buffer, also used as intermediate storage
 * ptr_y            receives the final rearranged result
 */
VOID ixheaacd_fft_120(ia_aac_dec_imdct_tables_struct *imdct_tables_ptr,
                      WORD32 npoints, WORD32* ptr_x, WORD32* ptr_y) {
  WORD32 *const inp = ptr_x;
  WORD32 *const op = ptr_y;
  WORD32 blk;

  (void)npoints;

  ixheaacd_dec_rearrange_short(inp, op, 60, imdct_tables_ptr->re_arr_tab_4);

  /* 4-point stage: read 8 words from op, write 8 words to inp. */
  for (blk = 0; blk < FFT15; blk++) {
    const WORD32 *src = op + blk * (FFT4 * 2);
    WORD32 *dst = inp + blk * (FFT4 * 2);
    WORD32 a_re = src[0], a_im = src[1];
    WORD32 b_re = src[2], b_im = src[3];
    WORD32 c_re = src[4], c_im = src[5];
    WORD32 d_re = src[6], d_im = src[7];
    WORD32 sum_ac_re, sum_ac_im, dif_ac_re, dif_ac_im;
    WORD32 sum_bd_re, sum_bd_im, dif_bd_re, dif_bd_im;
    WORD32 o0_re, o0_im, o1_re, o1_im, o2_re, o2_im, o3_re, o3_im;

    sum_ac_re = ixheaac_add32_sat(a_re, c_re);
    sum_ac_im = ixheaac_add32_sat(a_im, c_im);
    dif_ac_re = ixheaac_sub32_sat(a_re, c_re);
    dif_ac_im = ixheaac_sub32_sat(a_im, c_im);
    sum_bd_re = ixheaac_add32_sat(b_re, d_re);
    sum_bd_im = ixheaac_add32_sat(b_im, d_im);
    dif_bd_re = ixheaac_sub32_sat(b_re, d_re);
    dif_bd_im = ixheaac_sub32_sat(b_im, d_im);

    o0_re = ixheaac_add32_sat(sum_ac_re, sum_bd_re);
    o0_im = ixheaac_add32_sat(sum_ac_im, sum_bd_im);
    o1_re = ixheaac_add32_sat(dif_ac_re, dif_bd_im);
    o1_im = ixheaac_sub32_sat(dif_ac_im, dif_bd_re);
    o2_re = ixheaac_sub32_sat(sum_ac_re, sum_bd_re);
    o2_im = ixheaac_sub32_sat(sum_ac_im, sum_bd_im);
    o3_re = ixheaac_sub32_sat(dif_ac_re, dif_bd_im);
    o3_im = ixheaac_add32_sat(dif_ac_im, dif_bd_re);

    dst[0] = o0_re;
    dst[1] = o0_im;
    dst[2] = o1_re;
    dst[3] = o1_im;
    dst[4] = o2_re;
    dst[5] = o2_im;
    dst[6] = o3_re;
    dst[7] = o3_im;
  }

  ixheaacd_dec_rearrange_short(inp, op, 60, imdct_tables_ptr->re_arr_tab_15_4);

  /* 15-point stage: each call reads from op and leaves its result in op. */
  for (blk = 0; blk < FFT4; blk++) {
    const WORD32 offset = blk * (FFT15 * 2);

    ixheaacd_fft_960_15(op + offset, inp + offset, imdct_tables_ptr);
  }

  ixheaacd_dec_rearrange_short(inp, op, 60, imdct_tables_ptr->re_arr_tab_120);
}
2333
2334
VOID ixheaacd_fft_960_15(WORD32 *inp, WORD32 *op,
2335
1.18M
                         ia_aac_dec_imdct_tables_struct *imdct_tables_ptr) {
2336
1.18M
  WORD32 i;
2337
1.18M
  WORD32 *buf1, *buf2;
2338
2339
1.18M
  ixheaacd_dec_rearrange_short(inp, op, FFT15, imdct_tables_ptr->re_arr_tab_5);
2340
2341
1.18M
  buf1 = op;
2342
1.18M
  buf2 = inp;
2343
2344
4.75M
  for (i = 0; i < FFT3; i++) {
2345
3.56M
    ixheaacd_fft_5(buf1, buf2);
2346
2347
3.56M
    buf1 += (FFT5 * 2);
2348
3.56M
    buf2 += (FFT5 * 2);
2349
3.56M
  }
2350
2351
1.18M
  ixheaacd_dec_rearrange_short(inp, op, FFT15, imdct_tables_ptr->re_arr_tab_3);
2352
2353
1.18M
  buf1 = op;
2354
1.18M
  buf2 = inp;
2355
2356
7.13M
  for (i = 0; i < FFT5; i++) {
2357
5.94M
    ixheaacd_fft_3(buf1, buf2);
2358
2359
5.94M
    buf1 += (FFT3 * 2);
2360
5.94M
    buf2 += (FFT3 * 2);
2361
5.94M
  }
2362
2363
1.18M
  ixheaacd_dec_rearrange_short(inp, op, FFT15, imdct_tables_ptr->re_arr_tab_sml);
2364
1.18M
}
2365
2366
5.94M
/*
 * 3-point transform on interleaved pairs.
 *
 * inp  six input words: inp[0..1], inp[2..3], inp[4..5]
 * op   six output words
 *
 * Additions and subtractions saturate. inp[1] and inp[5] are read while op
 * is being filled, exactly as before, so behaviour with overlapping
 * buffers is unchanged.
 */
VOID ixheaacd_fft_3(WORD32 *inp, WORD32 *op) {
  const WORD32 sinmu = 1859775393;
  WORD32 sum01_re, sum01_im;
  WORD32 sum12_re, sum12_im;
  WORD32 dif12_re, dif12_im;
  WORD32 half_re, half_im;
  WORD32 rot_re, rot_im;
  WORD32 base_re;

  sum01_re = ixheaac_add32_sat(inp[0], inp[2]);
  sum01_im = ixheaac_add32_sat(inp[1], inp[3]);

  sum12_re = ixheaac_add32_sat(inp[2], inp[4]);
  sum12_im = ixheaac_add32_sat(inp[3], inp[5]);

  dif12_re = ixheaac_sub32_sat(inp[2], inp[4]);
  dif12_im = ixheaac_sub32_sat(inp[3], inp[5]);

  half_re = sum12_re >> 1;
  rot_im = ixheaac_mult32_shl(dif12_im, sinmu);
  rot_re = ixheaac_mult32_shl(dif12_re, sinmu);
  half_im = sum12_im >> 1;

  base_re = ixheaac_sub32_sat(inp[0], half_re);

  op[0] = ixheaac_add32_sat(sum01_re, inp[4]);
  op[1] = ixheaac_add32_sat(sum01_im, inp[5]);
  op[2] = ixheaac_add32_sat(base_re, rot_im);
  op[3] = ixheaac_sub32_sat(ixheaac_sub32_sat(inp[1], rot_re), half_im);
  op[4] = ixheaac_sub32_sat(base_re, rot_im);
  op[5] = ixheaac_sub32_sat(ixheaac_add32_sat(inp[1], rot_re), half_im);
}
2397
2398
3.56M
/*
 * 5-point transform on interleaved pairs.
 *
 * inp  ten input words (five pairs)
 * op   ten output words (five pairs)
 *
 * The even-indexed and odd-indexed words go through the same arithmetic,
 * so it is run once per component and the results are combined at the end.
 * Every input word is read before the first output word is written.
 */
VOID ixheaacd_fft_5(WORD32 *inp, WORD32 *op) {
  const WORD32 c_51 = 2042378317;
  const WORD32 c_52 = -1652318768;
  const WORD32 c_53 = -780119100;
  const WORD32 c_54 = 1200479854;
  const WORD32 c_55 = -1342177280;

  /* v[part][0] is the first output word of that component;
   * v[part][1..4] are the intermediate terms (r1..r4 / s1..s4). */
  WORD32 v[2][5];
  WORD32 part;

  for (part = 0; part < 2; part++) {
    WORD32 u1, u2, u3, u4, t;

    u1 = ixheaac_add32_sat(inp[2 + part], inp[8 + part]);
    u4 = ixheaac_sub32_sat(inp[2 + part], inp[8 + part]);
    u3 = ixheaac_add32_sat(inp[4 + part], inp[6 + part]);
    u2 = ixheaac_sub32_sat(inp[4 + part], inp[6 + part]);

    t = ixheaac_mult32_shl(ixheaac_sub32_sat(u1, u3), c_54);
    u1 = ixheaac_add32_sat(u1, u3);

    v[part][0] = ixheaac_add32_sat(inp[part], u1);
    u1 = ixheaac_add32_sat(
        v[part][0], ixheaac_shl32_sat(ixheaac_mult32_shl(u1, c_55), 1));

    u3 = ixheaac_sub32_sat(u1, t);
    u1 = ixheaac_add32_sat(u1, t);

    t = ixheaac_mult32_shl(ixheaac_add32_sat(u4, u2), c_51);
    u4 = ixheaac_add32_sat(
        t, ixheaac_shl32_sat(ixheaac_mult32_shl(u4, c_52), 1));
    u2 = ixheaac_add32_sat(t, ixheaac_mult32_shl(u2, c_53));

    v[part][1] = u1;
    v[part][2] = u2;
    v[part][3] = u3;
    v[part][4] = u4;
  }

  op[0] = v[0][0];
  op[1] = v[1][0];
  op[2] = ixheaac_add32_sat(v[0][1], v[1][2]);
  op[3] = ixheaac_sub32_sat(v[1][1], v[0][2]);
  op[4] = ixheaac_sub32_sat(v[0][3], v[1][4]);
  op[5] = ixheaac_add32_sat(v[1][3], v[0][4]);
  op[6] = ixheaac_add32_sat(v[0][3], v[1][4]);
  op[7] = ixheaac_sub32_sat(v[1][3], v[0][4]);
  op[8] = ixheaac_sub32_sat(v[0][1], v[1][2]);
  op[9] = ixheaac_add32_sat(v[1][1], v[0][2]);
}
2460
2461
VOID ixheaacd_fft_480_ld(WORD32 *inp, WORD32 *op,
2462
48.5k
                         ia_aac_dec_imdct_tables_struct *imdct_tables_ptr) {
2463
48.5k
  WORD32 i;
2464
48.5k
  WORD32 *buf1, *buf2;
2465
48.5k
  UWORD8 *re_arr_tab_sml_240_ptr;
2466
2467
48.5k
  (*ixheaacd_aac_ld_dec_rearrange)(inp, op, MDCT_LEN_BY2,
2468
48.5k
                                   imdct_tables_ptr->re_arr_tab_16);
2469
2470
48.5k
  buf1 = op;
2471
48.5k
  buf2 = inp;
2472
2473
776k
  for (i = 0; i < FFT15; i++) {
2474
727k
    (*ixheaacd_fft32x32_ld2)(imdct_tables_ptr, 16, buf1, buf2);
2475
2476
727k
    buf1 += (FFT16X2);
2477
727k
    buf2 += (FFT16X2);
2478
727k
  }
2479
48.5k
  re_arr_tab_sml_240_ptr = imdct_tables_ptr->re_arr_tab_sml_240;
2480
48.5k
  buf1 = inp;
2481
2482
824k
  for (i = 0; i < FFT16; i++) {
2483
776k
    (*ixheaacd_fft_15_ld)(buf1, op, ixheaacd_fft5out, re_arr_tab_sml_240_ptr);
2484
776k
    re_arr_tab_sml_240_ptr += FFT15;
2485
776k
    buf1 += 2;
2486
776k
  }
2487
48.5k
}
2488
2489
/*
 * Pre-twiddle with 32-bit coefficients.
 *
 * xptr         output, n words; filled from both ends towards the middle
 * data         input, n words; read from both ends towards the middle
 * n            word count; n >> 2 iterations, each producing four outputs
 * cos_sin_ptr  four coefficients per iteration, in the order c, s, c1, s1
 * neg_expo     shift passed to ixheaac_shr32_dir_sat for every output
 */
VOID ixheaacd_pre_twiddle_960(WORD32 *xptr, WORD32 *data, WORD32 n,
                              WORD32 *cos_sin_ptr, WORD32 neg_expo) {
  const WORD quarter = n >> 2;
  WORD iter;
  WORD32 lo = 0;
  WORD32 hi = n - 1;
  WORD32 re, im, acc;
  WORD32 cos_v, sin_v;

  for (iter = 0; iter < quarter; iter++) {
    /* Front pair of outputs. */
    cos_v = cos_sin_ptr[0];
    sin_v = cos_sin_ptr[1];

    re = data[lo];
    im = data[hi];

    acc = -ixheaac_add32(ixheaac_mult32x32in32(re, cos_v),
                         ixheaac_mult32x32in32(im, sin_v));
    xptr[lo] = ixheaac_shr32_dir_sat(acc, neg_expo);

    acc = -ixheaac_sub32(ixheaac_mult32x32in32(im, cos_v),
                         ixheaac_mult32x32in32(re, sin_v));
    xptr[lo + 1] = ixheaac_shr32_dir_sat(acc, neg_expo);

    /* Back pair of outputs; the roles of the two inputs are swapped. */
    cos_v = cos_sin_ptr[2];
    sin_v = cos_sin_ptr[3];

    im = data[lo + 1];
    re = data[hi - 1];

    acc = -ixheaac_sub32(ixheaac_mult32x32in32(im, cos_v),
                         ixheaac_mult32x32in32(re, sin_v));
    xptr[hi] = ixheaac_shr32_dir_sat(acc, neg_expo);

    acc = -ixheaac_add32(ixheaac_mult32x32in32(re, cos_v),
                         ixheaac_mult32x32in32(im, sin_v));
    xptr[hi - 1] = ixheaac_shr32_dir_sat(acc, neg_expo);

    cos_sin_ptr += 4;
    lo += 2;
    hi -= 2;
  }
}
2532
2533
VOID ixheaacd_pre_twiddle_120(WORD32 *xptr, WORD32 *data, WORD32 n,
2534
297k
                              WORD16 *cos_sin_ptr, WORD32 neg_expo) {
2535
297k
  WORD npoints_4, i;
2536
297k
  WORD32 tempr, tempi, temp;
2537
297k
  WORD16 c, c1, s, s1;
2538
297k
  WORD32 *in_ptr1, *in_ptr2;
2539
297k
  WORD32 *xprt1 = xptr + (n - 1);
2540
2541
297k
  npoints_4 = n >> 2;
2542
2543
297k
  in_ptr1 = data;
2544
297k
  in_ptr2 = data + n - 1;
2545
2546
9.21M
  for (i = 0; i < npoints_4; i++) {
2547
8.92M
    c = *cos_sin_ptr++;
2548
8.92M
    s = *cos_sin_ptr++;
2549
2550
8.92M
    tempr = *in_ptr1++;
2551
8.92M
    tempi = *in_ptr2--;
2552
2553
8.92M
    temp = -ixheaac_add32(ixheaac_mult32x16in32(tempr, c),
2554
8.92M
                           ixheaac_mult32x16in32(tempi, s));
2555
8.92M
    *xptr++ = ixheaac_shr32_dir_sat(temp, neg_expo);
2556
2557
8.92M
    temp = -ixheaac_sub32(ixheaac_mult32x16in32(tempi, c),
2558
8.92M
                           ixheaac_mult32x16in32(tempr, s));
2559
8.92M
    *xptr++ = ixheaac_shr32_dir_sat(temp, neg_expo);
2560
2561
8.92M
    c1 = *cos_sin_ptr++;
2562
8.92M
    s1 = *cos_sin_ptr++;
2563
2564
8.92M
    tempi = *in_ptr1++;
2565
8.92M
    tempr = *in_ptr2--;
2566
2567
8.92M
    temp = -ixheaac_sub32(ixheaac_mult32x16in32(tempi, c1),
2568
8.92M
                           ixheaac_mult32x16in32(tempr, s1));
2569
8.92M
    *xprt1-- = ixheaac_shr32_dir_sat(temp, neg_expo);
2570
2571
8.92M
    temp = -ixheaac_add32(ixheaac_mult32x16in32(tempr, c1),
2572
8.92M
                           ixheaac_mult32x16in32(tempi, s1));
2573
8.92M
    *xprt1-- = ixheaac_shr32_dir_sat(temp, neg_expo);
2574
8.92M
  }
2575
297k
}
2576
2577
/*
 * Pre-twiddle used by ixheaacd_inverse_transform_512.
 *
 * xptr         output, written sequentially, four words per iteration
 * data         input; every second word is read, forwards from data[0] and
 *              backwards from data[n - 1]
 * n            word count; n >> 2 iterations
 * cos_sin_ptr  four coefficients per iteration, in the order c, c1, s, s1
 * neg_expo     >= 0: every output is shifted right by neg_expo
 *              <  0: every output is shifted left by -neg_expo
 */
VOID ixheaacd_pre_twiddle(WORD32 *xptr, WORD32 *data, WORD32 n,
                          WORD32 *cos_sin_ptr, WORD32 neg_expo) {
  WORD remaining;
  WORD32 re, im, acc;
  WORD32 cos_a, cos_b, sin_a, sin_b;
  WORD32 *fwd = data;
  WORD32 *bwd = data + n - 1;
  WORD32 *dst = xptr;

  if (neg_expo < 0) {
    /* Negative exponent: scale up. */
    neg_expo = -neg_expo;

    for (remaining = n >> 2; remaining > 0; remaining--) {
      cos_a = cos_sin_ptr[0];
      cos_b = cos_sin_ptr[1];
      sin_a = cos_sin_ptr[2];
      sin_b = cos_sin_ptr[3];
      cos_sin_ptr += 4;

      re = fwd[0];
      im = bwd[0];

      acc = -ixheaac_add32(ixheaac_mult32(re, cos_a),
                           ixheaac_mult32(im, sin_a));
      dst[0] = ixheaac_shl32(acc, neg_expo);

      acc = ixheaac_sub32(ixheaac_mult32(re, sin_a),
                          ixheaac_mult32(im, cos_a));
      dst[1] = ixheaac_shl32(acc, neg_expo);

      re = fwd[2];
      im = bwd[-2];

      acc = -ixheaac_add32(ixheaac_mult32(re, cos_b),
                           ixheaac_mult32(im, sin_b));
      dst[2] = ixheaac_shl32(acc, neg_expo);

      acc = ixheaac_sub32(ixheaac_mult32(re, sin_b),
                          ixheaac_mult32(im, cos_b));
      dst[3] = ixheaac_shl32(acc, neg_expo);

      fwd += 4;
      bwd -= 4;
      dst += 4;
    }
    return;
  }

  /* Non-negative exponent: scale down. */
  for (remaining = n >> 2; remaining > 0; remaining--) {
    cos_a = cos_sin_ptr[0];
    cos_b = cos_sin_ptr[1];
    sin_a = cos_sin_ptr[2];
    sin_b = cos_sin_ptr[3];
    cos_sin_ptr += 4;

    re = fwd[0];
    im = bwd[0];

    acc = -ixheaac_add32(ixheaac_mult32(re, cos_a),
                         ixheaac_mult32(im, sin_a));
    dst[0] = ixheaac_shr32(acc, neg_expo);

    acc = ixheaac_sub32(ixheaac_mult32(re, sin_a),
                        ixheaac_mult32(im, cos_a));
    dst[1] = ixheaac_shr32(acc, neg_expo);

    re = fwd[2];
    im = bwd[-2];

    acc = -ixheaac_add32(ixheaac_mult32(re, cos_b),
                         ixheaac_mult32(im, sin_b));
    dst[2] = ixheaac_shr32(acc, neg_expo);

    acc = ixheaac_sub32(ixheaac_mult32(re, sin_b),
                        ixheaac_mult32(im, cos_b));
    dst[3] = ixheaac_shr32(acc, neg_expo);

    fwd += 4;
    bwd -= 4;
    dst += 4;
  }
}
2663
2664
VOID ixheaacd_post_twiddle_120(WORD32 out[], WORD32 x[],
2665
297k
                               const WORD16 *cos_sin_ptr, WORD m) {
2666
297k
  WORD i;
2667
297k
  WORD16 c, c1, s, s1;
2668
297k
  WORD32 tempr, tempi, temp;
2669
297k
  WORD32 *in_ptr2 = x + (m - 1);
2670
297k
  WORD32 *in_ptr1 = x;
2671
297k
  WORD32 *xptr = out;
2672
297k
  WORD32 *xptr1 = out + (m - 1);
2673
2674
9.21M
  for (i = 0; i < m; i += 4) {
2675
8.92M
    c = *cos_sin_ptr++;
2676
8.92M
    s = *cos_sin_ptr++;
2677
8.92M
    c1 = *cos_sin_ptr++;
2678
8.92M
    s1 = *cos_sin_ptr++;
2679
2680
8.92M
    tempr = *in_ptr1++;
2681
8.92M
    tempi = *in_ptr1++;
2682
2683
8.92M
    temp = -ixheaac_sub32_sat(ixheaac_mult32x16in32(tempr, s),
2684
8.92M
                               ixheaac_mult32x16in32(tempi, c));
2685
8.92M
    *xptr1-- = temp;
2686
2687
8.92M
    temp = -ixheaac_add32_sat(ixheaac_mult32x16in32(tempr, c),
2688
8.92M
                               ixheaac_mult32x16in32(tempi, s));
2689
8.92M
    *xptr++ = temp;
2690
2691
8.92M
    tempi = *in_ptr2--;
2692
8.92M
    tempr = *in_ptr2--;
2693
2694
8.92M
    temp = -ixheaac_sub32_sat(ixheaac_mult32x16in32(tempr, s1),
2695
8.92M
                               ixheaac_mult32x16in32(tempi, c1));
2696
8.92M
    *xptr++ = temp;
2697
2698
8.92M
    temp = -ixheaac_add32_sat(ixheaac_mult32x16in32(tempr, c1),
2699
8.92M
                               ixheaac_mult32x16in32(tempi, s1));
2700
8.92M
    *xptr1-- = temp;
2701
8.92M
  }
2702
297k
}
2703
2704
/**
 * Post-twiddle with 32-bit coefficients.
 *
 * Same access pattern as the 16-bit-coefficient variant: per loop pass it
 * reads four coefficients in the order (c, s, c1, s1), one (re, im) pair
 * from the front of x and one from the back, and stores two results at the
 * front of out and two at the back.
 *
 * @param out          destination buffer, written at indices 0 .. m-1
 * @param x            source buffer, read at indices 0 .. m-1
 * @param cos_sin_ptr  coefficient table, advanced by four entries per pass
 * @param m            number of words processed; the loop steps by four
 */
VOID ixheaacd_post_twiddle_960(WORD32 out[], WORD32 x[],
                               const WORD32 *cos_sin_ptr, WORD m) {
  WORD head = 0;     /* ascending index into both x and out  */
  WORD tail = m - 1; /* descending index into both x and out */
  WORD count;

  for (count = 0; count < m; count += 4) {
    const WORD32 cos_a = cos_sin_ptr[0];
    const WORD32 sin_a = cos_sin_ptr[1];
    const WORD32 cos_b = cos_sin_ptr[2];
    const WORD32 sin_b = cos_sin_ptr[3];
    WORD32 re, im;

    cos_sin_ptr += 4;

    re = x[head];
    im = x[head + 1];

    out[tail] = -ixheaac_sub32_sat(ixheaac_mult32x32in32(re, sin_a),
                                   ixheaac_mult32x32in32(im, cos_a));
    out[head] = -ixheaac_add32_sat(ixheaac_mult32x32in32(re, cos_a),
                                   ixheaac_mult32x32in32(im, sin_a));

    /* The tail pair is fetched only after the two stores above. */
    im = x[tail];
    re = x[tail - 1];

    out[head + 1] = -ixheaac_sub32_sat(ixheaac_mult32x32in32(re, sin_b),
                                       ixheaac_mult32x32in32(im, cos_b));
    out[tail - 1] = -ixheaac_add32_sat(ixheaac_mult32x32in32(re, cos_b),
                                       ixheaac_mult32x32in32(im, sin_b));

    head += 2;
    tail -= 2;
  }
}
2743
2744
/**
 * Post-twiddle that interleaves its results from both ends of out.
 *
 * For each group of four coefficients, stored as (c, c1, s, s1), two
 * consecutive (re, im) pairs are read from x. Every pair yields
 *   im*c - re*s      stored at a descending odd-stride position, and
 *   -(re*c + im*s)   stored at an ascending even-stride position,
 * using the non-saturating ixheaac_add32 / ixheaac_sub32 helpers.
 *
 * @param out          destination buffer, written at indices 0 .. m-1
 * @param x            source buffer, read sequentially
 * @param cos_sin_ptr  coefficient table, four entries per group
 * @param m            output length; m >> 2 groups are processed
 */
VOID ixheaacd_post_twiddle_ld(WORD32 out[], WORD32 x[],
                              const WORD32 *cos_sin_ptr, WORD m) {
  const WORD groups = m >> 2;
  WORD g;

  for (g = 0; g < groups; g++) {
    const WORD32 *tw = cos_sin_ptr + 4 * g;
    const WORD32 *in = x + 4 * g;
    const WORD up = 4 * g;             /* ascending output index  */
    const WORD down = (m - 1) - 4 * g; /* descending output index */
    const WORD32 cos_a = tw[0];
    const WORD32 cos_b = tw[1];
    const WORD32 sin_a = tw[2];
    const WORD32 sin_b = tw[3];
    WORD32 re, im;

    re = in[0];
    im = in[1];

    out[down] =
        ixheaac_sub32(ixheaac_mult32(im, cos_a), ixheaac_mult32(re, sin_a));
    out[up] =
        -ixheaac_add32(ixheaac_mult32(re, cos_a), ixheaac_mult32(im, sin_a));

    re = in[2];
    im = in[3];

    out[down - 2] =
        ixheaac_sub32(ixheaac_mult32(im, cos_b), ixheaac_mult32(re, sin_b));
    out[up + 2] =
        -ixheaac_add32(ixheaac_mult32(re, cos_b), ixheaac_mult32(im, sin_b));
  }
}
2785
2786
/**
 * Post-twiddle that also writes a mirrored copy of every result.
 *
 * m >> 2 coefficient groups, each stored as (c, c1, s, s1), are applied to
 * consecutive (re, im) pairs of x. Every pair produces
 *   rot_im = im*c - re*s       stored ascending from out[m/2], and
 *   rot_re = -(re*c + im*s)    stored descending from out[m + m/2 - 1].
 *
 * During the first m >> 3 groups the values are additionally copied to
 *   out[m + m/2 ...] ascending   (rot_re unchanged) and
 *   out[m/2 - 1 ...] descending  (rot_im negated).
 * During the remaining groups they are copied to
 *   out[4*n - m/2 ...] ascending            (rot_re negated) and
 *   out[2*m + m/2 - 1 - 4*n ...] descending (rot_im unchanged),
 * where n is the group counter at the end of the first phase.
 *
 * @param out          destination buffer; indices up to 2*m + m/2 - 1 are
 *                     written
 * @param x            source buffer, read sequentially
 * @param cos_sin_ptr  coefficient table, four entries per group
 * @param m            transform size parameter
 */
VOID ixheaacd_post_twiddle_eld(WORD32 out[], WORD32 x[],
                               const WORD32 *cos_sin_ptr, WORD m) {
  const WORD half_m = m >> 1;
  const WORD32 *src = x;
  WORD32 *rise_main = &out[half_m];         /* receives rot_im, ascending  */
  WORD32 *fall_main = &out[m + half_m - 1]; /* receives rot_re, descending */
  WORD32 *rise_copy = &out[m + half_m];     /* mirror target, ascending    */
  WORD32 *fall_copy = &out[half_m - 1];     /* mirror target, descending   */
  WORD32 cs[2], sn[2];
  WORD32 re, im, rot_re, rot_im;
  WORD n, pass;

  /* Phase 1: first m/8 groups. */
  for (n = 0; n < (m >> 3); n++) {
    cs[0] = *cos_sin_ptr++;
    cs[1] = *cos_sin_ptr++;
    sn[0] = *cos_sin_ptr++;
    sn[1] = *cos_sin_ptr++;

    for (pass = 0; pass < 2; pass++) {
      re = *src++;
      im = *src++;

      rot_im = ixheaac_sub32(ixheaac_mult32(im, cs[pass]),
                             ixheaac_mult32(re, sn[pass]));
      rot_re = -ixheaac_add32(ixheaac_mult32(re, cs[pass]),
                              ixheaac_mult32(im, sn[pass]));

      *fall_main = rot_re;
      *rise_main = rot_im;

      /* The copies are read back from memory, as in the primary stores. */
      *rise_copy = *fall_main;
      *fall_copy = -*rise_main;

      rise_main += 2;
      fall_main -= 2;
      rise_copy += 2;
      fall_copy -= 2;
    }
  }

  /* Phase 2: the mirror targets move to the outer ends of the buffer. */
  rise_copy = &out[2 * 2 * n - half_m];
  fall_copy = &out[m + m + half_m - 1 - 2 * 2 * n];

  for (; n < (m >> 2); n++) {
    cs[0] = *cos_sin_ptr++;
    cs[1] = *cos_sin_ptr++;
    sn[0] = *cos_sin_ptr++;
    sn[1] = *cos_sin_ptr++;

    for (pass = 0; pass < 2; pass++) {
      re = *src++;
      im = *src++;

      rot_im = ixheaac_sub32(ixheaac_mult32(im, cs[pass]),
                             ixheaac_mult32(re, sn[pass]));
      rot_re = -ixheaac_add32(ixheaac_mult32(re, cs[pass]),
                              ixheaac_mult32(im, sn[pass]));

      *fall_main = rot_re;
      *rise_main = rot_im;

      *rise_copy = -*fall_main;
      *fall_copy = *rise_main;

      rise_main += 2;
      fall_main -= 2;
      rise_copy += 2;
      fall_copy -= 2;
    }
  }
}
2893
2894
/**
 * In-place radix-4 butterfly passes over ptr_x followed by a final
 * radix-2/radix-4 pass that scatters the result into ptr_y in
 * digit-reversed order.
 *
 * The twiddle table is chosen from imdct_tables_ptr by npoints
 * (w_256 for 256, w_32 for 32, w_16 otherwise).
 *
 * @param imdct_tables_ptr  table set providing w_256 / w_32 / w_16
 * @param npoints           transform size; values <= 0 are ignored
 * @param ptr_x             interleaved (re, im) work buffer, modified in place
 * @param ptr_y             interleaved (re, im) output buffer
 *
 * NOTE(review): MPYHIRC, MPYLUHS, DIG_REV and rev_dig are defined elsewhere
 * in this file/project; their exact fixed-point scaling is not visible here.
 */
VOID ixheaacd_fft32x32_ld_dec(ia_aac_dec_imdct_tables_struct *imdct_tables_ptr,
                              WORD32 npoints, WORD32 *ptr_x, WORD32 *ptr_y) {
  WORD32 i, j, l1, l2, h2, predj, tw_offset, stride, fft_jmp, k;
  WORD32 xt0_0, yt0_0, xt1_0, yt1_0, xt2_0, yt2_0;
  WORD32 xh0_0, xh1_0, xh20_0, xh21_0, xl0_0, xl1_0, xl20_0, xl21_0;
  WORD32 xh0_1, xh1_1, xl0_1, xl1_1;
  WORD32 x_0, x_1, x_2, x_3, x_l1_0, x_l1_1, x_l2_0, x_l2_1;
  WORD32 xh0_2, xh1_2, xl0_2, xl1_2, xh0_3, xh1_3, xl0_3, xl1_3;
  WORD32 x_4, x_5, x_6, x_7, x_h2_0, x_h2_1;
  WORD32 x_8, x_9, x_a, x_b, x_c, x_d, x_e, x_f;
  WORD32 si10, si20, si30, co10, co20, co30;
  WORD32 *w;
  WORD32 *x, *x2, *x0;
  WORD32 *y0, *y1, *y2, *y3;
  WORD32 n00, n10, n20, n30, n01, n11, n21, n31;
  WORD32 n02, n12, n22, n32, n03, n13, n23, n33;
  WORD32 n0, j0;
  WORD32 radix;
  WORD32 norm;
  WORD32 m;
  WORD32 *ptr_w;

  /* A non-positive size has no work to do. For zero the bit scan below */
  /* would never find a set bit and would run into a negative shift.    */
  if (npoints <= 0) return;

  if (npoints == 256)
    ptr_w = imdct_tables_ptr->w_256;
  else if (npoints == 32)
    ptr_w = (WORD32 *)imdct_tables_ptr->w_32;
  else
    ptr_w = imdct_tables_ptr->w_16;

  /* Scan down from bit 31 to the highest set bit of npoints; m ends as   */
  /* (number of leading zero bits) + 1. The mask is built in unsigned     */
  /* arithmetic because 1 << 31 on a signed int is undefined behaviour.   */
  for (i = 31, m = 1; ((unsigned)npoints & (1u << i)) == 0; i--, m++)
    ;
  /* Odd m selects a radix-2 final pass, even m a radix-4 final pass. */
  radix = m & 1 ? 2 : 4;
  norm = m - 2;

  stride = npoints;
  tw_offset = 0;
  fft_jmp = 6 * stride;

  /* Radix-4 passes, performed in place on ptr_x. */
  while (stride > radix) {
    j = 0;
    fft_jmp >>= 2;

    h2 = stride >> 1;
    l1 = stride;
    l2 = stride + (stride >> 1);

    x = ptr_x;
    w = ptr_w + tw_offset;
    tw_offset += fft_jmp;

    stride >>= 2;

    for (i = 0; i < npoints; i += 4) {
      /* Six twiddle words per butterfly, stored as (si, co) pairs. */
      co10 = w[j + 1];
      si10 = w[j + 0];
      co20 = w[j + 3];
      si20 = w[j + 2];
      co30 = w[j + 5];
      si30 = w[j + 4];

      x_0 = x[0];
      x_1 = x[1];
      x_l1_0 = x[l1];
      x_l1_1 = x[l1 + 1];
      x_l2_0 = x[l2];
      x_l2_1 = x[l2 + 1];
      x_h2_0 = x[h2];
      x_h2_1 = x[h2 + 1];

      xh0_0 = ixheaac_add32_sat(x_0, x_l1_0);
      xh1_0 = ixheaac_add32_sat(x_1, x_l1_1);
      xl0_0 = ixheaac_sub32_sat(x_0, x_l1_0);
      xl1_0 = ixheaac_sub32_sat(x_1, x_l1_1);
      xh20_0 = ixheaac_add32_sat(x_h2_0, x_l2_0);
      xh21_0 = ixheaac_add32_sat(x_h2_1, x_l2_1);
      xl20_0 = ixheaac_sub32_sat(x_h2_0, x_l2_0);
      xl21_0 = ixheaac_sub32_sat(x_h2_1, x_l2_1);

      /* Remember where this butterfly lives before x is advanced. */
      x0 = x;
      x2 = x0;

      j += 6;
      x += 2;
      /* When the twiddle index reaches fft_jmp, skip ahead by fft_jmp */
      /* words in x and restart the twiddle index.                     */
      predj = (j - fft_jmp);
      if (!predj) x += fft_jmp;
      if (!predj) j = 0;

      x0[0] = ixheaac_add32_sat(xh0_0, xh20_0);
      x0[1] = ixheaac_add32_sat(xh1_0, xh21_0);
      xt0_0 = ixheaac_sub32_sat(xh0_0, xh20_0);
      yt0_0 = ixheaac_sub32_sat(xh1_0, xh21_0);
      xt1_0 = ixheaac_add32_sat(xl0_0, xl21_0);
      yt2_0 = ixheaac_add32_sat(xl1_0, xl20_0);
      xt2_0 = ixheaac_sub32_sat(xl0_0, xl21_0);
      yt1_0 = ixheaac_sub32_sat(xl1_0, xl20_0);

      /* Twiddle multiplies: a high-part product plus a rounded low-part */
      /* correction, for each of the three non-trivial outputs.          */
      x2[h2] =
          MPYHIRC(si10, yt1_0) + MPYHIRC(co10, xt1_0) +
          (((MPYLUHS(si10, yt1_0) + MPYLUHS(co10, xt1_0) + 0x8000) >> 16) << 1);

      x2[h2 + 1] =
          MPYHIRC(co10, yt1_0) - MPYHIRC(si10, xt1_0) +
          (((MPYLUHS(co10, yt1_0) - MPYLUHS(si10, xt1_0) + 0x8000) >> 16) << 1);

      x2[l1] =
          MPYHIRC(si20, yt0_0) + MPYHIRC(co20, xt0_0) +
          (((MPYLUHS(si20, yt0_0) + MPYLUHS(co20, xt0_0) + 0x8000) >> 16) << 1);

      x2[l1 + 1] =
          MPYHIRC(co20, yt0_0) - MPYHIRC(si20, xt0_0) +
          (((MPYLUHS(co20, yt0_0) - MPYLUHS(si20, xt0_0) + 0x8000) >> 16) << 1);

      x2[l2] =
          MPYHIRC(si30, yt2_0) + MPYHIRC(co30, xt2_0) +
          (((MPYLUHS(si30, yt2_0) + MPYLUHS(co30, xt2_0) + 0x8000) >> 16) << 1);

      x2[l2 + 1] =
          MPYHIRC(co30, yt2_0) - MPYHIRC(si30, xt2_0) +
          (((MPYLUHS(co30, yt2_0) - MPYLUHS(si30, xt2_0) + 0x8000) >> 16) << 1);
    }
  }

  /* Final pass: read ptr_x linearly, write ptr_y through four output */
  /* quarter pointers at a digit-reversed offset.                     */
  y0 = ptr_y;
  y2 = ptr_y + (WORD32)npoints;
  x0 = ptr_x;
  x2 = ptr_x + (WORD32)(npoints >> 1);

  if (radix == 2) {
    y1 = y0 + (WORD32)(npoints >> 2);
    y3 = y2 + (WORD32)(npoints >> 2);
    l1 = norm + 1;
    j0 = 8;
    n0 = npoints >> 1;
  } else {
    y1 = y0 + (WORD32)(npoints >> 1);
    y3 = y2 + (WORD32)(npoints >> 1);
    l1 = norm + 2;
    j0 = 4;
    n0 = npoints >> 2;
  }

  j = 0;
  k = 0;

  for (i = 0; i < npoints; i += 8) {
    /* Size 32 takes its reversed index from the rev_dig table; all */
    /* other sizes compute it with DIG_REV.                         */
    if (npoints == 32)
      h2 = rev_dig[k++];
    else
      DIG_REV(j, l1, h2);

    x_0 = x0[0];
    x_1 = x0[1];
    x_2 = x0[2];
    x_3 = x0[3];
    x_4 = x0[4];
    x_5 = x0[5];
    x_6 = x0[6];
    x_7 = x0[7];
    x0 += 8;

    xh0_0 = ixheaac_add32_sat(x_0, x_4);
    xh1_0 = ixheaac_add32_sat(x_1, x_5);
    xl0_0 = ixheaac_sub32_sat(x_0, x_4);
    xl1_0 = ixheaac_sub32_sat(x_1, x_5);
    xh0_1 = ixheaac_add32_sat(x_2, x_6);
    xh1_1 = ixheaac_add32_sat(x_3, x_7);
    xl0_1 = ixheaac_sub32_sat(x_2, x_6);
    xl1_1 = ixheaac_sub32_sat(x_3, x_7);

    n00 = ixheaac_add32_sat(xh0_0, xh0_1);
    n01 = ixheaac_add32_sat(xh1_0, xh1_1);
    n10 = ixheaac_add32_sat(xl0_0, xl1_1);
    n11 = ixheaac_sub32_sat(xl1_0, xl0_1);
    n20 = ixheaac_sub32_sat(xh0_0, xh0_1);
    n21 = ixheaac_sub32_sat(xh1_0, xh1_1);
    n30 = ixheaac_sub32_sat(xl0_0, xl1_1);
    n31 = ixheaac_add32_sat(xl1_0, xl0_1);

    /* The radix-2 case overrides the radix-4 results computed above. */
    if (radix == 2) {
      n00 = ixheaac_add32_sat(x_0, x_2);
      n01 = ixheaac_add32_sat(x_1, x_3);
      n20 = ixheaac_sub32_sat(x_0, x_2);
      n21 = ixheaac_sub32_sat(x_1, x_3);
      n10 = ixheaac_add32_sat(x_4, x_6);
      n11 = ixheaac_add32_sat(x_5, x_7);
      n30 = ixheaac_sub32_sat(x_4, x_6);
      n31 = ixheaac_sub32_sat(x_5, x_7);
    }

    y0[2 * h2] = n00;
    y0[2 * h2 + 1] = n01;
    y1[2 * h2] = n10;
    y1[2 * h2 + 1] = n11;
    y2[2 * h2] = n20;
    y2[2 * h2 + 1] = n21;
    y3[2 * h2] = n30;
    y3[2 * h2 + 1] = n31;

    x_8 = x2[0];
    x_9 = x2[1];
    x_a = x2[2];
    x_b = x2[3];
    x_c = x2[4];
    x_d = x2[5];
    x_e = x2[6];
    x_f = x2[7];
    x2 += 8;

    xh0_2 = ixheaac_add32_sat(x_8, x_c);
    xh1_2 = ixheaac_add32_sat(x_9, x_d);
    xl0_2 = ixheaac_sub32_sat(x_8, x_c);
    xl1_2 = ixheaac_sub32_sat(x_9, x_d);
    xh0_3 = ixheaac_add32_sat(x_a, x_e);
    xh1_3 = ixheaac_add32_sat(x_b, x_f);
    xl0_3 = ixheaac_sub32_sat(x_a, x_e);
    xl1_3 = ixheaac_sub32_sat(x_b, x_f);

    n02 = ixheaac_add32_sat(xh0_2, xh0_3);
    n03 = ixheaac_add32_sat(xh1_2, xh1_3);
    n12 = ixheaac_add32_sat(xl0_2, xl1_3);
    n13 = ixheaac_sub32_sat(xl1_2, xl0_3);
    n22 = ixheaac_sub32_sat(xh0_2, xh0_3);
    n23 = ixheaac_sub32_sat(xh1_2, xh1_3);
    n32 = ixheaac_sub32_sat(xl0_2, xl1_3);
    n33 = ixheaac_add32_sat(xl1_2, xl0_3);

    if (radix == 2) {
      n02 = ixheaac_add32_sat(x_8, x_a);
      n03 = ixheaac_add32_sat(x_9, x_b);
      n22 = ixheaac_sub32_sat(x_8, x_a);
      n23 = ixheaac_sub32_sat(x_9, x_b);
      n12 = ixheaac_add32_sat(x_c, x_e);
      n13 = ixheaac_add32_sat(x_d, x_f);
      n32 = ixheaac_sub32_sat(x_c, x_e);
      n33 = ixheaac_sub32_sat(x_d, x_f);
    }

    y0[2 * h2 + 2] = n02;
    y0[2 * h2 + 3] = n03;
    y1[2 * h2 + 2] = n12;
    y1[2 * h2 + 3] = n13;
    y2[2 * h2 + 2] = n22;
    y2[2 * h2 + 3] = n23;
    y3[2 * h2 + 2] = n32;
    y3[2 * h2 + 3] = n33;

    j += j0;

    /* Once j reaches n0, both read pointers skip the half of ptr_x */
    /* already consumed through the other pointer.                  */
    if (j == n0) {
      j += n0;
      x0 += (WORD32)npoints >> 1;
      x2 += (WORD32)npoints >> 1;
    }
  }
}
3149
3150
VOID ixheaacd_rearrange_dec(WORD32 *ip, WORD32 *op, WORD32 mdct_len_2,
3151
48.5k
                            UWORD8 *re_arr_tab) {
3152
48.5k
  WORD32 n, i = 0;
3153
3154
11.6M
  for (n = 0; n < mdct_len_2; n++) {
3155
11.6M
    WORD32 idx = re_arr_tab[n] << 1;
3156
3157
11.6M
    op[i++] = ip[idx];
3158
11.6M
    op[i++] = ip[idx + 1];
3159
11.6M
  }
3160
48.5k
}
3161
3162
VOID ixheaacd_fft_15_ld_dec(WORD32 *inp, WORD32 *op, WORD32 *fft3out,
3163
776k
                            UWORD8 *re_arr_tab_sml_240_ptr) {
3164
776k
  WORD32 i, n, idx;
3165
776k
  WORD32 *buf1, *buf2, *buf1a;
3166
776k
  WORD32 add_r, sub_r;
3167
776k
  WORD32 add_i, sub_i;
3168
776k
  WORD32 x01_real, x_01_imag, temp;
3169
776k
  WORD32 p1, p2, p3, p4;
3170
3171
776k
  WORD32 sinmu = 1859775393;
3172
776k
  WORD32 cos_51 = 2042378317;
3173
776k
  WORD32 cos_52 = -1652318768;
3174
776k
  WORD32 cos_53 = -780119100;
3175
776k
  WORD32 cos_54 = 1200479854;
3176
776k
  WORD32 cos_55 = -1342177280;
3177
3178
776k
  WORD32 r1, r2, r3, r4;
3179
776k
  WORD32 s1, s2, s3, s4, t, temp1, temp2;
3180
776k
  WORD32 *fft3outptr = fft3out;
3181
3182
776k
  WORD32 xr_0, xr_1, xr_2;
3183
776k
  WORD32 xi_0, xi_1, xi_2;
3184
3185
776k
  buf2 = fft3out;
3186
776k
  buf1 = buf1a = fft3out;
3187
776k
  n = 0;
3188
3189
776k
  {
3190
776k
    *buf1++ = inp[0];
3191
776k
    *buf1++ = inp[1];
3192
3193
776k
    *buf1++ = inp[96];
3194
776k
    *buf1++ = inp[97];
3195
3196
776k
    *buf1++ = inp[192];
3197
776k
    *buf1++ = inp[193];
3198
3199
776k
    *buf1++ = inp[288];
3200
776k
    *buf1++ = inp[289];
3201
3202
776k
    *buf1++ = inp[384];
3203
776k
    *buf1++ = inp[385];
3204
3205
776k
    r1 = ixheaac_add32_sat(buf1a[2], buf1a[8]);
3206
776k
    r4 = ixheaac_sub32_sat(buf1a[2], buf1a[8]);
3207
776k
    r3 = ixheaac_add32_sat(buf1a[4], buf1a[6]);
3208
776k
    r2 = ixheaac_sub32_sat(buf1a[4], buf1a[6]);
3209
3210
776k
    t = ixheaac_mult32_shl(ixheaac_sub32_sat(r1, r3), cos_54);
3211
3212
776k
    r1 = ixheaac_add32_sat(r1, r3);
3213
3214
776k
    temp1 = ixheaac_add32_sat(buf1a[0], r1);
3215
3216
776k
    r1 = ixheaac_add32_sat(
3217
776k
        temp1, ixheaac_shl32_sat((ixheaac_mult32_shl(r1, cos_55)), 1));
3218
3219
776k
    r3 = ixheaac_sub32_sat(r1, t);
3220
776k
    r1 = ixheaac_add32_sat(r1, t);
3221
3222
776k
    t = ixheaac_mult32_shl(ixheaac_add32_sat(r4, r2), cos_51);
3223
776k
    r4 = ixheaac_add32_sat(
3224
776k
        t, ixheaac_shl32_sat(ixheaac_mult32_shl(r4, cos_52), 1));
3225
776k
    r2 = ixheaac_add32_sat(t, ixheaac_mult32_shl(r2, cos_53));
3226
3227
776k
    s1 = ixheaac_add32_sat(buf1a[3], buf1a[9]);
3228
776k
    s4 = ixheaac_sub32_sat(buf1a[3], buf1a[9]);
3229
776k
    s3 = ixheaac_add32_sat(buf1a[5], buf1a[7]);
3230
776k
    s2 = ixheaac_sub32_sat(buf1a[5], buf1a[7]);
3231
3232
776k
    t = ixheaac_mult32_shl(ixheaac_sub32_sat(s1, s3), cos_54);
3233
776k
    s1 = ixheaac_add32_sat(s1, s3);
3234
3235
776k
    temp2 = ixheaac_add32_sat(buf1a[1], s1);
3236
3237
776k
    s1 = ixheaac_add32_sat(
3238
776k
        temp2, ixheaac_shl32_sat((ixheaac_mult32_shl(s1, cos_55)), 1));
3239
3240
776k
    s3 = ixheaac_sub32_sat(s1, t);
3241
776k
    s1 = ixheaac_add32_sat(s1, t);
3242
3243
776k
    t = ixheaac_mult32_shl(ixheaac_add32_sat(s4, s2), cos_51);
3244
776k
    s4 = ixheaac_add32_sat(
3245
776k
        t, ixheaac_shl32_sat((ixheaac_mult32_shl(s4, cos_52)), 1));
3246
776k
    s2 = ixheaac_add32_sat(t, (ixheaac_mult32_shl(s2, cos_53)));
3247
3248
776k
    *buf2++ = temp1;
3249
776k
    *buf2++ = temp2;
3250
776k
    *buf2++ = ixheaac_add32_sat(r1, s2);
3251
776k
    *buf2++ = ixheaac_sub32_sat(s1, r2);
3252
776k
    *buf2++ = ixheaac_sub32_sat(r3, s4);
3253
776k
    *buf2++ = ixheaac_add32_sat(s3, r4);
3254
776k
    *buf2++ = ixheaac_add32_sat(r3, s4);
3255
776k
    *buf2++ = ixheaac_sub32_sat(s3, r4);
3256
776k
    *buf2++ = ixheaac_sub32_sat(r1, s2);
3257
776k
    *buf2++ = ixheaac_add32_sat(s1, r2);
3258
776k
    buf1a = buf1;
3259
3260
776k
    *buf1++ = inp[160];
3261
776k
    *buf1++ = inp[161];
3262
3263
776k
    *buf1++ = inp[256];
3264
776k
    *buf1++ = inp[257];
3265
3266
776k
    *buf1++ = inp[352];
3267
776k
    *buf1++ = inp[353];
3268
3269
776k
    *buf1++ = inp[448];
3270
776k
    *buf1++ = inp[449];
3271
3272
776k
    *buf1++ = inp[64];
3273
776k
    *buf1++ = inp[65];
3274
3275
776k
    r1 = ixheaac_add32_sat(buf1a[2], buf1a[8]);
3276
776k
    r4 = ixheaac_sub32_sat(buf1a[2], buf1a[8]);
3277
776k
    r3 = ixheaac_add32_sat(buf1a[4], buf1a[6]);
3278
776k
    r2 = ixheaac_sub32_sat(buf1a[4], buf1a[6]);
3279
3280
776k
    t = ixheaac_mult32_shl(ixheaac_sub32_sat(r1, r3), cos_54);
3281
3282
776k
    r1 = ixheaac_add32_sat(r1, r3);
3283
3284
776k
    temp1 = ixheaac_add32_sat(buf1a[0], r1);
3285
3286
776k
    r1 = ixheaac_add32_sat(
3287
776k
        temp1, ixheaac_shl32_sat((ixheaac_mult32_shl(r1, cos_55)), 1));
3288
3289
776k
    r3 = ixheaac_sub32_sat(r1, t);
3290
776k
    r1 = ixheaac_add32_sat(r1, t);
3291
3292
776k
    t = ixheaac_mult32_shl(ixheaac_add32_sat(r4, r2), cos_51);
3293
776k
    r4 = ixheaac_add32_sat(
3294
776k
        t, ixheaac_shl32_sat(ixheaac_mult32_shl(r4, cos_52), 1));
3295
776k
    r2 = ixheaac_add32_sat(t, ixheaac_mult32_shl(r2, cos_53));
3296
3297
776k
    s1 = ixheaac_add32_sat(buf1a[3], buf1a[9]);
3298
776k
    s4 = ixheaac_sub32_sat(buf1a[3], buf1a[9]);
3299
776k
    s3 = ixheaac_add32_sat(buf1a[5], buf1a[7]);
3300
776k
    s2 = ixheaac_sub32_sat(buf1a[5], buf1a[7]);
3301
3302
776k
    t = ixheaac_mult32_shl(ixheaac_sub32_sat(s1, s3), cos_54);
3303
3304
776k
    s1 = ixheaac_add32_sat(s1, s3);
3305
3306
776k
    temp2 = ixheaac_add32_sat(buf1a[1], s1);
3307
3308
776k
    s1 = ixheaac_add32_sat(
3309
776k
        temp2, ixheaac_shl32_sat((ixheaac_mult32_shl(s1, cos_55)), 1));
3310
3311
776k
    s3 = ixheaac_sub32_sat(s1, t);
3312
776k
    s1 = ixheaac_add32_sat(s1, t);
3313
3314
776k
    t = ixheaac_mult32_shl(ixheaac_add32_sat(s4, s2), cos_51);
3315
776k
    s4 = ixheaac_add32_sat(
3316
776k
        t, ixheaac_shl32_sat((ixheaac_mult32_shl(s4, cos_52)), 1));
3317
776k
    s2 = ixheaac_add32_sat(t, (ixheaac_mult32_shl(s2, cos_53)));
3318
3319
776k
    *buf2++ = temp1;
3320
776k
    *buf2++ = temp2;
3321
776k
    *buf2++ = ixheaac_add32_sat(r1, s2);
3322
776k
    *buf2++ = ixheaac_sub32_sat(s1, r2);
3323
776k
    *buf2++ = ixheaac_sub32_sat(r3, s4);
3324
776k
    *buf2++ = ixheaac_add32_sat(s3, r4);
3325
776k
    *buf2++ = ixheaac_add32_sat(r3, s4);
3326
776k
    *buf2++ = ixheaac_sub32_sat(s3, r4);
3327
776k
    *buf2++ = ixheaac_sub32_sat(r1, s2);
3328
776k
    *buf2++ = ixheaac_add32_sat(s1, r2);
3329
776k
    buf1a = buf1;
3330
776k
    ;
3331
3332
776k
    *buf1++ = inp[320];
3333
776k
    *buf1++ = inp[321];
3334
3335
776k
    *buf1++ = inp[416];
3336
776k
    *buf1++ = inp[417];
3337
3338
776k
    *buf1++ = inp[32];
3339
776k
    *buf1++ = inp[33];
3340
3341
776k
    *buf1++ = inp[128];
3342
776k
    *buf1++ = inp[129];
3343
3344
776k
    *buf1++ = inp[224];
3345
776k
    *buf1++ = inp[225];
3346
3347
776k
    r1 = ixheaac_add32_sat(buf1a[2], buf1a[8]);
3348
776k
    r4 = ixheaac_sub32_sat(buf1a[2], buf1a[8]);
3349
776k
    r3 = ixheaac_add32_sat(buf1a[4], buf1a[6]);
3350
776k
    r2 = ixheaac_sub32_sat(buf1a[4], buf1a[6]);
3351
3352
776k
    t = ixheaac_mult32_shl(ixheaac_sub32_sat(r1, r3), cos_54);
3353
3354
776k
    r1 = ixheaac_add32_sat(r1, r3);
3355
3356
776k
    temp1 = ixheaac_add32_sat(buf1a[0], r1);
3357
3358
776k
    r1 = ixheaac_add32_sat(
3359
776k
        temp1, ixheaac_shl32_sat((ixheaac_mult32_shl(r1, cos_55)), 1));
3360
3361
776k
    r3 = ixheaac_sub32_sat(r1, t);
3362
776k
    r1 = ixheaac_add32_sat(r1, t);
3363
3364
776k
    t = ixheaac_mult32_shl(ixheaac_add32_sat(r4, r2), cos_51);
3365
776k
    r4 = ixheaac_add32_sat(
3366
776k
        t, ixheaac_shl32_sat(ixheaac_mult32_shl(r4, cos_52), 1));
3367
776k
    r2 = ixheaac_add32_sat(t, ixheaac_mult32_shl(r2, cos_53));
3368
3369
776k
    s1 = ixheaac_add32_sat(buf1a[3], buf1a[9]);
3370
776k
    s4 = ixheaac_sub32_sat(buf1a[3], buf1a[9]);
3371
776k
    s3 = ixheaac_add32_sat(buf1a[5], buf1a[7]);
3372
776k
    s2 = ixheaac_sub32_sat(buf1a[5], buf1a[7]);
3373
3374
776k
    t = ixheaac_mult32_shl(ixheaac_sub32_sat(s1, s3), cos_54);
3375
3376
776k
    s1 = ixheaac_add32_sat(s1, s3);
3377
3378
776k
    temp2 = ixheaac_add32_sat(buf1a[1], s1);
3379
3380
776k
    s1 = ixheaac_add32_sat(
3381
776k
        temp2, ixheaac_shl32_sat((ixheaac_mult32_shl(s1, cos_55)), 1));
3382
3383
776k
    s3 = ixheaac_sub32_sat(s1, t);
3384
776k
    s1 = ixheaac_add32_sat(s1, t);
3385
3386
776k
    t = ixheaac_mult32_shl(ixheaac_add32_sat(s4, s2), cos_51);
3387
776k
    s4 = ixheaac_add32_sat(
3388
776k
        t, ixheaac_shl32_sat((ixheaac_mult32_shl(s4, cos_52)), 1));
3389
776k
    s2 = ixheaac_add32_sat(t, (ixheaac_mult32_shl(s2, cos_53)));
3390
3391
776k
    *buf2++ = temp1;
3392
776k
    *buf2++ = temp2;
3393
776k
    *buf2++ = ixheaac_add32_sat(r1, s2);
3394
776k
    *buf2++ = ixheaac_sub32_sat(s1, r2);
3395
776k
    *buf2++ = ixheaac_sub32_sat(r3, s4);
3396
776k
    *buf2++ = ixheaac_add32_sat(s3, r4);
3397
776k
    *buf2++ = ixheaac_add32_sat(r3, s4);
3398
776k
    *buf2++ = ixheaac_sub32_sat(s3, r4);
3399
776k
    *buf2++ = ixheaac_sub32_sat(r1, s2);
3400
776k
    *buf2++ = ixheaac_add32_sat(s1, r2);
3401
776k
    buf1a = buf1;
3402
776k
    ;
3403
776k
  }
3404
3405
776k
  n = 0;
3406
4.65M
  for (i = 0; i < FFT5; i++) {
3407
3.88M
    xr_0 = fft3outptr[0];
3408
3.88M
    xi_0 = fft3outptr[1];
3409
3410
3.88M
    xr_1 = fft3outptr[10];
3411
3.88M
    xi_1 = fft3outptr[11];
3412
3413
3.88M
    xr_2 = fft3outptr[20];
3414
3.88M
    xi_2 = fft3outptr[21];
3415
3416
3.88M
    x01_real = ixheaac_add32_sat(xr_0, xr_1);
3417
3.88M
    x_01_imag = ixheaac_add32_sat(xi_0, xi_1);
3418
3419
3.88M
    add_r = ixheaac_add32_sat(xr_1, xr_2);
3420
3.88M
    add_i = ixheaac_add32_sat(xi_1, xi_2);
3421
3422
3.88M
    sub_r = ixheaac_sub32_sat(xr_1, xr_2);
3423
3.88M
    sub_i = ixheaac_sub32_sat(xi_1, xi_2);
3424
3425
3.88M
    p1 = add_r >> 1;
3426
3427
3.88M
    p2 = ixheaac_mult32_shl(sub_i, sinmu);
3428
3.88M
    p3 = ixheaac_mult32_shl(sub_r, sinmu);
3429
3430
3.88M
    p4 = add_i >> 1;
3431
3432
3.88M
    temp = ixheaac_sub32_sat(xr_0, p1);
3433
3.88M
    temp1 = ixheaac_add32_sat(xi_0, p3);
3434
3.88M
    temp2 = ixheaac_sub32_sat(xi_0, p3);
3435
3436
3.88M
    idx = re_arr_tab_sml_240_ptr[n++] << 1;
3437
3.88M
    op[idx] = ixheaac_add32_sat(x01_real, xr_2);
3438
3.88M
    op[idx + 1] = ixheaac_add32_sat(x_01_imag, xi_2);
3439
3440
3.88M
    idx = re_arr_tab_sml_240_ptr[n++] << 1;
3441
3.88M
    op[idx] = ixheaac_add32_sat(temp, p2);
3442
3.88M
    op[idx + 1] = ixheaac_sub32_sat(temp2, p4);
3443
3444
3.88M
    idx = re_arr_tab_sml_240_ptr[n++] << 1;
3445
3.88M
    op[idx] = ixheaac_sub32_sat(temp, p2);
3446
3.88M
    op[idx + 1] = ixheaac_sub32_sat(temp1, p4);
3447
3.88M
    fft3outptr += 2;
3448
3.88M
  }
3449
776k
}