/src/libxaac/encoder/iusace_acelp_tools.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * * |
3 | | * Copyright (C) 2023 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | #include <string.h> |
22 | | #include <math.h> |
23 | | #include "ixheaac_type_def.h" |
24 | | #include "iusace_cnst.h" |
25 | | #include "iusace_lpd_rom.h" |
26 | | |
27 | | static VOID iusace_acelp_ir_vec_corr1(FLOAT32 *ir, FLOAT32 *vec, UWORD8 track, FLOAT32 *sign, |
28 | | FLOAT32 (*corr_ir)[16], FLOAT32 *corr_out, WORD32 *dn2_pos, |
29 | 46.5M | WORD32 num_pluse_pos) { |
30 | 46.5M | WORD16 i, j; |
31 | 46.5M | WORD32 dn; |
32 | 46.5M | WORD32 *dn2; |
33 | 46.5M | FLOAT32 *p0; |
34 | 46.5M | FLOAT32 s; |
35 | 46.5M | dn2 = &dn2_pos[track * 8]; |
36 | 46.5M | p0 = corr_ir[track]; |
37 | 328M | for (i = 0; i < num_pluse_pos; i++) { |
38 | 281M | dn = dn2[i]; |
39 | 281M | s = 0.0F; |
40 | 11.1G | for (j = 0; j < (LEN_SUBFR - dn); j++) { |
41 | 10.8G | s += ir[j] * vec[dn + j]; |
42 | 10.8G | } |
43 | 281M | corr_out[dn >> 2] = sign[dn] * s + p0[dn >> 2]; |
44 | 281M | } |
45 | 46.5M | } |
46 | | |
47 | | static VOID iusace_acelp_ir_vec_corr2(FLOAT32 *ir, FLOAT32 *vec, UWORD8 track, FLOAT32 *sign, |
48 | 46.5M | FLOAT32 (*corr_ir)[16], FLOAT32 *corr_out) { |
49 | 46.5M | WORD32 i, j; |
50 | 46.5M | FLOAT32 *p0; |
51 | 46.5M | FLOAT32 s; |
52 | 46.5M | p0 = corr_ir[track]; |
53 | 791M | for (i = 0; i < 16; i++) { |
54 | 745M | s = 0.0F; |
55 | 25.0G | for (j = 0; j < LEN_SUBFR - track; j++) { |
56 | 24.2G | s += ir[j] * vec[track + j]; |
57 | 24.2G | } |
58 | 745M | corr_out[i] = s * sign[track] + p0[i]; |
59 | 745M | track += 4; |
60 | 745M | } |
61 | 46.5M | } |
62 | | |
63 | | static VOID iusace_acelp_get_2p_pos(WORD32 nb_pos_ix, UWORD8 track_p1, UWORD8 track_p2, |
64 | | FLOAT32 *corr_pulses, FLOAT32 *ener_pulses, WORD32 *pos_p1, |
65 | | WORD32 *pos_p2, FLOAT32 *dn, WORD32 *dn2, FLOAT32 *corr_p1, |
66 | 46.5M | FLOAT32 *corr_p2, FLOAT32 (*corr_p1p2)[256]) { |
67 | 46.5M | WORD32 x, x2, y, x_save = 0, y_save = 0, i, *pos_x; |
68 | 46.5M | FLOAT32 ps0, alp0; |
69 | 46.5M | FLOAT32 ps1, ps2, sq, sqk; |
70 | 46.5M | FLOAT32 alp1, alp2, alpk; |
71 | 46.5M | FLOAT32 *p1, *p2; |
72 | 46.5M | FLOAT32 s; |
73 | 46.5M | pos_x = &dn2[track_p1 << 3]; |
74 | 46.5M | ps0 = *corr_pulses; |
75 | 46.5M | alp0 = *ener_pulses; |
76 | 46.5M | sqk = -1.0F; |
77 | 46.5M | alpk = 1.0F; |
78 | | |
79 | 328M | for (i = 0; i < nb_pos_ix; i++) { |
80 | 281M | x = pos_x[i]; |
81 | 281M | x2 = x >> 2; |
82 | | |
83 | 281M | ps1 = ps0 + dn[x]; |
84 | 281M | alp1 = alp0 + corr_p1[x2]; |
85 | 281M | p1 = corr_p2; |
86 | 281M | p2 = &corr_p1p2[track_p1][x2 << 4]; |
87 | 4.79G | for (y = track_p2; y < LEN_SUBFR; y += 4) { |
88 | 4.51G | ps2 = ps1 + dn[y]; |
89 | 4.51G | alp2 = alp1 + (*p1++) + (*p2++); |
90 | 4.51G | sq = ps2 * ps2; |
91 | 4.51G | s = (alpk * sq) - (sqk * alp2); |
92 | 4.51G | if (s > 0.0F) { |
93 | 308M | sqk = sq; |
94 | 308M | alpk = alp2; |
95 | 308M | y_save = y; |
96 | 308M | x_save = x; |
97 | 308M | } |
98 | 4.51G | } |
99 | 281M | } |
100 | 46.5M | *corr_pulses = ps0 + dn[x_save] + dn[y_save]; |
101 | 46.5M | *ener_pulses = alpk; |
102 | 46.5M | *pos_p1 = x_save; |
103 | 46.5M | *pos_p2 = y_save; |
104 | 46.5M | } |
105 | | |
106 | | static VOID iusace_acelp_get_1p_pos(UWORD8 track_p1, UWORD8 track_p2, FLOAT32 *corr_pulses, |
107 | | FLOAT32 *alp, WORD32 *pos_p1, FLOAT32 *dn, FLOAT32 *corr_p1, |
108 | 0 | FLOAT32 *corr_p2) { |
109 | 0 | WORD32 x, x_save = 0; |
110 | 0 | FLOAT32 ps0, alp0; |
111 | 0 | FLOAT32 ps1, sq, sqk; |
112 | 0 | FLOAT32 alp1, alpk; |
113 | 0 | FLOAT32 s; |
114 | |
|
115 | 0 | ps0 = *corr_pulses; |
116 | 0 | alp0 = *alp; |
117 | 0 | sqk = -1.0F; |
118 | 0 | alpk = 1.0F; |
119 | |
|
120 | 0 | for (x = track_p1; x < LEN_SUBFR; x += 4) { |
121 | 0 | ps1 = ps0 + dn[x]; |
122 | 0 | alp1 = alp0 + corr_p1[x >> 2]; |
123 | 0 | sq = ps1 * ps1; |
124 | 0 | s = (alpk * sq) - (sqk * alp1); |
125 | 0 | if (s > 0.0F) { |
126 | 0 | sqk = sq; |
127 | 0 | alpk = alp1; |
128 | 0 | x_save = x; |
129 | 0 | } |
130 | 0 | } |
131 | |
|
132 | 0 | if (track_p2 != track_p1) { |
133 | 0 | for (x = track_p2; x < LEN_SUBFR; x += 4) { |
134 | 0 | ps1 = ps0 + dn[x]; |
135 | 0 | alp1 = alp0 + corr_p2[x >> 2]; |
136 | 0 | sq = ps1 * ps1; |
137 | 0 | s = (alpk * sq) - (sqk * alp1); |
138 | 0 | if (s > 0.0F) { |
139 | 0 | sqk = sq; |
140 | 0 | alpk = alp1; |
141 | 0 | x_save = x; |
142 | 0 | } |
143 | 0 | } |
144 | 0 | } |
145 | |
|
146 | 0 | *corr_pulses = ps0 + dn[x_save]; |
147 | 0 | *alp = alpk; |
148 | 0 | *pos_p1 = x_save; |
149 | 0 | } |
150 | | |
151 | 8.55M | static WORD32 iusace_acelp_quant_1p_n1bits(WORD32 pos_pulse, WORD32 num_bits_pos) { |
152 | 8.55M | WORD32 mask; |
153 | 8.55M | WORD32 index; |
154 | 8.55M | mask = ((1 << num_bits_pos) - 1); |
155 | | |
156 | 8.55M | index = (pos_pulse & mask); |
157 | 8.55M | if ((pos_pulse & 16) != 0) { |
158 | 4.23M | index += 1 << num_bits_pos; |
159 | 4.23M | } |
160 | 8.55M | return (index); |
161 | 8.55M | } |
162 | | |
163 | 16.0M | static WORD32 iusace_acelp_quant_2p_2n1bits(WORD32 pos_p1, WORD32 pos_p2, WORD32 num_bits_pos) { |
164 | 16.0M | WORD32 mask; |
165 | 16.0M | WORD32 index; |
166 | 16.0M | mask = ((1 << num_bits_pos) - 1); |
167 | | |
168 | 16.0M | if (((pos_p2 ^ pos_p1) & 16) == 0) { |
169 | 11.5M | if ((pos_p1 - pos_p2) <= 0) { |
170 | 8.19M | index = ((pos_p1 & mask) << num_bits_pos) + (pos_p2 & mask); |
171 | 8.19M | } else { |
172 | 3.39M | index = ((pos_p2 & mask) << num_bits_pos) + (pos_p1 & mask); |
173 | 3.39M | } |
174 | 11.5M | if ((pos_p1 & 16) != 0) { |
175 | 5.69M | index += 1 << (2 * num_bits_pos); |
176 | 5.69M | } |
177 | 11.5M | } else { |
178 | 4.43M | if (((pos_p1 & mask) - (pos_p2 & mask)) <= 0) { |
179 | 2.46M | index = ((pos_p2 & mask) << num_bits_pos) + (pos_p1 & mask); |
180 | 2.46M | if ((pos_p2 & 16) != 0) { |
181 | 1.20M | index += 1 << (2 * num_bits_pos); |
182 | 1.20M | } |
183 | 2.46M | } else { |
184 | 1.96M | index = ((pos_p1 & mask) << num_bits_pos) + (pos_p2 & mask); |
185 | 1.96M | if ((pos_p1 & 16) != 0) { |
186 | 982k | index += 1 << (2 * num_bits_pos); |
187 | 982k | } |
188 | 1.96M | } |
189 | 4.43M | } |
190 | 16.0M | return (index); |
191 | 16.0M | } |
192 | | |
193 | | static WORD32 iusace_acelp_quant_3p_3n1bits(WORD32 pos_p1, WORD32 pos_p2, WORD32 pos_p3, |
194 | 4.38M | WORD32 num_bits_pos) { |
195 | 4.38M | WORD32 nb_pos; |
196 | 4.38M | WORD32 index; |
197 | 4.38M | nb_pos = (1 << (num_bits_pos - 1)); |
198 | | |
199 | 4.38M | if (((pos_p1 ^ pos_p2) & nb_pos) == 0) { |
200 | 2.31M | index = iusace_acelp_quant_2p_2n1bits(pos_p1, pos_p2, (num_bits_pos - 1)); |
201 | 2.31M | index += (pos_p1 & nb_pos) << num_bits_pos; |
202 | 2.31M | index += iusace_acelp_quant_1p_n1bits(pos_p3, num_bits_pos) << (2 * num_bits_pos); |
203 | 2.31M | } else if (((pos_p1 ^ pos_p3) & nb_pos) == 0) { |
204 | 1.15M | index = iusace_acelp_quant_2p_2n1bits(pos_p1, pos_p3, (num_bits_pos - 1)); |
205 | 1.15M | index += (pos_p1 & nb_pos) << num_bits_pos; |
206 | 1.15M | index += iusace_acelp_quant_1p_n1bits(pos_p2, num_bits_pos) << (2 * num_bits_pos); |
207 | 1.15M | } else { |
208 | 919k | index = iusace_acelp_quant_2p_2n1bits(pos_p2, pos_p3, (num_bits_pos - 1)); |
209 | 919k | index += (pos_p2 & nb_pos) << num_bits_pos; |
210 | 919k | index += iusace_acelp_quant_1p_n1bits(pos_p1, num_bits_pos) << (2 * num_bits_pos); |
211 | 919k | } |
212 | 4.38M | return (index); |
213 | 4.38M | } |
214 | | |
215 | | static WORD32 iusace_acelp_quant_4p_4n1bits(WORD32 pos_p1, WORD32 pos_p2, WORD32 pos_p3, |
216 | 2.47M | WORD32 pos_p4, WORD32 num_bits_pos) { |
217 | 2.47M | WORD32 nb_pos; |
218 | 2.47M | WORD32 index; |
219 | 2.47M | nb_pos = (1 << (num_bits_pos - 1)); |
220 | | |
221 | 2.47M | if (((pos_p1 ^ pos_p2) & nb_pos) == 0) { |
222 | 1.63M | index = iusace_acelp_quant_2p_2n1bits(pos_p1, pos_p2, (num_bits_pos - 1)); |
223 | 1.63M | index += (pos_p1 & nb_pos) << num_bits_pos; |
224 | 1.63M | index += iusace_acelp_quant_2p_2n1bits(pos_p3, pos_p4, num_bits_pos) << (2 * num_bits_pos); |
225 | 1.63M | } else if (((pos_p1 ^ pos_p3) & nb_pos) == 0) { |
226 | 515k | index = iusace_acelp_quant_2p_2n1bits(pos_p1, pos_p3, (num_bits_pos - 1)); |
227 | 515k | index += (pos_p1 & nb_pos) << num_bits_pos; |
228 | 515k | index += iusace_acelp_quant_2p_2n1bits(pos_p2, pos_p4, num_bits_pos) << (2 * num_bits_pos); |
229 | 515k | } else { |
230 | 323k | index = iusace_acelp_quant_2p_2n1bits(pos_p2, pos_p3, (num_bits_pos - 1)); |
231 | 323k | index += (pos_p2 & nb_pos) << num_bits_pos; |
232 | 323k | index += iusace_acelp_quant_2p_2n1bits(pos_p1, pos_p4, num_bits_pos) << (2 * num_bits_pos); |
233 | 323k | } |
234 | 2.47M | return (index); |
235 | 2.47M | } |
236 | | |
237 | 9.47M | static WORD32 iusace_acelp_quant_4p_4nbits(WORD32 *pos_pulses, WORD32 num_bits_pos) { |
238 | 9.47M | WORD32 i, j, k, nb_pos, n_1; |
239 | 9.47M | WORD32 pos_a[4], pos_b[4]; |
240 | 9.47M | WORD32 index = 0; |
241 | 9.47M | n_1 = num_bits_pos - 1; |
242 | 9.47M | nb_pos = (1 << n_1); |
243 | 9.47M | i = 0; |
244 | 9.47M | j = 0; |
245 | 47.3M | for (k = 0; k < 4; k++) { |
246 | 37.9M | if ((pos_pulses[k] & nb_pos) == 0) { |
247 | 23.9M | pos_a[i++] = pos_pulses[k]; |
248 | 23.9M | } else { |
249 | 13.9M | pos_b[j++] = pos_pulses[k]; |
250 | 13.9M | } |
251 | 37.9M | } |
252 | 9.47M | switch (i) { |
253 | 398k | case 0: |
254 | 398k | index = 1 << ((4 * num_bits_pos) - 3); |
255 | 398k | index += iusace_acelp_quant_4p_4n1bits(pos_b[0], pos_b[1], pos_b[2], pos_b[3], n_1); |
256 | 398k | break; |
257 | 1.27M | case 1: |
258 | 1.27M | index = iusace_acelp_quant_1p_n1bits(pos_a[0], n_1) << ((3 * n_1) + 1); |
259 | 1.27M | index += iusace_acelp_quant_3p_3n1bits(pos_b[0], pos_b[1], pos_b[2], n_1); |
260 | 1.27M | break; |
261 | 2.82M | case 2: |
262 | 2.82M | index = iusace_acelp_quant_2p_2n1bits(pos_a[0], pos_a[1], n_1) << ((2 * n_1) + 1); |
263 | 2.82M | index += iusace_acelp_quant_2p_2n1bits(pos_b[0], pos_b[1], n_1); |
264 | 2.82M | break; |
265 | 2.89M | case 3: |
266 | 2.89M | index = iusace_acelp_quant_3p_3n1bits(pos_a[0], pos_a[1], pos_a[2], n_1) << num_bits_pos; |
267 | 2.89M | index += iusace_acelp_quant_1p_n1bits(pos_b[0], n_1); |
268 | 2.89M | break; |
269 | 2.07M | case 4: |
270 | 2.07M | index = iusace_acelp_quant_4p_4n1bits(pos_a[0], pos_a[1], pos_a[2], pos_a[3], n_1); |
271 | 2.07M | break; |
272 | 9.47M | } |
273 | 9.47M | index += (i & 3) << ((4 * num_bits_pos) - 2); |
274 | 9.47M | return (index); |
275 | 9.47M | } |
276 | | |
277 | 0 | static WORD32 iusace_acelp_quant_5p_5nbits(WORD32 *pos_pulses, WORD32 num_bits_pos) { |
278 | 0 | WORD32 i, j, k, nb_pos, n_1; |
279 | 0 | WORD32 pos_a[5], pos_b[5]; |
280 | 0 | WORD32 index = 0; |
281 | 0 | n_1 = num_bits_pos - 1; |
282 | 0 | nb_pos = (1 << n_1); |
283 | 0 | i = 0; |
284 | 0 | j = 0; |
285 | 0 | for (k = 0; k < 5; k++) { |
286 | 0 | if ((pos_pulses[k] & nb_pos) == 0) { |
287 | 0 | pos_a[i++] = pos_pulses[k]; |
288 | 0 | } else { |
289 | 0 | pos_b[j++] = pos_pulses[k]; |
290 | 0 | } |
291 | 0 | } |
292 | 0 | switch (i) { |
293 | 0 | case 0: |
294 | 0 | index = 1 << ((5 * num_bits_pos) - 1); |
295 | 0 | index += iusace_acelp_quant_3p_3n1bits(pos_b[0], pos_b[1], pos_b[2], n_1) |
296 | 0 | << ((2 * num_bits_pos) + 1); |
297 | 0 | index += iusace_acelp_quant_2p_2n1bits(pos_b[3], pos_b[4], num_bits_pos); |
298 | 0 | break; |
299 | 0 | case 1: |
300 | 0 | index = 1 << ((5 * num_bits_pos) - 1); |
301 | 0 | index += iusace_acelp_quant_3p_3n1bits(pos_b[0], pos_b[1], pos_b[2], n_1) |
302 | 0 | << ((2 * num_bits_pos) + 1); |
303 | 0 | index += iusace_acelp_quant_2p_2n1bits(pos_b[3], pos_a[0], num_bits_pos); |
304 | 0 | break; |
305 | 0 | case 2: |
306 | 0 | index = 1 << ((5 * num_bits_pos) - 1); |
307 | 0 | index += iusace_acelp_quant_3p_3n1bits(pos_b[0], pos_b[1], pos_b[2], n_1) |
308 | 0 | << ((2 * num_bits_pos) + 1); |
309 | 0 | index += iusace_acelp_quant_2p_2n1bits(pos_a[0], pos_a[1], num_bits_pos); |
310 | 0 | break; |
311 | 0 | case 3: |
312 | 0 | index = iusace_acelp_quant_3p_3n1bits(pos_a[0], pos_a[1], pos_a[2], n_1) |
313 | 0 | << ((2 * num_bits_pos) + 1); |
314 | 0 | index += iusace_acelp_quant_2p_2n1bits(pos_b[0], pos_b[1], num_bits_pos); |
315 | 0 | break; |
316 | 0 | case 4: |
317 | 0 | index = iusace_acelp_quant_3p_3n1bits(pos_a[0], pos_a[1], pos_a[2], n_1) |
318 | 0 | << ((2 * num_bits_pos) + 1); |
319 | 0 | index += iusace_acelp_quant_2p_2n1bits(pos_a[3], pos_b[0], num_bits_pos); |
320 | 0 | break; |
321 | 0 | case 5: |
322 | 0 | index = iusace_acelp_quant_3p_3n1bits(pos_a[0], pos_a[1], pos_a[2], n_1) |
323 | 0 | << ((2 * num_bits_pos) + 1); |
324 | 0 | index += iusace_acelp_quant_2p_2n1bits(pos_a[3], pos_a[4], num_bits_pos); |
325 | 0 | break; |
326 | 0 | } |
327 | 0 | return (index); |
328 | 0 | } |
329 | | |
330 | 0 | static WORD32 iusace_acelp_quant_6p_6n_2bits(WORD32 *pos_pulses, WORD32 num_bits_pos) { |
331 | 0 | WORD32 i, j, k, nb_pos, n_1; |
332 | 0 | WORD32 pos_a[6], pos_b[6]; |
333 | 0 | WORD32 index = 0; |
334 | 0 | n_1 = num_bits_pos - 1; |
335 | 0 | nb_pos = 1 << n_1; |
336 | 0 | i = 0; |
337 | 0 | j = 0; |
338 | 0 | for (k = 0; k < 6; k++) { |
339 | 0 | if ((pos_pulses[k] & nb_pos) == 0) { |
340 | 0 | pos_a[i++] = pos_pulses[k]; |
341 | 0 | } else { |
342 | 0 | pos_b[j++] = pos_pulses[k]; |
343 | 0 | } |
344 | 0 | } |
345 | |
|
346 | 0 | switch (i) { |
347 | 0 | case 0: |
348 | 0 | index = 1 << ((6 * num_bits_pos) - 5); |
349 | 0 | index += iusace_acelp_quant_5p_5nbits(pos_b, n_1) << num_bits_pos; |
350 | 0 | index += iusace_acelp_quant_1p_n1bits(pos_b[5], n_1); |
351 | 0 | break; |
352 | 0 | case 1: |
353 | 0 | index = 1 << ((6 * num_bits_pos) - 5); |
354 | 0 | index += iusace_acelp_quant_5p_5nbits(pos_b, n_1) << num_bits_pos; |
355 | 0 | index += iusace_acelp_quant_1p_n1bits(pos_a[0], n_1); |
356 | 0 | break; |
357 | 0 | case 2: |
358 | 0 | index = 1 << ((6 * num_bits_pos) - 5); |
359 | 0 | index += iusace_acelp_quant_4p_4nbits(pos_b, n_1) << ((2 * n_1) + 1); |
360 | 0 | index += iusace_acelp_quant_2p_2n1bits(pos_a[0], pos_a[1], n_1); |
361 | 0 | break; |
362 | 0 | case 3: |
363 | 0 | index = iusace_acelp_quant_3p_3n1bits(pos_a[0], pos_a[1], pos_a[2], n_1) << ((3 * n_1) + 1); |
364 | 0 | index += iusace_acelp_quant_3p_3n1bits(pos_b[0], pos_b[1], pos_b[2], n_1); |
365 | 0 | break; |
366 | 0 | case 4: |
367 | 0 | i = 2; |
368 | 0 | index = iusace_acelp_quant_4p_4nbits(pos_a, n_1) << ((2 * n_1) + 1); |
369 | 0 | index += iusace_acelp_quant_2p_2n1bits(pos_b[0], pos_b[1], n_1); |
370 | 0 | break; |
371 | 0 | case 5: |
372 | 0 | i = 1; |
373 | 0 | index = iusace_acelp_quant_5p_5nbits(pos_a, n_1) << num_bits_pos; |
374 | 0 | index += iusace_acelp_quant_1p_n1bits(pos_b[0], n_1); |
375 | 0 | break; |
376 | 0 | case 6: |
377 | 0 | i = 0; |
378 | 0 | index = iusace_acelp_quant_5p_5nbits(pos_a, n_1) << num_bits_pos; |
379 | 0 | index += iusace_acelp_quant_1p_n1bits(pos_a[5], n_1); |
380 | 0 | break; |
381 | 0 | } |
382 | 0 | index += (i & 3) << ((6 * num_bits_pos) - 4); |
383 | 0 | return (index); |
384 | 0 | } |
385 | | |
386 | 2.68M | VOID iusace_acelp_tgt_ir_corr(FLOAT32 *x, FLOAT32 *ir_wsyn, FLOAT32 *corr_out) { |
387 | 2.68M | WORD16 i, j; |
388 | 2.68M | FLOAT32 sum; |
389 | 174M | for (i = 0; i < LEN_SUBFR; i++) { |
390 | 171M | sum = 0.0F; |
391 | 5.74G | for (j = i; j < LEN_SUBFR; j++) { |
392 | 5.57G | sum += x[j] * ir_wsyn[j - i]; |
393 | 5.57G | } |
394 | 171M | corr_out[i] = sum; |
395 | 171M | } |
396 | 2.68M | } |
397 | | |
398 | 5.36M | FLOAT32 iusace_acelp_tgt_cb_corr2(FLOAT32 *xn, FLOAT32 *y1, FLOAT32 *corr_out) { |
399 | 5.36M | FLOAT32 gain; |
400 | 5.36M | FLOAT32 t0, t1; |
401 | 5.36M | WORD16 i; |
402 | 5.36M | t0 = xn[0] * y1[0]; |
403 | 5.36M | t1 = y1[0] * y1[0]; |
404 | 53.6M | for (i = 1; i < LEN_SUBFR; i += 7) { |
405 | 48.2M | t0 += xn[i] * y1[i]; |
406 | 48.2M | t1 += y1[i] * y1[i]; |
407 | 48.2M | t0 += xn[i + 1] * y1[i + 1]; |
408 | 48.2M | t1 += y1[i + 1] * y1[i + 1]; |
409 | 48.2M | t0 += xn[i + 2] * y1[i + 2]; |
410 | 48.2M | t1 += y1[i + 2] * y1[i + 2]; |
411 | 48.2M | t0 += xn[i + 3] * y1[i + 3]; |
412 | 48.2M | t1 += y1[i + 3] * y1[i + 3]; |
413 | 48.2M | t0 += xn[i + 4] * y1[i + 4]; |
414 | 48.2M | t1 += y1[i + 4] * y1[i + 4]; |
415 | 48.2M | t0 += xn[i + 5] * y1[i + 5]; |
416 | 48.2M | t1 += y1[i + 5] * y1[i + 5]; |
417 | 48.2M | t0 += xn[i + 6] * y1[i + 6]; |
418 | 48.2M | t1 += y1[i + 6] * y1[i + 6]; |
419 | 48.2M | } |
420 | 5.36M | corr_out[0] = t1; |
421 | 5.36M | corr_out[1] = -2.0F * t0 + 0.01F; |
422 | | |
423 | 5.36M | if (t1) { |
424 | 5.32M | gain = t0 / t1; |
425 | 5.32M | } else { |
426 | 39.6k | gain = 1.0F; |
427 | 39.6k | } |
428 | 5.36M | if (gain < 0.0) { |
429 | 879k | gain = 0.0; |
430 | 4.48M | } else if (gain > 1.2F) { |
431 | 834k | gain = 1.2F; |
432 | 834k | } |
433 | 5.36M | return gain; |
434 | 5.36M | } |
435 | | |
436 | 2.68M | VOID iusace_acelp_tgt_cb_corr1(FLOAT32 *xn, FLOAT32 *y1, FLOAT32 *y2, FLOAT32 *corr_out) { |
437 | 2.68M | WORD32 i; |
438 | 2.68M | FLOAT32 temp1, temp2, temp3; |
439 | 2.68M | temp1 = 0.01F + y2[0] * y2[0]; |
440 | 2.68M | temp2 = 0.01F + xn[0] * y2[0]; |
441 | 2.68M | temp3 = 0.01F + y1[0] * y2[0]; |
442 | 2.68M | temp1 += y2[1] * y2[1]; |
443 | 2.68M | temp2 += xn[1] * y2[1]; |
444 | 2.68M | temp3 += y1[1] * y2[1]; |
445 | 2.68M | temp1 += y2[2] * y2[2]; |
446 | 2.68M | temp2 += xn[2] * y2[2]; |
447 | 2.68M | temp3 += y1[2] * y2[2]; |
448 | 2.68M | temp1 += y2[3] * y2[3]; |
449 | 2.68M | temp2 += xn[3] * y2[3]; |
450 | 2.68M | temp3 += y1[3] * y2[3]; |
451 | 29.4M | for (i = 4; i < LEN_SUBFR; i += 6) { |
452 | 26.8M | temp1 += y2[i] * y2[i]; |
453 | 26.8M | temp2 += xn[i] * y2[i]; |
454 | 26.8M | temp3 += y1[i] * y2[i]; |
455 | 26.8M | temp1 += y2[i + 1] * y2[i + 1]; |
456 | 26.8M | temp2 += xn[i + 1] * y2[i + 1]; |
457 | 26.8M | temp3 += y1[i + 1] * y2[i + 1]; |
458 | 26.8M | temp1 += y2[i + 2] * y2[i + 2]; |
459 | 26.8M | temp2 += xn[i + 2] * y2[i + 2]; |
460 | 26.8M | temp3 += y1[i + 2] * y2[i + 2]; |
461 | 26.8M | temp1 += y2[i + 3] * y2[i + 3]; |
462 | 26.8M | temp2 += xn[i + 3] * y2[i + 3]; |
463 | 26.8M | temp3 += y1[i + 3] * y2[i + 3]; |
464 | 26.8M | temp1 += y2[i + 4] * y2[i + 4]; |
465 | 26.8M | temp2 += xn[i + 4] * y2[i + 4]; |
466 | 26.8M | temp3 += y1[i + 4] * y2[i + 4]; |
467 | 26.8M | temp1 += y2[i + 5] * y2[i + 5]; |
468 | 26.8M | temp2 += xn[i + 5] * y2[i + 5]; |
469 | 26.8M | temp3 += y1[i + 5] * y2[i + 5]; |
470 | 26.8M | } |
471 | 2.68M | corr_out[2] = temp1; |
472 | 2.68M | corr_out[3] = -2.0F * temp2; |
473 | 2.68M | corr_out[4] = 2.0F * temp3; |
474 | 2.68M | } |
475 | | |
476 | 10.7M | VOID iusace_acelp_cb_target_update(FLOAT32 *x, FLOAT32 *new_x, FLOAT32 *cb_vec, FLOAT32 gain) { |
477 | 10.7M | WORD16 i; |
478 | 696M | for (i = 0; i < LEN_SUBFR; i++) { |
479 | 686M | new_x[i] = x[i] - gain * cb_vec[i]; |
480 | 686M | } |
481 | 10.7M | } |
482 | | |
483 | | VOID iusace_acelp_cb_exc(FLOAT32 *corr_input, FLOAT32 *lp_residual, FLOAT32 *ir_wsyn, |
484 | | WORD16 *alg_cb_exc_out, FLOAT32 *filt_cb_exc, WORD32 num_bits_cb, |
485 | 2.68M | WORD32 *acelp_param_out, FLOAT32 *scratch_acelp_ir_buf) { |
486 | 2.68M | FLOAT32 sign[LEN_SUBFR], vec[LEN_SUBFR]; |
487 | 2.68M | FLOAT32 corr_x[16], corr_y[16]; |
488 | 2.68M | FLOAT32 *ir_buf = scratch_acelp_ir_buf; |
489 | 2.68M | FLOAT32 corr_ir[4][16]; |
490 | 2.68M | FLOAT32 corr_p1p2[4][256]; |
491 | 2.68M | FLOAT32 dn2[LEN_SUBFR]; |
492 | 2.68M | WORD32 pulse_pos[NPMAXPT * 4] = {0}; |
493 | 2.68M | WORD32 codvec[MAX_NUM_PULSES] = {0}; |
494 | 2.68M | WORD32 num_pulse_position[10] = {0}; |
495 | 2.68M | WORD32 pos_max[4]; |
496 | 2.68M | WORD32 dn2_pos[8 * 4]; |
497 | 2.68M | UWORD8 ipos[MAX_NUM_PULSES] = {0}; |
498 | 2.68M | WORD32 i, j, k, st, pos = 0, index, track, num_pulses = 0, num_iter = 4; |
499 | 2.68M | WORD32 l_index; |
500 | 2.68M | FLOAT32 psk, ps, alpk, alp = 0.0F; |
501 | 2.68M | FLOAT32 val; |
502 | 2.68M | FLOAT32 s, cor; |
503 | 2.68M | FLOAT32 *p0, *p1, *p2, *p3, *psign; |
504 | 2.68M | FLOAT32 *p1_ir_buf, *p2_ir_buf, *p3_ir_buf, *p4_ir_buf, *ir_sign_inv; |
505 | 2.68M | switch (num_bits_cb) { |
506 | 0 | case ACELP_NUM_BITS_20: |
507 | 0 | num_iter = 4; |
508 | 0 | alp = 2.0; |
509 | 0 | num_pulses = 4; |
510 | 0 | num_pulse_position[0] = 4; |
511 | 0 | num_pulse_position[1] = 8; |
512 | 0 | break; |
513 | 0 | case ACELP_NUM_BITS_28: |
514 | 0 | num_iter = 4; |
515 | 0 | alp = 1.5; |
516 | 0 | num_pulses = 6; |
517 | 0 | num_pulse_position[0] = 4; |
518 | 0 | num_pulse_position[1] = 8; |
519 | 0 | num_pulse_position[2] = 8; |
520 | 0 | break; |
521 | | |
522 | 260k | case ACELP_NUM_BITS_36: |
523 | 260k | num_iter = 4; |
524 | 260k | alp = 1.0; |
525 | 260k | num_pulses = 8; |
526 | 260k | num_pulse_position[0] = 4; |
527 | 260k | num_pulse_position[1] = 8; |
528 | 260k | num_pulse_position[2] = 8; |
529 | 260k | break; |
530 | 0 | case ACELP_NUM_BITS_44: |
531 | 0 | num_iter = 4; |
532 | 0 | alp = 1.0; |
533 | 0 | num_pulses = 10; |
534 | 0 | num_pulse_position[0] = 4; |
535 | 0 | num_pulse_position[1] = 6; |
536 | 0 | num_pulse_position[2] = 8; |
537 | 0 | num_pulse_position[3] = 8; |
538 | 0 | break; |
539 | 51.5k | case ACELP_NUM_BITS_52: |
540 | 51.5k | num_iter = 4; |
541 | 51.5k | alp = 1.0; |
542 | 51.5k | num_pulses = 12; |
543 | 51.5k | num_pulse_position[0] = 4; |
544 | 51.5k | num_pulse_position[1] = 6; |
545 | 51.5k | num_pulse_position[2] = 8; |
546 | 51.5k | num_pulse_position[3] = 8; |
547 | 51.5k | break; |
548 | 2.36M | case ACELP_NUM_BITS_64: |
549 | 2.36M | num_iter = 3; |
550 | 2.36M | alp = 0.8F; |
551 | 2.36M | num_pulses = 16; |
552 | 2.36M | num_pulse_position[0] = 4; |
553 | 2.36M | num_pulse_position[1] = 4; |
554 | 2.36M | num_pulse_position[2] = 6; |
555 | 2.36M | num_pulse_position[3] = 6; |
556 | 2.36M | num_pulse_position[4] = 8; |
557 | 2.36M | num_pulse_position[5] = 8; |
558 | 2.36M | break; |
559 | 2.68M | } |
560 | | |
561 | 2.68M | val = (lp_residual[0] * lp_residual[0]) + 1.0F; |
562 | 2.68M | cor = (corr_input[0] * corr_input[0]) + 1.0F; |
563 | 26.8M | for (i = 1; i < LEN_SUBFR; i += 7) { |
564 | 24.1M | val += (lp_residual[i] * lp_residual[i]); |
565 | 24.1M | cor += (corr_input[i] * corr_input[i]); |
566 | 24.1M | val += (lp_residual[i + 1] * lp_residual[i + 1]); |
567 | 24.1M | cor += (corr_input[i + 1] * corr_input[i + 1]); |
568 | 24.1M | val += (lp_residual[i + 2] * lp_residual[i + 2]); |
569 | 24.1M | cor += (corr_input[i + 2] * corr_input[i + 2]); |
570 | 24.1M | val += (lp_residual[i + 3] * lp_residual[i + 3]); |
571 | 24.1M | cor += (corr_input[i + 3] * corr_input[i + 3]); |
572 | 24.1M | val += (lp_residual[i + 4] * lp_residual[i + 4]); |
573 | 24.1M | cor += (corr_input[i + 4] * corr_input[i + 4]); |
574 | 24.1M | val += (lp_residual[i + 5] * lp_residual[i + 5]); |
575 | 24.1M | cor += (corr_input[i + 5] * corr_input[i + 5]); |
576 | 24.1M | val += (lp_residual[i + 6] * lp_residual[i + 6]); |
577 | 24.1M | cor += (corr_input[i + 6] * corr_input[i + 6]); |
578 | 24.1M | } |
579 | 2.68M | s = (FLOAT32)sqrt(cor / val); |
580 | 174M | for (j = 0; j < LEN_SUBFR; j++) { |
581 | 171M | cor = (s * lp_residual[j]) + (alp * corr_input[j]); |
582 | 171M | if (cor >= 0.0F) { |
583 | 84.0M | sign[j] = 1.0F; |
584 | 84.0M | vec[j] = -1.0F; |
585 | 84.0M | dn2[j] = cor; |
586 | 87.5M | } else { |
587 | 87.5M | sign[j] = -1.0F; |
588 | 87.5M | vec[j] = 1.0F; |
589 | 87.5M | corr_input[j] = -corr_input[j]; |
590 | 87.5M | dn2[j] = -cor; |
591 | 87.5M | } |
592 | 171M | } |
593 | 13.4M | for (i = 0; i < 4; i++) { |
594 | 96.4M | for (k = 0; k < 8; k++) { |
595 | 85.7M | ps = -1; |
596 | 1.45G | for (j = i; j < LEN_SUBFR; j += 4) { |
597 | 1.37G | if (dn2[j] > ps) { |
598 | 245M | ps = dn2[j]; |
599 | 245M | pos = j; |
600 | 245M | } |
601 | 1.37G | } |
602 | 85.7M | dn2[pos] = (FLOAT32)k - 8; |
603 | 85.7M | dn2_pos[i * 8 + k] = pos; |
604 | 85.7M | } |
605 | 10.7M | pos_max[i] = dn2_pos[i * 8]; |
606 | 10.7M | } |
607 | | |
608 | 2.68M | memset(ir_buf, 0, LEN_SUBFR * sizeof(FLOAT32)); |
609 | 2.68M | memset(ir_buf + (2 * LEN_SUBFR), 0, LEN_SUBFR * sizeof(FLOAT32)); |
610 | 2.68M | p1_ir_buf = ir_buf + LEN_SUBFR; |
611 | 2.68M | ir_sign_inv = ir_buf + (3 * LEN_SUBFR); |
612 | 2.68M | memcpy(p1_ir_buf, ir_wsyn, LEN_SUBFR * sizeof(FLOAT32)); |
613 | 2.68M | ir_sign_inv[0] = -p1_ir_buf[0]; |
614 | 2.68M | ir_sign_inv[1] = -p1_ir_buf[1]; |
615 | 2.68M | ir_sign_inv[2] = -p1_ir_buf[2]; |
616 | 2.68M | ir_sign_inv[3] = -p1_ir_buf[3]; |
617 | 29.4M | for (i = 4; i < LEN_SUBFR; i += 6) { |
618 | 26.8M | ir_sign_inv[i] = -p1_ir_buf[i]; |
619 | 26.8M | ir_sign_inv[i + 1] = -p1_ir_buf[i + 1]; |
620 | 26.8M | ir_sign_inv[i + 2] = -p1_ir_buf[i + 2]; |
621 | 26.8M | ir_sign_inv[i + 3] = -p1_ir_buf[i + 3]; |
622 | 26.8M | ir_sign_inv[i + 4] = -p1_ir_buf[i + 4]; |
623 | 26.8M | ir_sign_inv[i + 5] = -p1_ir_buf[i + 5]; |
624 | 26.8M | } |
625 | | |
626 | 2.68M | p0 = &corr_ir[0][16 - 1]; |
627 | 2.68M | p1 = &corr_ir[1][16 - 1]; |
628 | 2.68M | p2 = &corr_ir[2][16 - 1]; |
629 | 2.68M | p3 = &corr_ir[3][16 - 1]; |
630 | 2.68M | p2_ir_buf = p1_ir_buf; |
631 | 2.68M | cor = 0.0F; |
632 | 45.5M | for (i = 0; i < 16; i++) { |
633 | 42.8M | cor += (*p2_ir_buf) * (*p2_ir_buf); |
634 | 42.8M | p2_ir_buf++; |
635 | 42.8M | *p3-- = cor * 0.5F; |
636 | 42.8M | cor += (*p2_ir_buf) * (*p2_ir_buf); |
637 | 42.8M | p2_ir_buf++; |
638 | 42.8M | *p2-- = cor * 0.5F; |
639 | 42.8M | cor += (*p2_ir_buf) * (*p2_ir_buf); |
640 | 42.8M | p2_ir_buf++; |
641 | 42.8M | *p1-- = cor * 0.5F; |
642 | 42.8M | cor += (*p2_ir_buf) * (*p2_ir_buf); |
643 | 42.8M | p2_ir_buf++; |
644 | 42.8M | *p0-- = cor * 0.5F; |
645 | 42.8M | } |
646 | 2.68M | pos = 256 - 1; |
647 | 2.68M | p4_ir_buf = p1_ir_buf + 1; |
648 | 45.5M | for (k = 0; k < 16; k++) { |
649 | 42.8M | p3 = &corr_p1p2[2][pos]; |
650 | 42.8M | p2 = &corr_p1p2[1][pos]; |
651 | 42.8M | p1 = &corr_p1p2[0][pos]; |
652 | 42.8M | if (k == 15) { |
653 | 2.68M | p0 = &corr_p1p2[3][pos - 15]; |
654 | 40.2M | } else { |
655 | 40.2M | p0 = &corr_p1p2[3][pos - 16]; |
656 | 40.2M | } |
657 | 42.8M | cor = 0.0F; |
658 | 42.8M | p2_ir_buf = p1_ir_buf; |
659 | 42.8M | p3_ir_buf = p4_ir_buf; |
660 | 364M | for (i = k + 1; i < 16; i++) { |
661 | 321M | cor += (*p2_ir_buf) * (*p3_ir_buf); |
662 | 321M | p2_ir_buf++; |
663 | 321M | p3_ir_buf++; |
664 | 321M | *p3 = cor; |
665 | 321M | cor += (*p2_ir_buf) * (*p3_ir_buf); |
666 | 321M | p2_ir_buf++; |
667 | 321M | p3_ir_buf++; |
668 | 321M | *p2 = cor; |
669 | 321M | cor += (*p2_ir_buf) * (*p3_ir_buf); |
670 | 321M | p2_ir_buf++; |
671 | 321M | p3_ir_buf++; |
672 | 321M | *p1 = cor; |
673 | 321M | cor += (*p2_ir_buf) * (*p3_ir_buf); |
674 | 321M | p2_ir_buf++; |
675 | 321M | p3_ir_buf++; |
676 | 321M | *p0 = cor; |
677 | 321M | p3 -= (16 + 1); |
678 | 321M | p2 -= (16 + 1); |
679 | 321M | p1 -= (16 + 1); |
680 | 321M | p0 -= (16 + 1); |
681 | 321M | } |
682 | 42.8M | cor += (*p2_ir_buf) * (*p3_ir_buf); |
683 | 42.8M | p2_ir_buf++; |
684 | 42.8M | p3_ir_buf++; |
685 | 42.8M | *p3 = cor; |
686 | 42.8M | cor += (*p2_ir_buf) * (*p3_ir_buf); |
687 | 42.8M | p2_ir_buf++; |
688 | 42.8M | p3_ir_buf++; |
689 | 42.8M | *p2 = cor; |
690 | 42.8M | cor += (*p2_ir_buf) * (*p3_ir_buf); |
691 | 42.8M | p2_ir_buf++; |
692 | 42.8M | p3_ir_buf++; |
693 | 42.8M | *p1 = cor; |
694 | 42.8M | pos -= 16; |
695 | 42.8M | p4_ir_buf += 4; |
696 | 42.8M | } |
697 | 2.68M | pos = 256 - 1; |
698 | 2.68M | p4_ir_buf = p1_ir_buf + 3; |
699 | 45.5M | for (k = 0; k < 16; k++) { |
700 | 42.8M | p3 = &corr_p1p2[3][pos]; |
701 | 42.8M | p2 = &corr_p1p2[2][pos - 1]; |
702 | 42.8M | p1 = &corr_p1p2[1][pos - 1]; |
703 | 42.8M | p0 = &corr_p1p2[0][pos - 1]; |
704 | 42.8M | cor = 0.0F; |
705 | 42.8M | p2_ir_buf = p1_ir_buf; |
706 | 42.8M | p3_ir_buf = p4_ir_buf; |
707 | 364M | for (i = k + 1; i < 16; i++) { |
708 | 321M | cor += (*p2_ir_buf) * (*p3_ir_buf); |
709 | 321M | p2_ir_buf++; |
710 | 321M | p3_ir_buf++; |
711 | 321M | *p3 = cor; |
712 | 321M | cor += (*p2_ir_buf) * (*p3_ir_buf); |
713 | 321M | p2_ir_buf++; |
714 | 321M | p3_ir_buf++; |
715 | 321M | *p2 = cor; |
716 | 321M | cor += (*p2_ir_buf) * (*p3_ir_buf); |
717 | 321M | p2_ir_buf++; |
718 | 321M | p3_ir_buf++; |
719 | 321M | *p1 = cor; |
720 | 321M | cor += (*p2_ir_buf) * (*p3_ir_buf); |
721 | 321M | p2_ir_buf++; |
722 | 321M | p3_ir_buf++; |
723 | 321M | *p0 = cor; |
724 | 321M | p3 -= (16 + 1); |
725 | 321M | p2 -= (16 + 1); |
726 | 321M | p1 -= (16 + 1); |
727 | 321M | p0 -= (16 + 1); |
728 | 321M | } |
729 | 42.8M | cor += (*p2_ir_buf) * (*p3_ir_buf); |
730 | 42.8M | p2_ir_buf++; |
731 | 42.8M | p3_ir_buf++; |
732 | 42.8M | *p3 = cor; |
733 | 42.8M | pos--; |
734 | 42.8M | p4_ir_buf += 4; |
735 | 42.8M | } |
736 | | |
737 | 2.68M | p0 = &corr_p1p2[0][0]; |
738 | 13.4M | for (k = 0; k < 4; k++) { |
739 | 182M | for (i = k; i < LEN_SUBFR; i += 4) { |
740 | 171M | psign = sign; |
741 | 171M | if (psign[i] < 0.0F) { |
742 | 87.5M | psign = vec; |
743 | 87.5M | } |
744 | 171M | j = (k + 1) % 4; |
745 | 171M | p0[0] = p0[0] * psign[j]; |
746 | 171M | p0[1] = p0[1] * psign[j + 4]; |
747 | 171M | p0[2] = p0[2] * psign[j + 8]; |
748 | 171M | p0[3] = p0[3] * psign[j + 12]; |
749 | 171M | p0[4] = p0[4] * psign[j + 16]; |
750 | 171M | p0[5] = p0[5] * psign[j + 20]; |
751 | 171M | p0[6] = p0[6] * psign[j + 24]; |
752 | 171M | p0[7] = p0[7] * psign[j + 28]; |
753 | 171M | p0[8] = p0[8] * psign[j + 32]; |
754 | 171M | p0[9] = p0[9] * psign[j + 36]; |
755 | 171M | p0[10] = p0[10] * psign[j + 40]; |
756 | 171M | p0[11] = p0[11] * psign[j + 44]; |
757 | 171M | p0[12] = p0[12] * psign[j + 48]; |
758 | 171M | p0[13] = p0[13] * psign[j + 52]; |
759 | 171M | p0[14] = p0[14] * psign[j + 56]; |
760 | 171M | p0[15] = p0[15] * psign[j + 60]; |
761 | 171M | p0 += 16; |
762 | 171M | } |
763 | 10.7M | } |
764 | 2.68M | psk = -1.0; |
765 | 2.68M | alpk = 1.0; |
766 | 11.0M | for (k = 0; k < num_iter; k++) { |
767 | 46.7M | for (i = 0; i < num_pulses - (num_pulses % 3); i += 3) { |
768 | 38.4M | ipos[i] = iusace_acelp_ipos[(k * 4) + i]; |
769 | 38.4M | ipos[i + 1] = iusace_acelp_ipos[(k * 4) + i + 1]; |
770 | 38.4M | ipos[i + 2] = iusace_acelp_ipos[(k * 4) + i + 2]; |
771 | 38.4M | } |
772 | 17.5M | for (; i < num_pulses; i++) { |
773 | 9.18M | ipos[i] = iusace_acelp_ipos[(k * 4) + i]; |
774 | 9.18M | } |
775 | | |
776 | 8.35M | if ((num_bits_cb == 20) | (num_bits_cb == 28) | (num_bits_cb == 12) | (num_bits_cb == 16)) { |
777 | 0 | pos = 0; |
778 | 0 | ps = 0.0F; |
779 | 0 | alp = 0.0F; |
780 | 0 | memset(vec, 0, LEN_SUBFR * sizeof(FLOAT32)); |
781 | 0 | if (num_bits_cb == 28) { |
782 | 0 | ipos[4] = 0; |
783 | 0 | ipos[5] = 1; |
784 | 0 | } |
785 | |
|
786 | 0 | if (num_bits_cb == 16) { |
787 | 0 | ipos[0] = 0; |
788 | 0 | ipos[1] = 2; |
789 | 0 | ipos[2] = 1; |
790 | 0 | ipos[3] = 3; |
791 | 0 | } |
792 | 8.35M | } else if ((num_bits_cb == 36) | (num_bits_cb == 44)) { |
793 | 1.04M | pos = 2; |
794 | 1.04M | pulse_pos[0] = pos_max[ipos[0]]; |
795 | 1.04M | pulse_pos[1] = pos_max[ipos[1]]; |
796 | 1.04M | ps = corr_input[pulse_pos[0]] + corr_input[pulse_pos[1]]; |
797 | 1.04M | alp = corr_ir[ipos[0]][pulse_pos[0] >> 2] + corr_ir[ipos[1]][pulse_pos[1] >> 2] + |
798 | 1.04M | corr_p1p2[ipos[0]][((pulse_pos[0] >> 2) << 4) + (pulse_pos[1] >> 2)]; |
799 | 1.04M | if (sign[pulse_pos[0]] < 0.0) { |
800 | 584k | p0 = ir_sign_inv - pulse_pos[0]; |
801 | 584k | } else { |
802 | 455k | p0 = p1_ir_buf - pulse_pos[0]; |
803 | 455k | } |
804 | 1.04M | if (sign[pulse_pos[1]] < 0.0) { |
805 | 584k | p1 = ir_sign_inv - pulse_pos[1]; |
806 | 584k | } else { |
807 | 455k | p1 = p1_ir_buf - pulse_pos[1]; |
808 | 455k | } |
809 | 1.04M | vec[0] = p0[0] + p1[0]; |
810 | 1.04M | vec[1] = p0[1] + p1[1]; |
811 | 1.04M | vec[2] = p0[2] + p1[2]; |
812 | 1.04M | vec[3] = p0[3] + p1[3]; |
813 | 11.4M | for (i = 4; i < LEN_SUBFR; i += 6) { |
814 | 10.4M | vec[i] = p0[i] + p1[i]; |
815 | 10.4M | vec[i + 1] = p0[i + 1] + p1[i + 1]; |
816 | 10.4M | vec[i + 2] = p0[i + 2] + p1[i + 2]; |
817 | 10.4M | vec[i + 3] = p0[i + 3] + p1[i + 3]; |
818 | 10.4M | vec[i + 4] = p0[i + 4] + p1[i + 4]; |
819 | 10.4M | vec[i + 5] = p0[i + 5] + p1[i + 5]; |
820 | 10.4M | } |
821 | 1.04M | if (num_bits_cb == 44) { |
822 | 0 | ipos[8] = 0; |
823 | 0 | ipos[9] = 1; |
824 | 0 | } |
825 | 7.31M | } else { |
826 | 7.31M | pos = 4; |
827 | 7.31M | pulse_pos[0] = pos_max[ipos[0]]; |
828 | 7.31M | pulse_pos[1] = pos_max[ipos[1]]; |
829 | 7.31M | pulse_pos[2] = pos_max[ipos[2]]; |
830 | 7.31M | pulse_pos[3] = pos_max[ipos[3]]; |
831 | 7.31M | ps = corr_input[pulse_pos[0]] + corr_input[pulse_pos[1]] + corr_input[pulse_pos[2]] + |
832 | 7.31M | corr_input[pulse_pos[3]]; |
833 | 7.31M | p0 = p1_ir_buf - pulse_pos[0]; |
834 | 7.31M | if (sign[pulse_pos[0]] < 0.0) { |
835 | 3.58M | p0 = ir_sign_inv - pulse_pos[0]; |
836 | 3.58M | } |
837 | 7.31M | p1 = p1_ir_buf - pulse_pos[1]; |
838 | 7.31M | if (sign[pulse_pos[1]] < 0.0) { |
839 | 3.58M | p1 = ir_sign_inv - pulse_pos[1]; |
840 | 3.58M | } |
841 | 7.31M | p2 = p1_ir_buf - pulse_pos[2]; |
842 | 7.31M | if (sign[pulse_pos[2]] < 0.0) { |
843 | 3.57M | p2 = ir_sign_inv - pulse_pos[2]; |
844 | 3.57M | } |
845 | 7.31M | p3 = p1_ir_buf - pulse_pos[3]; |
846 | 7.31M | if (sign[pulse_pos[3]] < 0.0) { |
847 | 3.56M | p3 = ir_sign_inv - pulse_pos[3]; |
848 | 3.56M | } |
849 | 7.31M | vec[0] = p0[0] + p1[0] + p2[0] + p3[0]; |
850 | 160M | for (i = 1; i < LEN_SUBFR; i += 3) { |
851 | 153M | vec[i] = p0[i] + p1[i] + p2[i] + p3[i]; |
852 | 153M | vec[i + 1] = p0[i + 1] + p1[i + 1] + p2[i + 1] + p3[i + 1]; |
853 | 153M | vec[i + 2] = p0[i + 2] + p1[i + 2] + p2[i + 2] + p3[i + 2]; |
854 | 153M | } |
855 | 7.31M | alp = 0.0F; |
856 | 7.31M | alp += vec[0] * vec[0] + vec[1] * vec[1]; |
857 | 7.31M | alp += vec[2] * vec[2] + vec[3] * vec[3]; |
858 | 80.4M | for (i = 4; i < LEN_SUBFR; i += 6) { |
859 | 73.1M | alp += vec[i] * vec[i]; |
860 | 73.1M | alp += vec[i + 1] * vec[i + 1]; |
861 | 73.1M | alp += vec[i + 2] * vec[i + 2]; |
862 | 73.1M | alp += vec[i + 3] * vec[i + 3]; |
863 | 73.1M | alp += vec[i + 4] * vec[i + 4]; |
864 | 73.1M | alp += vec[i + 5] * vec[i + 5]; |
865 | 73.1M | } |
866 | 7.31M | alp *= 0.5F; |
867 | 7.31M | if (num_bits_cb == 72) { |
868 | 0 | ipos[16] = 0; |
869 | 0 | ipos[17] = 1; |
870 | 0 | } |
871 | 7.31M | } |
872 | | |
873 | 54.9M | for (j = pos, st = 0; j < num_pulses; j += 2, st++) { |
874 | 46.5M | if ((num_pulses - j) >= 2) { |
875 | 46.5M | iusace_acelp_ir_vec_corr1(p1_ir_buf, vec, ipos[j], sign, corr_ir, corr_x, dn2_pos, |
876 | 46.5M | num_pulse_position[st]); |
877 | 46.5M | iusace_acelp_ir_vec_corr2(p1_ir_buf, vec, ipos[j + 1], sign, corr_ir, corr_y); |
878 | | |
879 | 46.5M | iusace_acelp_get_2p_pos(num_pulse_position[st], ipos[j], ipos[j + 1], &ps, &alp, |
880 | 46.5M | &pulse_pos[j], &pulse_pos[j + 1], corr_input, dn2_pos, corr_x, |
881 | 46.5M | corr_y, corr_p1p2); |
882 | 46.5M | } else { |
883 | 0 | iusace_acelp_ir_vec_corr2(p1_ir_buf, vec, ipos[j], sign, corr_ir, corr_x); |
884 | 0 | iusace_acelp_ir_vec_corr2(p1_ir_buf, vec, ipos[j + 1], sign, corr_ir, corr_y); |
885 | 0 | iusace_acelp_get_1p_pos(ipos[j], ipos[j + 1], &ps, &alp, &pulse_pos[j], corr_input, |
886 | 0 | corr_x, corr_y); |
887 | 0 | } |
888 | 46.5M | if (j < (num_pulses - 2)) { |
889 | 38.2M | p0 = p1_ir_buf - pulse_pos[j]; |
890 | 38.2M | if (sign[pulse_pos[j]] < 0.0) { |
891 | 19.0M | p0 = ir_sign_inv - pulse_pos[j]; |
892 | 19.0M | } |
893 | 38.2M | p1 = p1_ir_buf - pulse_pos[j + 1]; |
894 | 38.2M | if (sign[pulse_pos[j + 1]] < 0.0) { |
895 | 18.8M | p1 = ir_sign_inv - pulse_pos[j + 1]; |
896 | 18.8M | } |
897 | 38.2M | vec[0] += p0[0] + p1[0]; |
898 | 38.2M | vec[1] += p0[1] + p1[1]; |
899 | 38.2M | vec[2] += p0[2] + p1[2]; |
900 | 38.2M | vec[3] += p0[3] + p1[3]; |
901 | 420M | for (i = 4; i < LEN_SUBFR; i += 6) { |
902 | 382M | vec[i] += p0[i] + p1[i]; |
903 | 382M | vec[i + 1] += p0[i + 1] + p1[i + 1]; |
904 | 382M | vec[i + 2] += p0[i + 2] + p1[i + 2]; |
905 | 382M | vec[i + 3] += p0[i + 3] + p1[i + 3]; |
906 | 382M | vec[i + 4] += p0[i + 4] + p1[i + 4]; |
907 | 382M | vec[i + 5] += p0[i + 5] + p1[i + 5]; |
908 | 382M | } |
909 | 38.2M | } |
910 | 46.5M | } |
911 | 8.35M | ps = ps * ps; |
912 | 8.35M | s = (alpk * ps) - (psk * alp); |
913 | 8.35M | if (s > 0.0F) { |
914 | 4.96M | psk = ps; |
915 | 4.96M | alpk = alp; |
916 | 4.96M | memcpy(codvec, pulse_pos, num_pulses * sizeof(WORD32)); |
917 | 4.96M | } |
918 | 8.35M | } |
919 | | |
920 | 2.68M | memset(alg_cb_exc_out, 0, LEN_SUBFR * sizeof(WORD16)); |
921 | 2.68M | memset(filt_cb_exc, 0, LEN_SUBFR * sizeof(FLOAT32)); |
922 | 2.68M | memset(pulse_pos, 0xffffffff, NPMAXPT * 4 * sizeof(WORD32)); |
923 | 43.2M | for (k = 0; k < num_pulses; k++) { |
924 | 40.5M | i = codvec[k]; |
925 | 40.5M | val = sign[i]; |
926 | 40.5M | index = i / 4; |
927 | 40.5M | track = i % 4; |
928 | 40.5M | if (val > 0) { |
929 | 20.5M | alg_cb_exc_out[i] += 512; |
930 | 20.5M | codvec[k] += (2 * LEN_SUBFR); |
931 | 20.5M | } else { |
932 | 20.0M | alg_cb_exc_out[i] -= 512; |
933 | 20.0M | index += 16; |
934 | 20.0M | } |
935 | 40.5M | i = track * NPMAXPT; |
936 | 99.1M | while (pulse_pos[i] >= 0) { |
937 | 58.5M | i++; |
938 | 58.5M | } |
939 | 40.5M | pulse_pos[i] = index; |
940 | 40.5M | p0 = ir_sign_inv - codvec[k]; |
941 | 40.5M | filt_cb_exc[0] += p0[0]; |
942 | 893M | for (i = 1; i < LEN_SUBFR; i += 3) { |
943 | 852M | filt_cb_exc[i] += p0[i]; |
944 | 852M | filt_cb_exc[i + 1] += p0[i + 1]; |
945 | 852M | filt_cb_exc[i + 2] += p0[i + 2]; |
946 | 852M | } |
947 | 40.5M | } |
948 | | |
949 | 2.68M | if (num_bits_cb == ACELP_NUM_BITS_20) { |
950 | 0 | for (track = 0; track < 4; track++) { |
951 | 0 | k = track * NPMAXPT; |
952 | 0 | acelp_param_out[track] = iusace_acelp_quant_1p_n1bits(pulse_pos[k], 4); |
953 | 0 | } |
954 | 2.68M | } else if (num_bits_cb == ACELP_NUM_BITS_28) { |
955 | 0 | for (track = 0; track < (4 - 2); track++) { |
956 | 0 | k = track * NPMAXPT; |
957 | 0 | acelp_param_out[track] = iusace_acelp_quant_2p_2n1bits(pulse_pos[k], pulse_pos[k + 1], 4); |
958 | 0 | } |
959 | 0 | for (track = 2; track < 4; track++) { |
960 | 0 | k = track * NPMAXPT; |
961 | 0 | acelp_param_out[track] = iusace_acelp_quant_1p_n1bits(pulse_pos[k], 4); |
962 | 0 | } |
963 | 2.68M | } else if (num_bits_cb == ACELP_NUM_BITS_36) { |
964 | 1.30M | for (track = 0; track < 4; track++) { |
965 | 1.04M | k = track * NPMAXPT; |
966 | 1.04M | acelp_param_out[track] = iusace_acelp_quant_2p_2n1bits(pulse_pos[k], pulse_pos[k + 1], 4); |
967 | 1.04M | } |
968 | 2.42M | } else if (num_bits_cb == ACELP_NUM_BITS_44) { |
969 | 0 | for (track = 0; track < (4 - 2); track++) { |
970 | 0 | k = track * NPMAXPT; |
971 | 0 | acelp_param_out[track] = |
972 | 0 | iusace_acelp_quant_3p_3n1bits(pulse_pos[k], pulse_pos[k + 1], pulse_pos[k + 2], 4); |
973 | 0 | } |
974 | 0 | for (track = 2; track < 4; track++) { |
975 | 0 | k = track * NPMAXPT; |
976 | 0 | acelp_param_out[track] = iusace_acelp_quant_2p_2n1bits(pulse_pos[k], pulse_pos[k + 1], 4); |
977 | 0 | } |
978 | 2.42M | } else if (num_bits_cb == ACELP_NUM_BITS_52) { |
979 | 257k | for (track = 0; track < 4; track++) { |
980 | 206k | k = track * NPMAXPT; |
981 | 206k | acelp_param_out[track] = |
982 | 206k | iusace_acelp_quant_3p_3n1bits(pulse_pos[k], pulse_pos[k + 1], pulse_pos[k + 2], 4); |
983 | 206k | } |
984 | 2.36M | } else if (num_bits_cb == ACELP_NUM_BITS_64) { |
985 | 11.8M | for (track = 0; track < 4; track++) { |
986 | 9.47M | k = track * NPMAXPT; |
987 | 9.47M | l_index = iusace_acelp_quant_4p_4nbits(&pulse_pos[k], 4); |
988 | 9.47M | acelp_param_out[track] = ((l_index >> 14) & 3); |
989 | 9.47M | acelp_param_out[track + 4] = (l_index & 0x3FFF); |
990 | 9.47M | } |
991 | 2.36M | } else if (num_bits_cb == ACELP_NUM_BITS_72) { |
992 | 0 | for (track = 0; track < (4 - 2); track++) { |
993 | 0 | k = track * NPMAXPT; |
994 | 0 | l_index = iusace_acelp_quant_5p_5nbits(&pulse_pos[k], 4); |
995 | 0 | acelp_param_out[track] = ((l_index >> 10) & 0x03FF); |
996 | 0 | acelp_param_out[track + 4] = (l_index & 0x03FF); |
997 | 0 | } |
998 | 0 | for (track = 2; track < 4; track++) { |
999 | 0 | k = track * NPMAXPT; |
1000 | 0 | l_index = iusace_acelp_quant_4p_4nbits(&pulse_pos[k], 4); |
1001 | 0 | acelp_param_out[track] = ((l_index >> 14) & 3); |
1002 | 0 | acelp_param_out[track + 4] = (l_index & 0x3FFF); |
1003 | 0 | } |
1004 | 0 | } else if (num_bits_cb == ACELP_NUM_BITS_88) { |
1005 | 0 | for (track = 0; track < 4; track++) { |
1006 | 0 | k = track * NPMAXPT; |
1007 | 0 | l_index = iusace_acelp_quant_6p_6n_2bits(&pulse_pos[k], 4); |
1008 | 0 | acelp_param_out[track] = ((l_index >> 11) & 0x07FF); |
1009 | 0 | acelp_param_out[track + 4] = (l_index & 0x07FF); |
1010 | 0 | } |
1011 | 0 | } |
1012 | 2.68M | return; |
1013 | 2.68M | } |
1014 | | |
1015 | 2.68M | VOID iusace_acelp_ltpred_cb_exc(FLOAT32 *exc, WORD32 t0, WORD32 t0_frac, WORD32 len_subfrm) { |
1016 | 2.68M | WORD32 i, j; |
1017 | 2.68M | FLOAT32 s, *x0, *x1, *x2; |
1018 | 2.68M | const FLOAT32 *c1, *c2; |
1019 | | |
1020 | 2.68M | x0 = &exc[-t0]; |
1021 | 2.68M | t0_frac = -t0_frac; |
1022 | 2.68M | if (t0_frac < 0) { |
1023 | 1.41M | t0_frac += T_UP_SAMP; |
1024 | 1.41M | x0--; |
1025 | 1.41M | } |
1026 | 176M | for (j = 0; j < len_subfrm; j++) { |
1027 | 174M | x1 = x0++; |
1028 | 174M | x2 = x1 + 1; |
1029 | 174M | c1 = &iusace_res_interp_filter1_4[t0_frac]; |
1030 | 174M | c2 = &iusace_res_interp_filter1_4[T_UP_SAMP - t0_frac]; |
1031 | 174M | s = 0.0; |
1032 | 2.96G | for (i = 0; i < INTER_LP_FIL_ORDER; i++, c1 += T_UP_SAMP, c2 += T_UP_SAMP) { |
1033 | 2.78G | s += (*x1--) * (*c1) + (*x2++) * (*c2); |
1034 | 2.78G | } |
1035 | 174M | exc[j] = s; |
1036 | 174M | } |
1037 | 2.68M | } |
1038 | | |
1039 | | VOID iusace_acelp_quant_gain(FLOAT32 *code, FLOAT32 *pitch_gain, FLOAT32 *code_gain, |
1040 | 2.68M | FLOAT32 *tgt_cb_corr_data, FLOAT32 mean_energy, WORD32 *qunt_idx) { |
1041 | 2.68M | WORD32 i, indice = 0, min_pitch_idx; |
1042 | 2.68M | FLOAT32 ener_code, pred_code_gain; |
1043 | 2.68M | FLOAT32 dist, dist_min, g_pitch, g_code; |
1044 | 2.68M | const FLOAT32 *p1_qua_gain_table, *p2_qua_gain_table; |
1045 | | |
1046 | 2.68M | p1_qua_gain_table = iusace_acelp_quant_gain_table; |
1047 | 2.68M | p2_qua_gain_table = (const FLOAT32 *)(iusace_acelp_quant_gain_table + ACELP_GAIN_TBL_OFFSET); |
1048 | 2.68M | min_pitch_idx = 0; |
1049 | 2.68M | g_pitch = *pitch_gain; |
1050 | 174M | for (i = 0; i < ACELP_RANGE_GAIN_PT_IDX_SEARCH; i++, p2_qua_gain_table += 2) { |
1051 | 171M | if (g_pitch > *p2_qua_gain_table) { |
1052 | 49.0M | continue; |
1053 | 49.0M | } |
1054 | 171M | } |
1055 | 2.68M | ener_code = 0.01F; |
1056 | | |
1057 | 174M | for (i = 0; i < LEN_SUBFR; i++) { |
1058 | 171M | ener_code += code[i] * code[i]; |
1059 | 171M | } |
1060 | | |
1061 | 2.68M | ener_code = (FLOAT32)(10.0 * log10(ener_code / (FLOAT32)LEN_SUBFR)); |
1062 | 2.68M | pred_code_gain = mean_energy - ener_code; |
1063 | 2.68M | pred_code_gain = (FLOAT32)pow(10.0, pred_code_gain / 20.0); |
1064 | | |
1065 | 2.68M | dist_min = MAX_FLT_VAL; |
1066 | 2.68M | p2_qua_gain_table = (const FLOAT32 *)(p1_qua_gain_table + min_pitch_idx * 2); |
1067 | 345M | for (i = 0; i < ACELP_SEARCH_RANGE_QUANTIZER_IDX; i++) { |
1068 | 343M | g_pitch = *p2_qua_gain_table++; |
1069 | 343M | g_code = pred_code_gain * *p2_qua_gain_table++; |
1070 | 343M | dist = g_pitch * g_pitch * tgt_cb_corr_data[0] + g_pitch * tgt_cb_corr_data[1] + |
1071 | 343M | g_code * g_code * tgt_cb_corr_data[2] + g_code * tgt_cb_corr_data[3] + |
1072 | 343M | g_pitch * g_code * tgt_cb_corr_data[4]; |
1073 | 343M | if (dist < dist_min) { |
1074 | 17.6M | dist_min = dist; |
1075 | 17.6M | indice = i; |
1076 | 17.6M | } |
1077 | 343M | } |
1078 | 2.68M | indice += min_pitch_idx; |
1079 | 2.68M | *pitch_gain = p1_qua_gain_table[indice * 2]; |
1080 | 2.68M | *code_gain = p1_qua_gain_table[indice * 2 + 1] * pred_code_gain; |
1081 | 2.68M | *qunt_idx = indice; |
1082 | 2.68M | } |