/src/adhd/cras/src/dsp/eq2.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright 2013 The ChromiumOS Authors |
2 | | * Use of this source code is governed by a BSD-style license that can be |
3 | | * found in the LICENSE file. |
4 | | */ |
5 | | |
6 | | #include "cras/src/dsp/eq2.h" |
7 | | |
8 | | #include <errno.h> |
9 | | #include <stdlib.h> |
10 | | |
11 | | #include "cras/src/dsp/biquad.h" |
12 | | #include "user/eq.h" |
13 | | |
/* Number of channels an eq2 instance filters; sizes the per-channel arrays
 * in struct eq2 below. */
14 | 0 | #define EQ2_NUM_CHANNELS 2 |
15 | | |
/* State of a two-channel equalizer built from per-channel biquad chains. */
16 | | struct eq2 {  |
/* n[ch]: how many biquads have been appended to channel ch so far. */
17 | | int n[EQ2_NUM_CHANNELS]; |
/* biquad[i][ch]: the i-th biquad of channel ch. The two channels' biquads at
 * the same position i sit next to each other in memory. */
18 | | struct biquad biquad[MAX_BIQUADS_PER_EQ2][EQ2_NUM_CHANNELS]; |
19 | | }; |
20 | | |
21 | 0 | struct eq2* eq2_new() { |
22 | 0 | int i, j; |
23 | 0 | struct eq2* eq2 = calloc(1, sizeof(*eq2)); |
24 | 0 | if (!eq2) { |
25 | 0 | return NULL; |
26 | 0 | } |
27 | | |
28 | | /* Initialize all biquads to identity filter, so if two channels have |
29 | | * different numbers of biquads, it still works. */ |
30 | 0 | for (i = 0; i < MAX_BIQUADS_PER_EQ2; i++) { |
31 | 0 | for (j = 0; j < EQ2_NUM_CHANNELS; j++) { |
32 | 0 | biquad_set(&eq2->biquad[i][j], BQ_NONE, 0, 0, 0); |
33 | 0 | } |
34 | 0 | } |
35 | |
|
36 | 0 | return eq2; |
37 | 0 | } |
38 | | |
/* Releases an eq2 obtained from eq2_new(). The pointer is handed straight to
 * free(), so passing NULL is harmless. */
void eq2_free(struct eq2* eq2) {
  free(eq2);
}
42 | | |
43 | | int eq2_append_biquad(struct eq2* eq2, |
44 | | int channel, |
45 | | enum biquad_type type, |
46 | | float freq, |
47 | | float Q, |
48 | 0 | float gain) { |
49 | 0 | if (eq2->n[channel] >= MAX_BIQUADS_PER_EQ2) { |
50 | 0 | return -EINVAL; |
51 | 0 | } |
52 | 0 | biquad_set(&eq2->biquad[eq2->n[channel]++][channel], type, freq, Q, gain); |
53 | 0 | return 0; |
54 | 0 | } |
55 | | |
56 | | int eq2_append_biquad_direct(struct eq2* eq2, |
57 | | int channel, |
58 | 0 | const struct biquad* biquad) { |
59 | 0 | if (eq2->n[channel] >= MAX_BIQUADS_PER_EQ2) { |
60 | 0 | return -EINVAL; |
61 | 0 | } |
62 | 0 | eq2->biquad[eq2->n[channel]++][channel] = *biquad; |
63 | 0 | return 0; |
64 | 0 | } |
65 | | |
66 | | static inline void eq2_process_one(struct biquad (*bq)[2], |
67 | | float* data0, |
68 | | float* data1, |
69 | 0 | int count) { |
70 | 0 | struct biquad* qL = &bq[0][0]; |
71 | 0 | struct biquad* qR = &bq[0][1]; |
72 | |
|
73 | 0 | float x1L = qL->x1; |
74 | 0 | float x2L = qL->x2; |
75 | 0 | float y1L = qL->y1; |
76 | 0 | float y2L = qL->y2; |
77 | 0 | float b0L = qL->b0; |
78 | 0 | float b1L = qL->b1; |
79 | 0 | float b2L = qL->b2; |
80 | 0 | float a1L = qL->a1; |
81 | 0 | float a2L = qL->a2; |
82 | |
|
83 | 0 | float x1R = qR->x1; |
84 | 0 | float x2R = qR->x2; |
85 | 0 | float y1R = qR->y1; |
86 | 0 | float y2R = qR->y2; |
87 | 0 | float b0R = qR->b0; |
88 | 0 | float b1R = qR->b1; |
89 | 0 | float b2R = qR->b2; |
90 | 0 | float a1R = qR->a1; |
91 | 0 | float a2R = qR->a2; |
92 | |
|
93 | 0 | int j; |
94 | 0 | for (j = 0; j < count; j++) { |
95 | 0 | float xL = data0[j]; |
96 | 0 | float xR = data1[j]; |
97 | |
|
98 | 0 | float yL = b0L * xL + b1L * x1L + b2L * x2L - a1L * y1L - a2L * y2L; |
99 | 0 | x2L = x1L; |
100 | 0 | x1L = xL; |
101 | 0 | y2L = y1L; |
102 | 0 | y1L = yL; |
103 | |
|
104 | 0 | float yR = b0R * xR + b1R * x1R + b2R * x2R - a1R * y1R - a2R * y2R; |
105 | 0 | x2R = x1R; |
106 | 0 | x1R = xR; |
107 | 0 | y2R = y1R; |
108 | 0 | y1R = yR; |
109 | |
|
110 | 0 | data0[j] = yL; |
111 | 0 | data1[j] = yR; |
112 | 0 | } |
113 | |
|
114 | 0 | qL->x1 = x1L; |
115 | 0 | qL->x2 = x2L; |
116 | 0 | qL->y1 = y1L; |
117 | 0 | qL->y2 = y2L; |
118 | 0 | qR->x1 = x1R; |
119 | 0 | qR->x2 = x2R; |
120 | 0 | qR->y1 = y1R; |
121 | 0 | qR->y2 = y2R; |
122 | 0 | } |
123 | | |
124 | | #ifdef __ARM_NEON__ |
125 | | #include <arm_neon.h> |
/* NEON kernel: runs `count` samples of both channels through two consecutive
 * biquad stages, bq[0] followed by bq[1], in place.
 *
 * Each 2-lane vector carries channel 0 (data0) in lane 0 and channel 1
 * (data1) in lane 1. "q" names refer to the first stage and "r" names to the
 * second. The first stage's output is the second stage's input, so the first
 * stage's output history (y1, y2) doubles as the second stage's input
 * history; rL/rR x1 and x2 are neither read nor written here.
 *
 * The asm loop decrements `count` after doing the work of an iteration and
 * branches while the result is non-zero, so the caller must pass count >= 1.
 */
126 | | static inline void eq2_process_two_neon(struct biquad (*bq)[2], |
127 | | float* data0, |
128 | | float* data1, |
129 | | int count) { |
130 | | struct biquad* qL = &bq[0][0]; |
131 | | struct biquad* rL = &bq[1][0]; |
132 | | struct biquad* qR = &bq[0][1]; |
133 | | struct biquad* rR = &bq[1][1]; |
134 | | |
/* First stage: input history (x1, x2), output history (y1, y2) and
 * coefficients, packed as {channel 0, channel 1}. */
135 | | float32x2_t x1 = {qL->x1, qR->x1}; |
136 | | float32x2_t x2 = {qL->x2, qR->x2}; |
137 | | float32x2_t y1 = {qL->y1, qR->y1}; |
138 | | float32x2_t y2 = {qL->y2, qR->y2}; |
139 | | float32x2_t qb0 = {qL->b0, qR->b0}; |
140 | | float32x2_t qb1 = {qL->b1, qR->b1}; |
141 | | float32x2_t qb2 = {qL->b2, qR->b2}; |
142 | | float32x2_t qa1 = {qL->a1, qR->a1}; |
143 | | float32x2_t qa2 = {qL->a2, qR->a2}; |
144 | | |
/* Second stage: output history (z1, z2) and coefficients. */
145 | | float32x2_t z1 = {rL->y1, rR->y1}; |
146 | | float32x2_t z2 = {rL->y2, rR->y2}; |
147 | | float32x2_t rb0 = {rL->b0, rR->b0}; |
148 | | float32x2_t rb1 = {rL->b1, rR->b1}; |
149 | | float32x2_t rb2 = {rL->b2, rR->b2}; |
150 | | float32x2_t ra1 = {rL->a1, rR->a1}; |
151 | | float32x2_t ra2 = {rL->a2, rR->a2}; |
152 | | |
/* Per iteration: load one sample per channel into d0, compute the first
 * stage output in d1 and the second stage output in d2, shift all three
 * histories, then store d2 back through data0/data1 with pointer writeback. */
153 | | // clang-format off |
154 | | __asm__ __volatile__( |
155 | | // d0 = x, d1 = y, d2 = z |
156 | | "1: \n" |
157 | | "vmul.f32 d1, %P[qb1], %P[x1] \n" |
158 | | "vld1.32 d0[0], [%[data0]] \n" |
159 | | "vld1.32 d0[1], [%[data1]] \n" |
160 | | "subs %[count], #1 \n" |
161 | | "vmul.f32 d2, %P[rb1], %P[y1] \n" |
162 | | "vmla.f32 d1, %P[qb0], d0 \n" |
163 | | "vmla.f32 d1, %P[qb2], %P[x2] \n" |
164 | | "vmov.f32 %P[x2], %P[x1] \n" |
165 | | "vmov.f32 %P[x1], d0 \n" |
166 | | "vmls.f32 d1, %P[qa1], %P[y1] \n" |
167 | | "vmls.f32 d1, %P[qa2], %P[y2] \n" |
168 | | "vmla.f32 d2, %P[rb0], d1 \n" |
169 | | "vmla.f32 d2, %P[rb2], %P[y2] \n" |
170 | | "vmov.f32 %P[y2], %P[y1] \n" |
171 | | "vmov.f32 %P[y1], d1 \n" |
172 | | "vmls.f32 d2, %P[ra1], %P[z1] \n" |
173 | | "vmls.f32 d2, %P[ra2], %P[z2] \n" |
174 | | "vmov.f32 %P[z2], %P[z1] \n" |
175 | | "vmov.f32 %P[z1], d2 \n" |
176 | | "vst1.f32 d2[0], [%[data0]]! \n" |
177 | | "vst1.f32 d2[1], [%[data1]]! \n" |
178 | | "bne 1b \n" |
179 | | : // output |
180 | | [data0]"+r"(data0), |
181 | | [data1]"+r"(data1), |
182 | | [count]"+r"(count), |
183 | | [x1]"+w"(x1), |
184 | | [x2]"+w"(x2), |
185 | | [y1]"+w"(y1), |
186 | | [y2]"+w"(y2), |
187 | | [z1]"+w"(z1), |
188 | | [z2]"+w"(z2) |
189 | | : // input |
190 | | [qb0]"w"(qb0), |
191 | | [qb1]"w"(qb1), |
192 | | [qb2]"w"(qb2), |
193 | | [qa1]"w"(qa1), |
194 | | [qa2]"w"(qa2), |
195 | | [rb0]"w"(rb0), |
196 | | [rb1]"w"(rb1), |
197 | | [rb2]"w"(rb2), |
198 | | [ra1]"w"(ra1), |
199 | | [ra2]"w"(ra2) |
200 | | : // clobber |
201 | | "d0", "d1", "d2", "memory", "cc"); |
202 | | // clang-format on |
203 | | |
/* Store the updated histories back: lane 0 to the channel-0 biquads, lane 1
 * to the channel-1 biquads. */
204 | | qL->x1 = x1[0]; |
205 | | qL->x2 = x2[0]; |
206 | | qL->y1 = y1[0]; |
207 | | qL->y2 = y2[0]; |
208 | | rL->y1 = z1[0]; |
209 | | rL->y2 = z2[0]; |
210 | | qR->x1 = x1[1]; |
211 | | qR->x2 = x2[1]; |
212 | | qR->y1 = y1[1]; |
213 | | qR->y2 = y2[1]; |
214 | | rR->y1 = z1[1]; |
215 | | rR->y2 = z2[1]; |
216 | | } |
217 | | #endif |
218 | | |
219 | | #if defined(__SSE3__) && defined(__x86_64__) |
220 | | #include <emmintrin.h> |
/* SSE3 kernel: runs `count` samples of both channels through two consecutive
 * biquad stages, bq[0] followed by bq[1], in place.
 *
 * Lane 0 of each vector carries channel 0 (data0) and lane 1 carries
 * channel 1 (data1). "q" names refer to the first stage and "r" names to the
 * second. The first stage's output is the second stage's input, so the first
 * stage's output history (y1, y2) doubles as the second stage's input
 * history; rL/rR x1 and x2 are neither read nor written here.
 *
 * The asm loop decrements `count` at the end of each iteration and jumps
 * back while the result is non-zero, so the caller must pass count >= 1.
 */
221 | | static inline void eq2_process_two_sse3(struct biquad (*bq)[2], |
222 | | float* data0, |
223 | | float* data1, |
224 | | int count) { |
225 | | struct biquad* qL = &bq[0][0]; |
226 | | struct biquad* rL = &bq[1][0]; |
227 | | struct biquad* qR = &bq[0][1]; |
228 | | struct biquad* rR = &bq[1][1]; |
229 | | |
/* First stage: input history (x1, x2), output history (y1, y2) and
 * coefficients; lane 0 = channel 0, lane 1 = channel 1. */
230 | | __m128 x1 = {qL->x1, qR->x1}; |
231 | | __m128 x2 = {qL->x2, qR->x2}; |
232 | | __m128 y1 = {qL->y1, qR->y1}; |
233 | | __m128 y2 = {qL->y2, qR->y2}; |
234 | | __m128 qb0 = {qL->b0, qR->b0}; |
235 | | __m128 qb1 = {qL->b1, qR->b1}; |
236 | | __m128 qb2 = {qL->b2, qR->b2}; |
237 | | __m128 qa1 = {qL->a1, qR->a1}; |
238 | | __m128 qa2 = {qL->a2, qR->a2}; |
239 | | |
/* Second stage: output history (z1, z2) and coefficients. */
240 | | __m128 z1 = {rL->y1, rR->y1}; |
241 | | __m128 z2 = {rL->y2, rR->y2}; |
242 | | __m128 rb0 = {rL->b0, rR->b0}; |
243 | | __m128 rb1 = {rL->b1, rR->b1}; |
244 | | __m128 rb2 = {rL->b2, rR->b2}; |
245 | | __m128 ra1 = {rL->a1, rR->a1}; |
246 | | __m128 ra2 = {rL->a2, rR->a2}; |
247 | | |
/* Per iteration: xmm2 holds the two input samples, xmm0 ends up as the first
 * stage output and xmm1 as the second stage output. The x2, y2 and z2
 * registers are reused as scratch for partial products before they are
 * reloaded from x1, y1 and z1, so the instruction order matters. qb0, qb1 and
 * qb2 are memory operands; every other coefficient is held in a register.
 * The final shufps moves lane 1 down to lane 0 so that movss can store the
 * channel-1 result. */
248 | | // clang-format off |
249 | | __asm__ __volatile__( |
250 | | "1: \n" |
251 | | "movss (%[data0]), %%xmm2 \n" |
252 | | "movss (%[data1]), %%xmm1 \n" |
253 | | "unpcklps %%xmm1, %%xmm2 \n" |
254 | | "mulps %[qb2],%[x2] \n" |
255 | | "lddqu %[qb0],%%xmm0 \n" |
256 | | "mulps %[ra2],%[z2] \n" |
257 | | "lddqu %[qb1],%%xmm1 \n" |
258 | | "mulps %%xmm2,%%xmm0 \n" |
259 | | "mulps %[x1],%%xmm1 \n" |
260 | | "addps %%xmm1,%%xmm0 \n" |
261 | | "movaps %[qa1],%%xmm1 \n" |
262 | | "mulps %[y1],%%xmm1 \n" |
263 | | "addps %[x2],%%xmm0 \n" |
264 | | "movaps %[rb1],%[x2] \n" |
265 | | "mulps %[y1],%[x2] \n" |
266 | | "subps %%xmm1,%%xmm0 \n" |
267 | | "movaps %[qa2],%%xmm1 \n" |
268 | | "mulps %[y2],%%xmm1 \n" |
269 | | "mulps %[rb2],%[y2] \n" |
270 | | "subps %%xmm1,%%xmm0 \n" |
271 | | "movaps %[rb0],%%xmm1 \n" |
272 | | "mulps %%xmm0,%%xmm1 \n" |
273 | | "addps %[x2],%%xmm1 \n" |
274 | | "movaps %[x1],%[x2] \n" |
275 | | "movaps %%xmm2,%[x1] \n" |
276 | | "addps %[y2],%%xmm1 \n" |
277 | | "movaps %[ra1],%[y2] \n" |
278 | | "mulps %[z1],%[y2] \n" |
279 | | "subps %[y2],%%xmm1 \n" |
280 | | "movaps %[y1],%[y2] \n" |
281 | | "movaps %%xmm0,%[y1] \n" |
282 | | "subps %[z2],%%xmm1 \n" |
283 | | "movaps %[z1],%[z2] \n" |
284 | | "movaps %%xmm1,%[z1] \n" |
285 | | "movss %%xmm1, (%[data0]) \n" |
286 | | "shufps $1, %%xmm1, %%xmm1 \n" |
287 | | "movss %%xmm1, (%[data1]) \n" |
288 | | "add $4, %[data0] \n" |
289 | | "add $4, %[data1] \n" |
290 | | "sub $1, %[count] \n" |
291 | | "jnz 1b \n" |
292 | | : // output |
293 | | [data0]"+r"(data0), |
294 | | [data1]"+r"(data1), |
295 | | [count]"+r"(count), |
296 | | [x1]"+x"(x1), |
297 | | [x2]"+x"(x2), |
298 | | [y1]"+x"(y1), |
299 | | [y2]"+x"(y2), |
300 | | [z1]"+x"(z1), |
301 | | [z2]"+x"(z2) |
302 | | : // input |
303 | | [qb0]"m"(qb0), |
304 | | [qb1]"m"(qb1), |
305 | | [qb2]"m"(qb2), |
306 | | [qa1]"x"(qa1), |
307 | | [qa2]"x"(qa2), |
308 | | [rb0]"x"(rb0), |
309 | | [rb1]"x"(rb1), |
310 | | [rb2]"x"(rb2), |
311 | | [ra1]"x"(ra1), |
312 | | [ra2]"x"(ra2) |
313 | | : // clobber |
314 | | "xmm0", "xmm1", "xmm2", "memory", "cc"); |
315 | | // clang-format on |
316 | | |
/* Store the updated histories back: lane 0 to the channel-0 biquads, lane 1
 * to the channel-1 biquads. */
317 | | qL->x1 = x1[0]; |
318 | | qL->x2 = x2[0]; |
319 | | qL->y1 = y1[0]; |
320 | | qL->y2 = y2[0]; |
321 | | rL->y1 = z1[0]; |
322 | | rL->y2 = z2[0]; |
323 | | qR->x1 = x1[1]; |
324 | | qR->x2 = x2[1]; |
325 | | qR->y1 = y1[1]; |
326 | | qR->y2 = y2[1]; |
327 | | rR->y1 = z1[1]; |
328 | | rR->y2 = z2[1]; |
329 | | } |
330 | | #endif |
331 | | |
332 | 0 | void eq2_process(struct eq2* eq2, float* data0, float* data1, int count) { |
333 | 0 | int i; |
334 | 0 | int n; |
335 | 0 | if (!count) { |
336 | 0 | return; |
337 | 0 | } |
338 | 0 | n = eq2->n[0]; |
339 | 0 | if (eq2->n[1] > n) { |
340 | 0 | n = eq2->n[1]; |
341 | 0 | } |
342 | 0 | for (i = 0; i < n; i += EQ2_NUM_CHANNELS) { |
343 | 0 | if (i + 1 == n) { |
344 | 0 | eq2_process_one(&eq2->biquad[i], data0, data1, count); |
345 | 0 | } else { |
346 | | #if defined(__ARM_NEON__) |
347 | | eq2_process_two_neon(&eq2->biquad[i], data0, data1, count); |
348 | | #elif defined(__SSE3__) && defined(__x86_64__) |
349 | | eq2_process_two_sse3(&eq2->biquad[i], data0, data1, count); |
350 | | #else |
351 | 0 | eq2_process_one(&eq2->biquad[i], data0, data1, count); |
352 | 0 | eq2_process_one(&eq2->biquad[i + 1], data0, data1, count); |
353 | 0 | #endif |
354 | 0 | } |
355 | 0 | } |
356 | 0 | } |
357 | | |
358 | | int eq2_convert_channel_response(struct eq2* eq2, |
359 | | int32_t* bq_cfg, |
360 | 0 | int channel) { |
361 | 0 | float accumulated_gain = 1.0; |
362 | 0 | int ret; |
363 | |
|
364 | 0 | for (int i = 0; i < eq2->n[channel]; i++) { |
365 | 0 | struct biquad* bq = &eq2->biquad[i][channel]; |
366 | | |
367 | | /* For i = 0..(n-2), accumulated_gain is kept accumulating in loop. |
368 | | * For i = n-1, the last biquad element, accumulated_gain is dumped to the |
369 | | * converted blob by calling biquad_convert_blob() with dump_gain = 1. |
370 | | * To prevent the sample saturation on each node across the series of biquad |
371 | | * as the channel response intermediate nodes, considering that DSP EQ is |
372 | | * the fixed-point design. |
373 | | */ |
374 | 0 | ret = biquad_convert_blob(bq, bq_cfg, &accumulated_gain, |
375 | 0 | (i == eq2->n[channel] - 1) /* dump_gain */); |
376 | 0 | if (ret < 0) { |
377 | 0 | return ret; |
378 | 0 | } |
379 | 0 | bq_cfg += SOF_EQ_IIR_NBIQUAD; |
380 | 0 | } |
381 | 0 | return 0; |
382 | 0 | } |
383 | | |
384 | | int eq2_convert_params_to_blob(struct eq2* eq2, |
385 | | uint32_t** config, |
386 | 0 | size_t* config_size) { |
387 | 0 | const size_t biquad_size = sizeof(struct sof_eq_iir_biquad); |
388 | 0 | const size_t eq_iir_hdr_size = sizeof(struct sof_eq_iir_header); |
389 | 0 | const size_t eq_cfg_hdr_size = sizeof(struct sof_eq_iir_config); |
390 | |
|
391 | 0 | if (!eq2) { |
392 | 0 | return -ENOENT; |
393 | 0 | } |
394 | | |
395 | 0 | if (eq2->n[0] <= 0 || eq2->n[1] <= 0) { |
396 | 0 | return -ENODATA; |
397 | 0 | } |
398 | | |
399 | 0 | size_t response_config_size[EQ2_NUM_CHANNELS] = { |
400 | 0 | eq_iir_hdr_size + eq2->n[0] * biquad_size, /* response of ch-0 */ |
401 | 0 | eq_iir_hdr_size + eq2->n[1] * biquad_size /* response of ch-1 */ |
402 | 0 | }; |
403 | |
|
404 | 0 | size_t size = |
405 | 0 | eq_cfg_hdr_size + /* sof_eq_iir_config header */ |
406 | 0 | EQ2_NUM_CHANNELS * sizeof(uint32_t) + /* assign_response[channels] */ |
407 | 0 | response_config_size[0] + /* 1st response config data */ |
408 | 0 | response_config_size[1]; /* 2nd response config data */ |
409 | |
|
410 | 0 | struct sof_eq_iir_config* eq_config = |
411 | 0 | (struct sof_eq_iir_config*)calloc(1, size); |
412 | 0 | if (!eq_config) { |
413 | 0 | return -ENOMEM; |
414 | 0 | } |
415 | | |
416 | | /* Fill sof_eq_iir_config header. */ |
417 | 0 | eq_config->size = size; |
418 | 0 | eq_config->channels_in_config = EQ2_NUM_CHANNELS; |
419 | 0 | eq_config->number_of_responses = EQ2_NUM_CHANNELS; |
420 | | |
421 | | /* Fill assign_response[channels]. */ |
422 | 0 | eq_config->data[0] = 0; /* assign response-0 to ch-0 */ |
423 | 0 | eq_config->data[1] = 1; /* assign response-1 to ch-1 */ |
424 | | |
425 | | /* Fill config data per response. */ |
426 | 0 | struct sof_eq_iir_header* eq_hdr = |
427 | 0 | (struct sof_eq_iir_header*)(&eq_config->data[2]); |
428 | 0 | int ret; |
429 | 0 | for (int channel = 0; channel < EQ2_NUM_CHANNELS; channel++) { |
430 | | /* Fill the header information. */ |
431 | 0 | eq_hdr->num_sections = eq2->n[channel]; |
432 | 0 | eq_hdr->num_sections_in_series = eq2->n[channel]; |
433 | | |
434 | | /* Fill sof_eq_iir_biquad for biquads in one channel. */ |
435 | 0 | ret = eq2_convert_channel_response(eq2, eq_hdr->biquads, channel); |
436 | 0 | if (ret < 0) { |
437 | 0 | free(eq_config); |
438 | 0 | return ret; |
439 | 0 | } |
440 | | |
441 | | /* Move the address to the next sof_eq_iir_header element. */ |
442 | 0 | eq_hdr = (struct sof_eq_iir_header*)((uint8_t*)eq_hdr + |
443 | 0 | response_config_size[channel]); |
444 | 0 | } |
445 | | |
446 | 0 | *config = (uint32_t*)eq_config; |
447 | 0 | *config_size = size; |
448 | 0 | return 0; |
449 | 0 | } |