/src/adhd/cras/src/dsp/eq2.c

Source (jump to first uncovered line)
/* Copyright 2013 The ChromiumOS Authors
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "cras/src/dsp/eq2.h"

#include <errno.h>
#include <stdlib.h>

#include "cras/src/dsp/biquad.h"
#include "user/eq.h"

/* Number of audio channels this equalizer handles. It sizes n[] and the
 * second dimension of biquad[][] below. */
#define EQ2_NUM_CHANNELS 2

/* A two-channel equalizer: each channel owns a chain of up to
 * MAX_BIQUADS_PER_EQ2 biquad filters. */
struct eq2 {
  /* n[c] is the number of biquads appended to channel c so far. */
  int n[EQ2_NUM_CHANNELS];
  /* biquad[i][c] is the i-th biquad of channel c. eq2_new() sets every slot
   * to BQ_NONE, so slots beyond n[c] are still valid filters. */
  struct biquad biquad[MAX_BIQUADS_PER_EQ2][EQ2_NUM_CHANNELS];
};

/* Allocates an equalizer with no biquads appended on either channel.
 *
 * Every slot of the biquad table is set to BQ_NONE (an identity filter), so
 * processing still works when the two channels end up with different numbers
 * of biquads.
 *
 * Returns:
 *    The new eq2, or NULL if the allocation fails. Release it with
 *    eq2_free().
 */
struct eq2* eq2_new() {
  struct eq2* eq = calloc(1, sizeof(*eq));
  if (eq == NULL) {
    return NULL;
  }

  for (int slot = 0; slot < MAX_BIQUADS_PER_EQ2; slot++) {
    for (int ch = 0; ch < EQ2_NUM_CHANNELS; ch++) {
      biquad_set(&eq->biquad[slot][ch], BQ_NONE, 0, 0, 0);
    }
  }

  return eq;
}

/* Releases an equalizer obtained from eq2_new(). The pointer is handed
 * straight to free(), so passing NULL is harmless. */
void eq2_free(struct eq2* eq2) {
  free(eq2);
}

int eq2_append_biquad(struct eq2* eq2,
                      int channel,
                      enum biquad_type type,
                      float freq,
                      float Q,
                      float gain) {
  if (eq2->n[channel] >= MAX_BIQUADS_PER_EQ2) {
    return -EINVAL;
  }
  biquad_set(&eq2->biquad[eq2->n[channel]++][channel], type, freq, Q, gain);
  return 0;
}

int eq2_append_biquad_direct(struct eq2* eq2,
                             int channel,
                             const struct biquad* biquad) {
  if (eq2->n[channel] >= MAX_BIQUADS_PER_EQ2) {
    return -EINVAL;
  }
  eq2->biquad[eq2->n[channel]++][channel] = *biquad;
  return 0;
}

/* Runs a single biquad stage over both channels, in place.
 *
 * bq[0][0] filters data0 and bq[0][1] filters data1. For every sample the
 * output is
 *     y = b0*x + b1*x1 + b2*x2 - a1*y1 - a2*y2
 * where x1/x2 are the two previous inputs and y1/y2 the two previous outputs.
 * That history is loaded from the biquads on entry and stored back on exit,
 * so consecutive calls continue the same stream.
 *
 * Args:
 *    bq - Points at the [left, right] biquad pair for this stage.
 *    data0, data1 - Sample buffers of at least count floats each; overwritten
 *        with the filtered samples.
 *    count - Number of samples per buffer. Nothing is processed if it is not
 *        positive.
 */
static inline void eq2_process_one(struct biquad (*bq)[2],
                                   float* data0,
                                   float* data1,
                                   int count) {
  struct biquad* left = &bq[0][0];
  struct biquad* right = &bq[0][1];

  /* Coefficients never change inside the loop. */
  const float lb0 = left->b0, lb1 = left->b1, lb2 = left->b2;
  const float la1 = left->a1, la2 = left->a2;
  const float rb0 = right->b0, rb1 = right->b1, rb2 = right->b2;
  const float ra1 = right->a1, ra2 = right->a2;

  /* Working copies of the filter history. */
  float lx1 = left->x1, lx2 = left->x2, ly1 = left->y1, ly2 = left->y2;
  float rx1 = right->x1, rx2 = right->x2, ry1 = right->y1, ry2 = right->y2;

  for (int k = 0; k < count; k++) {
    /* Both inputs are read before either output is written. */
    const float in_l = data0[k];
    const float in_r = data1[k];

    const float out_l =
        lb0 * in_l + lb1 * lx1 + lb2 * lx2 - la1 * ly1 - la2 * ly2;
    lx2 = lx1;
    lx1 = in_l;
    ly2 = ly1;
    ly1 = out_l;

    const float out_r =
        rb0 * in_r + rb1 * rx1 + rb2 * rx2 - ra1 * ry1 - ra2 * ry2;
    rx2 = rx1;
    rx1 = in_r;
    ry2 = ry1;
    ry1 = out_r;

    data0[k] = out_l;
    data1[k] = out_r;
  }

  /* Persist the history for the next call. */
  left->x1 = lx1;
  left->x2 = lx2;
  left->y1 = ly1;
  left->y2 = ly2;
  right->x1 = rx1;
  right->x2 = rx2;
  right->y1 = ry1;
  right->y2 = ry2;
}

#ifdef __ARM_NEON__
#include <arm_neon.h>
/* NEON kernel: runs two consecutive biquad stages over both channels, in
 * place, in a single pass over the samples.
 *
 * bq[0] is the first stage ("q") and bq[1] the second stage ("r"); index 0 of
 * each pair is the left channel (data0) and index 1 the right channel
 * (data1). Every 2-lane vector below holds {left, right}.
 *
 * The second stage's input history is the first stage's output history
 * (y1, y2), so the x1/x2 fields of bq[1] are neither read nor written here.
 *
 * The asm loop executes its body before testing count (subs ... bne), so the
 * caller must pass count >= 1.
 */
static inline void eq2_process_two_neon(struct biquad (*bq)[2],
                                        float* data0,
                                        float* data1,
                                        int count) {
  struct biquad* qL = &bq[0][0];
  struct biquad* rL = &bq[1][0];
  struct biquad* qR = &bq[0][1];
  struct biquad* rR = &bq[1][1];

  /* First stage: input history (x1, x2), output history (y1, y2) and
   * coefficients. */
  float32x2_t x1 = {qL->x1, qR->x1};
  float32x2_t x2 = {qL->x2, qR->x2};
  float32x2_t y1 = {qL->y1, qR->y1};
  float32x2_t y2 = {qL->y2, qR->y2};
  float32x2_t qb0 = {qL->b0, qR->b0};
  float32x2_t qb1 = {qL->b1, qR->b1};
  float32x2_t qb2 = {qL->b2, qR->b2};
  float32x2_t qa1 = {qL->a1, qR->a1};
  float32x2_t qa2 = {qL->a2, qR->a2};

  /* Second stage: output history (z1, z2) and coefficients. */
  float32x2_t z1 = {rL->y1, rR->y1};
  float32x2_t z2 = {rL->y2, rR->y2};
  float32x2_t rb0 = {rL->b0, rR->b0};
  float32x2_t rb1 = {rL->b1, rR->b1};
  float32x2_t rb2 = {rL->b2, rR->b2};
  float32x2_t ra1 = {rL->a1, rR->a1};
  float32x2_t ra2 = {rL->a2, rR->a2};

  /* Per iteration:
   *   d0 = {data0[j], data1[j]}
   *   d1 = qb0*d0 + qb1*x1 + qb2*x2 - qa1*y1 - qa2*y2   (first stage output)
   *   d2 = rb0*d1 + rb1*y1 + rb2*y2 - ra1*z1 - ra2*z2   (second stage output)
   * followed by shifting the three histories and storing d2 back through
   * data0/data1, which are post-incremented by the stores. */
  // clang-format off
  __asm__ __volatile__(
    // d0 = x, d1 = y, d2 = z
    "1:                                     \n"
    "vmul.f32 d1, %P[qb1], %P[x1]           \n"
    "vld1.32 d0[0], [%[data0]]              \n"
    "vld1.32 d0[1], [%[data1]]              \n"
    "subs %[count], #1                      \n"
    "vmul.f32 d2, %P[rb1], %P[y1]           \n"
    "vmla.f32 d1, %P[qb0], d0               \n"
    "vmla.f32 d1, %P[qb2], %P[x2]           \n"
    "vmov.f32 %P[x2], %P[x1]                \n"
    "vmov.f32 %P[x1], d0                    \n"
    "vmls.f32 d1, %P[qa1], %P[y1]           \n"
    "vmls.f32 d1, %P[qa2], %P[y2]           \n"
    "vmla.f32 d2, %P[rb0], d1               \n"
    "vmla.f32 d2, %P[rb2], %P[y2]           \n"
    "vmov.f32 %P[y2], %P[y1]                \n"
    "vmov.f32 %P[y1], d1                    \n"
    "vmls.f32 d2, %P[ra1], %P[z1]           \n"
    "vmls.f32 d2, %P[ra2], %P[z2]           \n"
    "vmov.f32 %P[z2], %P[z1]                \n"
    "vmov.f32 %P[z1], d2                    \n"
    "vst1.f32 d2[0], [%[data0]]!            \n"
    "vst1.f32 d2[1], [%[data1]]!            \n"
    "bne 1b                                 \n"
    : // output
      [data0]"+r"(data0),
      [data1]"+r"(data1),
      [count]"+r"(count),
      [x1]"+w"(x1),
      [x2]"+w"(x2),
      [y1]"+w"(y1),
      [y2]"+w"(y2),
      [z1]"+w"(z1),
      [z2]"+w"(z2)
    : // input
      [qb0]"w"(qb0),
      [qb1]"w"(qb1),
      [qb2]"w"(qb2),
      [qa1]"w"(qa1),
      [qa2]"w"(qa2),
      [rb0]"w"(rb0),
      [rb1]"w"(rb1),
      [rb2]"w"(rb2),
      [ra1]"w"(ra1),
      [ra2]"w"(ra2)
    : // clobber
      "d0", "d1", "d2", "memory", "cc");
  // clang-format on

  /* Write the updated histories back: lane 0 to the left-channel biquads,
   * lane 1 to the right-channel biquads. */
  qL->x1 = x1[0];
  qL->x2 = x2[0];
  qL->y1 = y1[0];
  qL->y2 = y2[0];
  rL->y1 = z1[0];
  rL->y2 = z2[0];
  qR->x1 = x1[1];
  qR->x2 = x2[1];
  qR->y1 = y1[1];
  qR->y2 = y2[1];
  rR->y1 = z1[1];
  rR->y2 = z2[1];
}
#endif

#if defined(__SSE3__) && defined(__x86_64__)
#include <emmintrin.h>
/* SSE3 kernel: runs two consecutive biquad stages over both channels, in
 * place, in a single pass over the samples.
 *
 * bq[0] is the first stage ("q") and bq[1] the second stage ("r"); index 0 of
 * each pair is the left channel (data0) and index 1 the right channel
 * (data1). Only lanes 0 (left) and 1 (right) of each __m128 carry data; the
 * two-element initializers leave the upper two lanes zero.
 *
 * The second stage's input history is the first stage's output history
 * (y1, y2), so the x1/x2 fields of bq[1] are neither read nor written here.
 *
 * The asm loop executes its body before testing count (sub ... jnz), so the
 * caller must pass count >= 1.
 */
static inline void eq2_process_two_sse3(struct biquad (*bq)[2],
                                        float* data0,
                                        float* data1,
                                        int count) {
  struct biquad* qL = &bq[0][0];
  struct biquad* rL = &bq[1][0];
  struct biquad* qR = &bq[0][1];
  struct biquad* rR = &bq[1][1];

  /* First stage: input history (x1, x2), output history (y1, y2) and
   * coefficients. */
  __m128 x1 = {qL->x1, qR->x1};
  __m128 x2 = {qL->x2, qR->x2};
  __m128 y1 = {qL->y1, qR->y1};
  __m128 y2 = {qL->y2, qR->y2};
  __m128 qb0 = {qL->b0, qR->b0};
  __m128 qb1 = {qL->b1, qR->b1};
  __m128 qb2 = {qL->b2, qR->b2};
  __m128 qa1 = {qL->a1, qR->a1};
  __m128 qa2 = {qL->a2, qR->a2};

  /* Second stage: output history (z1, z2) and coefficients. */
  __m128 z1 = {rL->y1, rR->y1};
  __m128 z2 = {rL->y2, rR->y2};
  __m128 rb0 = {rL->b0, rR->b0};
  __m128 rb1 = {rL->b1, rR->b1};
  __m128 rb2 = {rL->b2, rR->b2};
  __m128 ra1 = {rL->a1, rR->a1};
  __m128 ra2 = {rL->a2, rR->a2};

  /* Per iteration (AT&T operand order: source, destination):
   *   xmm2 = {data0[j], data1[j]}
   *   xmm0 = qb0*xmm2 + qb1*x1 + qb2*x2 - qa1*y1 - qa2*y2  (first stage out)
   *   xmm1 = rb0*xmm0 + rb1*y1 + rb2*y2 - ra1*z1 - ra2*z2  (second stage out)
   * x2, y2 and z2 double as scratch for partial products before they are
   * overwritten with the shifted history. The terms are not accumulated in
   * the same order as in eq2_process_one().
   * qb0/qb1/qb2 are memory operands while the other coefficients live in
   * registers -- presumably to fit within the 16 xmm registers; confirm
   * before changing the constraints. */
  // clang-format off
  __asm__ __volatile__(
    "1:                                     \n"
    "movss (%[data0]), %%xmm2               \n"
    "movss (%[data1]), %%xmm1               \n"
    "unpcklps %%xmm1, %%xmm2                \n"
    "mulps %[qb2],%[x2]                     \n"
    "lddqu %[qb0],%%xmm0                    \n"
    "mulps %[ra2],%[z2]                     \n"
    "lddqu %[qb1],%%xmm1                    \n"
    "mulps %%xmm2,%%xmm0                    \n"
    "mulps %[x1],%%xmm1                     \n"
    "addps %%xmm1,%%xmm0                    \n"
    "movaps %[qa1],%%xmm1                   \n"
    "mulps %[y1],%%xmm1                     \n"
    "addps %[x2],%%xmm0                     \n"
    "movaps %[rb1],%[x2]                    \n"
    "mulps %[y1],%[x2]                      \n"
    "subps %%xmm1,%%xmm0                    \n"
    "movaps %[qa2],%%xmm1                   \n"
    "mulps %[y2],%%xmm1                     \n"
    "mulps %[rb2],%[y2]                     \n"
    "subps %%xmm1,%%xmm0                    \n"
    "movaps %[rb0],%%xmm1                   \n"
    "mulps %%xmm0,%%xmm1                    \n"
    "addps %[x2],%%xmm1                     \n"
    "movaps %[x1],%[x2]                     \n"
    "movaps %%xmm2,%[x1]                    \n"
    "addps %[y2],%%xmm1                     \n"
    "movaps %[ra1],%[y2]                    \n"
    "mulps %[z1],%[y2]                      \n"
    "subps %[y2],%%xmm1                     \n"
    "movaps %[y1],%[y2]                     \n"
    "movaps %%xmm0,%[y1]                    \n"
    "subps %[z2],%%xmm1                     \n"
    "movaps %[z1],%[z2]                     \n"
    "movaps %%xmm1,%[z1]                    \n"
    "movss %%xmm1, (%[data0])               \n"
    "shufps $1, %%xmm1, %%xmm1              \n"
    "movss %%xmm1, (%[data1])               \n"
    "add $4, %[data0]                       \n"
    "add $4, %[data1]                       \n"
    "sub $1, %[count]                       \n"
    "jnz 1b                                 \n"
    : // output
      [data0]"+r"(data0),
      [data1]"+r"(data1),
      [count]"+r"(count),
      [x1]"+x"(x1),
      [x2]"+x"(x2),
      [y1]"+x"(y1),
      [y2]"+x"(y2),
      [z1]"+x"(z1),
      [z2]"+x"(z2)
    : // input
      [qb0]"m"(qb0),
      [qb1]"m"(qb1),
      [qb2]"m"(qb2),
      [qa1]"x"(qa1),
      [qa2]"x"(qa2),
      [rb0]"x"(rb0),
      [rb1]"x"(rb1),
      [rb2]"x"(rb2),
      [ra1]"x"(ra1),
      [ra2]"x"(ra2)
    : // clobber
      "xmm0", "xmm1", "xmm2", "memory", "cc");
  // clang-format on

  /* Write the updated histories back: lane 0 to the left-channel biquads,
   * lane 1 to the right-channel biquads. */
  qL->x1 = x1[0];
  qL->x2 = x2[0];
  qL->y1 = y1[0];
  qL->y2 = y2[0];
  rL->y1 = z1[0];
  rL->y2 = z2[0];
  qR->x1 = x1[1];
  qR->x2 = x2[1];
  qR->y1 = y1[1];
  qR->y2 = y2[1];
  rR->y1 = z1[1];
  rR->y2 = z2[1];
}
#endif

/* Filters count samples of both channels in place through every biquad
 * stage of the equalizer.
 *
 * Args:
 *    eq2 - The equalizer; the filter history stored in its biquads is updated.
 *    data0 - Samples of channel 0, overwritten with the filtered output.
 *    data1 - Samples of channel 1, overwritten with the filtered output.
 *    count - Number of samples in each buffer. Nothing is done when it is
 *        zero or negative.
 */
void eq2_process(struct eq2* eq2, float* data0, float* data1, int count) {
  /* Stages consumed per loop iteration. This is a property of the paired
   * kernels, not of the channel count. */
  const int stages_per_pass = 2;

  /* The SIMD kernels decrement count and loop until it reaches exactly zero,
   * so a zero or negative count must never reach them: it would make them
   * run far past the end of the buffers. */
  if (count <= 0) {
    return;
  }

  /* Process as many stages as the longer of the two channel chains; the
   * shorter chain's remaining slots hold the BQ_NONE filters set up by
   * eq2_new(). */
  int n = eq2->n[0];
  if (eq2->n[1] > n) {
    n = eq2->n[1];
  }

  for (int i = 0; i < n; i += stages_per_pass) {
    if (i + 1 == n) {
      /* Odd stage count: the last stage has no partner. */
      eq2_process_one(&eq2->biquad[i], data0, data1, count);
    } else {
#if defined(__ARM_NEON__)
      eq2_process_two_neon(&eq2->biquad[i], data0, data1, count);
#elif defined(__SSE3__) && defined(__x86_64__)
      eq2_process_two_sse3(&eq2->biquad[i], data0, data1, count);
#else
      eq2_process_one(&eq2->biquad[i], data0, data1, count);
      eq2_process_one(&eq2->biquad[i + 1], data0, data1, count);
#endif
    }
  }
}

/* Converts every biquad of one channel into consecutive entries of a blob.
 *
 * Each biquad is converted with biquad_convert_blob() into bq_cfg, which is
 * then advanced by SOF_EQ_IIR_NBIQUAD words for the next one.
 *
 * A single gain accumulator is threaded through all the calls. Only the call
 * for the last biquad of the channel is made with dump_gain set, so the
 * accumulated gain is emitted once, at the end of the series. This is meant
 * to avoid saturating samples at the intermediate nodes of the series, given
 * that the DSP EQ is a fixed-point design.
 *
 * Args:
 *    eq2 - The equalizer to read from.
 *    bq_cfg - Destination for the converted biquads of this channel.
 *    channel - Which channel's chain to convert.
 * Returns:
 *    0 on success, or the first negative value returned by
 *    biquad_convert_blob().
 */
int eq2_convert_channel_response(struct eq2* eq2,
                                 int32_t* bq_cfg,
                                 int channel) {
  const int num_biquads = eq2->n[channel];
  float gain_so_far = 1.0;

  for (int idx = 0; idx < num_biquads; idx++, bq_cfg += SOF_EQ_IIR_NBIQUAD) {
    const int is_last = (idx == num_biquads - 1);
    int ret = biquad_convert_blob(&eq2->biquad[idx][channel], bq_cfg,
                                  &gain_so_far, is_last /* dump_gain */);
    if (ret < 0) {
      return ret;
    }
  }
  return 0;
}

/* Serializes the equalizer into a newly allocated sof_eq_iir_config blob.
 *
 * Blob layout, in order:
 *    - the sof_eq_iir_config header;
 *    - one assign_response word per channel (channel c uses response c);
 *    - for each channel, a sof_eq_iir_header followed by that channel's
 *      converted biquads.
 *
 * Args:
 *    eq2 - The equalizer to serialize.
 *    config - On success, receives the blob. It is allocated with calloc()
 *        and not released here, so the caller must free() it.
 *    config_size - On success, receives the blob size in bytes.
 * Returns:
 *    0 on success. -ENOENT if eq2 is NULL, -ENODATA if either channel has no
 *    biquads, -ENOMEM if the allocation fails, or the negative value returned
 *    by eq2_convert_channel_response(). On any error, *config and
 *    *config_size are left untouched.
 */
int eq2_convert_params_to_blob(struct eq2* eq2,
                               uint32_t** config,
                               size_t* config_size) {
  if (eq2 == NULL) {
    return -ENOENT;
  }
  if (eq2->n[0] <= 0 || eq2->n[1] <= 0) {
    return -ENODATA;
  }

  /* Bytes taken by each channel's response (header plus biquads), and the
   * running total for the whole blob. */
  size_t response_bytes[EQ2_NUM_CHANNELS];
  size_t total_bytes = sizeof(struct sof_eq_iir_config) +
                       EQ2_NUM_CHANNELS * sizeof(uint32_t);
  for (int ch = 0; ch < EQ2_NUM_CHANNELS; ch++) {
    response_bytes[ch] = sizeof(struct sof_eq_iir_header) +
                         eq2->n[ch] * sizeof(struct sof_eq_iir_biquad);
    total_bytes += response_bytes[ch];
  }

  struct sof_eq_iir_config* blob = calloc(1, total_bytes);
  if (blob == NULL) {
    return -ENOMEM;
  }

  blob->size = total_bytes;
  blob->channels_in_config = EQ2_NUM_CHANNELS;
  blob->number_of_responses = EQ2_NUM_CHANNELS;

  /* assign_response[]: channel c is mapped to response c. */
  for (int ch = 0; ch < EQ2_NUM_CHANNELS; ch++) {
    blob->data[ch] = ch;
  }

  /* The per-channel responses start right after the assignment table and
   * are packed back to back. */
  uint8_t* cursor = (uint8_t*)&blob->data[EQ2_NUM_CHANNELS];
  for (int ch = 0; ch < EQ2_NUM_CHANNELS; ch++) {
    struct sof_eq_iir_header* hdr = (struct sof_eq_iir_header*)cursor;
    hdr->num_sections = eq2->n[ch];
    hdr->num_sections_in_series = eq2->n[ch];

    int ret = eq2_convert_channel_response(eq2, hdr->biquads, ch);
    if (ret < 0) {
      free(blob);
      return ret;
    }
    cursor += response_bytes[ch];
  }

  *config = (uint32_t*)blob;
  *config_size = total_bytes;
  return 0;
}

Coverage Report

Created: 2024-05-20 06:38

Line	Count	Source (jump to first uncovered line)
1		/* Copyright 2013 The ChromiumOS Authors
2		* Use of this source code is governed by a BSD-style license that can be
3		* found in the LICENSE file.
4		*/
5
6		#include "cras/src/dsp/eq2.h"
7
8		#include <errno.h>
9		#include <stdlib.h>
10
11		#include "cras/src/dsp/biquad.h"
12		#include "user/eq.h"
13
14	0	#define EQ2_NUM_CHANNELS 2
15
16		struct eq2 {
17		int n[EQ2_NUM_CHANNELS];
18		struct biquad biquad[MAX_BIQUADS_PER_EQ2][EQ2_NUM_CHANNELS];
19		};
20
21	0	struct eq2* eq2_new() {
22	0	int i, j;
23	0	struct eq2* eq2 = calloc(1, sizeof(*eq2));
24	0	if (!eq2) {
25	0	return NULL;
26	0	}
27
28		/* Initialize all biquads to identity filter, so if two channels have
29		* different numbers of biquads, it still works. */
30	0	for (i = 0; i < MAX_BIQUADS_PER_EQ2; i++) {
31	0	for (j = 0; j < EQ2_NUM_CHANNELS; j++) {
32	0	biquad_set(&eq2->biquad[i][j], BQ_NONE, 0, 0, 0);
33	0	}
34	0	}
35
36	0	return eq2;
37	0	}
38
39	0	void eq2_free(struct eq2* eq2) {
40	0	free(eq2);
41	0	}
42
43		int eq2_append_biquad(struct eq2* eq2,
44		int channel,
45		enum biquad_type type,
46		float freq,
47		float Q,
48	0	float gain) {
49	0	if (eq2->n[channel] >= MAX_BIQUADS_PER_EQ2) {
50	0	return -EINVAL;
51	0	}
52	0	biquad_set(&eq2->biquad[eq2->n[channel]++][channel], type, freq, Q, gain);
53	0	return 0;
54	0	}
55
56		int eq2_append_biquad_direct(struct eq2* eq2,
57		int channel,
58	0	const struct biquad* biquad) {
59	0	if (eq2->n[channel] >= MAX_BIQUADS_PER_EQ2) {
60	0	return -EINVAL;
61	0	}
62	0	eq2->biquad[eq2->n[channel]++][channel] = *biquad;
63	0	return 0;
64	0	}
65
66		static inline void eq2_process_one(struct biquad (*bq)[2],
67		float* data0,
68		float* data1,
69	0	int count) {
70	0	struct biquad* qL = &bq[0][0];
71	0	struct biquad* qR = &bq[0][1];
72
73	0	float x1L = qL->x1;
74	0	float x2L = qL->x2;
75	0	float y1L = qL->y1;
76	0	float y2L = qL->y2;
77	0	float b0L = qL->b0;
78	0	float b1L = qL->b1;
79	0	float b2L = qL->b2;
80	0	float a1L = qL->a1;
81	0	float a2L = qL->a2;
82
83	0	float x1R = qR->x1;
84	0	float x2R = qR->x2;
85	0	float y1R = qR->y1;
86	0	float y2R = qR->y2;
87	0	float b0R = qR->b0;
88	0	float b1R = qR->b1;
89	0	float b2R = qR->b2;
90	0	float a1R = qR->a1;
91	0	float a2R = qR->a2;
92
93	0	int j;
94	0	for (j = 0; j < count; j++) {
95	0	float xL = data0[j];
96	0	float xR = data1[j];
97
98	0	float yL = b0L * xL + b1L * x1L + b2L * x2L - a1L * y1L - a2L * y2L;
99	0	x2L = x1L;
100	0	x1L = xL;
101	0	y2L = y1L;
102	0	y1L = yL;
103
104	0	float yR = b0R * xR + b1R * x1R + b2R * x2R - a1R * y1R - a2R * y2R;
105	0	x2R = x1R;
106	0	x1R = xR;
107	0	y2R = y1R;
108	0	y1R = yR;
109
110	0	data0[j] = yL;
111	0	data1[j] = yR;
112	0	}
113
114	0	qL->x1 = x1L;
115	0	qL->x2 = x2L;
116	0	qL->y1 = y1L;
117	0	qL->y2 = y2L;
118	0	qR->x1 = x1R;
119	0	qR->x2 = x2R;
120	0	qR->y1 = y1R;
121	0	qR->y2 = y2R;
122	0	}
123
124		#ifdef __ARM_NEON__
125		#include <arm_neon.h>
126		static inline void eq2_process_two_neon(struct biquad (*bq)[2],
127		float* data0,
128		float* data1,
129		int count) {
130		struct biquad* qL = &bq[0][0];
131		struct biquad* rL = &bq[1][0];
132		struct biquad* qR = &bq[0][1];
133		struct biquad* rR = &bq[1][1];
134
135		float32x2_t x1 = {qL->x1, qR->x1};
136		float32x2_t x2 = {qL->x2, qR->x2};
137		float32x2_t y1 = {qL->y1, qR->y1};
138		float32x2_t y2 = {qL->y2, qR->y2};
139		float32x2_t qb0 = {qL->b0, qR->b0};
140		float32x2_t qb1 = {qL->b1, qR->b1};
141		float32x2_t qb2 = {qL->b2, qR->b2};
142		float32x2_t qa1 = {qL->a1, qR->a1};
143		float32x2_t qa2 = {qL->a2, qR->a2};
144
145		float32x2_t z1 = {rL->y1, rR->y1};
146		float32x2_t z2 = {rL->y2, rR->y2};
147		float32x2_t rb0 = {rL->b0, rR->b0};
148		float32x2_t rb1 = {rL->b1, rR->b1};
149		float32x2_t rb2 = {rL->b2, rR->b2};
150		float32x2_t ra1 = {rL->a1, rR->a1};
151		float32x2_t ra2 = {rL->a2, rR->a2};
152
153		// clang-format off
154		__asm__ __volatile__(
155		// d0 = x, d1 = y, d2 = z
156		"1: \n"
157		"vmul.f32 d1, %P[qb1], %P[x1] \n"
158		"vld1.32 d0[0], [%[data0]] \n"
159		"vld1.32 d0[1], [%[data1]] \n"
160		"subs %[count], #1 \n"
161		"vmul.f32 d2, %P[rb1], %P[y1] \n"
162		"vmla.f32 d1, %P[qb0], d0 \n"
163		"vmla.f32 d1, %P[qb2], %P[x2] \n"
164		"vmov.f32 %P[x2], %P[x1] \n"
165		"vmov.f32 %P[x1], d0 \n"
166		"vmls.f32 d1, %P[qa1], %P[y1] \n"
167		"vmls.f32 d1, %P[qa2], %P[y2] \n"
168		"vmla.f32 d2, %P[rb0], d1 \n"
169		"vmla.f32 d2, %P[rb2], %P[y2] \n"
170		"vmov.f32 %P[y2], %P[y1] \n"
171		"vmov.f32 %P[y1], d1 \n"
172		"vmls.f32 d2, %P[ra1], %P[z1] \n"
173		"vmls.f32 d2, %P[ra2], %P[z2] \n"
174		"vmov.f32 %P[z2], %P[z1] \n"
175		"vmov.f32 %P[z1], d2 \n"
176		"vst1.f32 d2[0], [%[data0]]! \n"
177		"vst1.f32 d2[1], [%[data1]]! \n"
178		"bne 1b \n"
179		: // output
180		[data0]"+r"(data0),
181		[data1]"+r"(data1),
182		[count]"+r"(count),
183		[x1]"+w"(x1),
184		[x2]"+w"(x2),
185		[y1]"+w"(y1),
186		[y2]"+w"(y2),
187		[z1]"+w"(z1),
188		[z2]"+w"(z2)
189		: // input
190		[qb0]"w"(qb0),
191		[qb1]"w"(qb1),
192		[qb2]"w"(qb2),
193		[qa1]"w"(qa1),
194		[qa2]"w"(qa2),
195		[rb0]"w"(rb0),
196		[rb1]"w"(rb1),
197		[rb2]"w"(rb2),
198		[ra1]"w"(ra1),
199		[ra2]"w"(ra2)
200		: // clobber
201		"d0", "d1", "d2", "memory", "cc");
202		// clang-format on
203
204		qL->x1 = x1[0];
205		qL->x2 = x2[0];
206		qL->y1 = y1[0];
207		qL->y2 = y2[0];
208		rL->y1 = z1[0];
209		rL->y2 = z2[0];
210		qR->x1 = x1[1];
211		qR->x2 = x2[1];
212		qR->y1 = y1[1];
213		qR->y2 = y2[1];
214		rR->y1 = z1[1];
215		rR->y2 = z2[1];
216		}
217		#endif
218
219		#if defined(__SSE3__) && defined(__x86_64__)
220		#include <emmintrin.h>
221		static inline void eq2_process_two_sse3(struct biquad (*bq)[2],
222		float* data0,
223		float* data1,
224		int count) {
225		struct biquad* qL = &bq[0][0];
226		struct biquad* rL = &bq[1][0];
227		struct biquad* qR = &bq[0][1];
228		struct biquad* rR = &bq[1][1];
229
230		__m128 x1 = {qL->x1, qR->x1};
231		__m128 x2 = {qL->x2, qR->x2};
232		__m128 y1 = {qL->y1, qR->y1};
233		__m128 y2 = {qL->y2, qR->y2};
234		__m128 qb0 = {qL->b0, qR->b0};
235		__m128 qb1 = {qL->b1, qR->b1};
236		__m128 qb2 = {qL->b2, qR->b2};
237		__m128 qa1 = {qL->a1, qR->a1};
238		__m128 qa2 = {qL->a2, qR->a2};
239
240		__m128 z1 = {rL->y1, rR->y1};
241		__m128 z2 = {rL->y2, rR->y2};
242		__m128 rb0 = {rL->b0, rR->b0};
243		__m128 rb1 = {rL->b1, rR->b1};
244		__m128 rb2 = {rL->b2, rR->b2};
245		__m128 ra1 = {rL->a1, rR->a1};
246		__m128 ra2 = {rL->a2, rR->a2};
247
248		// clang-format off
249		__asm__ __volatile__(
250		"1: \n"
251		"movss (%[data0]), %%xmm2 \n"
252		"movss (%[data1]), %%xmm1 \n"
253		"unpcklps %%xmm1, %%xmm2 \n"
254		"mulps %[qb2],%[x2] \n"
255		"lddqu %[qb0],%%xmm0 \n"
256		"mulps %[ra2],%[z2] \n"
257		"lddqu %[qb1],%%xmm1 \n"
258		"mulps %%xmm2,%%xmm0 \n"
259		"mulps %[x1],%%xmm1 \n"
260		"addps %%xmm1,%%xmm0 \n"
261		"movaps %[qa1],%%xmm1 \n"
262		"mulps %[y1],%%xmm1 \n"
263		"addps %[x2],%%xmm0 \n"
264		"movaps %[rb1],%[x2] \n"
265		"mulps %[y1],%[x2] \n"
266		"subps %%xmm1,%%xmm0 \n"
267		"movaps %[qa2],%%xmm1 \n"
268		"mulps %[y2],%%xmm1 \n"
269		"mulps %[rb2],%[y2] \n"
270		"subps %%xmm1,%%xmm0 \n"
271		"movaps %[rb0],%%xmm1 \n"
272		"mulps %%xmm0,%%xmm1 \n"
273		"addps %[x2],%%xmm1 \n"
274		"movaps %[x1],%[x2] \n"
275		"movaps %%xmm2,%[x1] \n"
276		"addps %[y2],%%xmm1 \n"
277		"movaps %[ra1],%[y2] \n"
278		"mulps %[z1],%[y2] \n"
279		"subps %[y2],%%xmm1 \n"
280		"movaps %[y1],%[y2] \n"
281		"movaps %%xmm0,%[y1] \n"
282		"subps %[z2],%%xmm1 \n"
283		"movaps %[z1],%[z2] \n"
284		"movaps %%xmm1,%[z1] \n"
285		"movss %%xmm1, (%[data0]) \n"
286		"shufps $1, %%xmm1, %%xmm1 \n"
287		"movss %%xmm1, (%[data1]) \n"
288		"add $4, %[data0] \n"
289		"add $4, %[data1] \n"
290		"sub $1, %[count] \n"
291		"jnz 1b \n"
292		: // output
293		[data0]"+r"(data0),
294		[data1]"+r"(data1),
295		[count]"+r"(count),
296		[x1]"+x"(x1),
297		[x2]"+x"(x2),
298		[y1]"+x"(y1),
299		[y2]"+x"(y2),
300		[z1]"+x"(z1),
301		[z2]"+x"(z2)
302		: // input
303		[qb0]"m"(qb0),
304		[qb1]"m"(qb1),
305		[qb2]"m"(qb2),
306		[qa1]"x"(qa1),
307		[qa2]"x"(qa2),
308		[rb0]"x"(rb0),
309		[rb1]"x"(rb1),
310		[rb2]"x"(rb2),
311		[ra1]"x"(ra1),
312		[ra2]"x"(ra2)
313		: // clobber
314		"xmm0", "xmm1", "xmm2", "memory", "cc");
315		// clang-format on
316
317		qL->x1 = x1[0];
318		qL->x2 = x2[0];
319		qL->y1 = y1[0];
320		qL->y2 = y2[0];
321		rL->y1 = z1[0];
322		rL->y2 = z2[0];
323		qR->x1 = x1[1];
324		qR->x2 = x2[1];
325		qR->y1 = y1[1];
326		qR->y2 = y2[1];
327		rR->y1 = z1[1];
328		rR->y2 = z2[1];
329		}
330		#endif
331
332	0	void eq2_process(struct eq2* eq2, float* data0, float* data1, int count) {
333	0	int i;
334	0	int n;
335	0	if (!count) {
336	0	return;
337	0	}
338	0	n = eq2->n[0];
339	0	if (eq2->n[1] > n) {
340	0	n = eq2->n[1];
341	0	}
342	0	for (i = 0; i < n; i += EQ2_NUM_CHANNELS) {
343	0	if (i + 1 == n) {
344	0	eq2_process_one(&eq2->biquad[i], data0, data1, count);
345	0	} else {
346		#if defined(__ARM_NEON__)
347		eq2_process_two_neon(&eq2->biquad[i], data0, data1, count);
348		#elif defined(__SSE3__) && defined(__x86_64__)
349		eq2_process_two_sse3(&eq2->biquad[i], data0, data1, count);
350		#else
351	0	eq2_process_one(&eq2->biquad[i], data0, data1, count);
352	0	eq2_process_one(&eq2->biquad[i + 1], data0, data1, count);
353	0	#endif
354	0	}
355	0	}
356	0	}
357
358		int eq2_convert_channel_response(struct eq2* eq2,
359		int32_t* bq_cfg,
360	0	int channel) {
361	0	float accumulated_gain = 1.0;
362	0	int ret;
363
364	0	for (int i = 0; i < eq2->n[channel]; i++) {
365	0	struct biquad* bq = &eq2->biquad[i][channel];
366
367		/* For i = 0..(n-2), accumulated_gain is kept accumulating in loop.
368		* For i = n-1, the last biquad element, accumulated_gain is dumped to the
369		* converted blob by calling biquad_convert_blob() with dump_gain = 1.
370		* To prevent the sample saturation on each node across the series of biquad
371		* as the channel response intermediate nodes, considering that DSP EQ is
372		* the fixed-point design.
373		*/
374	0	ret = biquad_convert_blob(bq, bq_cfg, &accumulated_gain,
375	0	(i == eq2->n[channel] - 1) /* dump_gain */);
376	0	if (ret < 0) {
377	0	return ret;
378	0	}
379	0	bq_cfg += SOF_EQ_IIR_NBIQUAD;
380	0	}
381	0	return 0;
382	0	}
383
384		int eq2_convert_params_to_blob(struct eq2* eq2,
385		uint32_t** config,
386	0	size_t* config_size) {
387	0	const size_t biquad_size = sizeof(struct sof_eq_iir_biquad);
388	0	const size_t eq_iir_hdr_size = sizeof(struct sof_eq_iir_header);
389	0	const size_t eq_cfg_hdr_size = sizeof(struct sof_eq_iir_config);
390
391	0	if (!eq2) {
392	0	return -ENOENT;
393	0	}
394
395	0	if (eq2->n[0] <= 0 || eq2->n[1] <= 0) {
396	0	return -ENODATA;
397	0	}
398
399	0	size_t response_config_size[EQ2_NUM_CHANNELS] = {
400	0	eq_iir_hdr_size + eq2->n[0] * biquad_size, /* response of ch-0 */
401	0	eq_iir_hdr_size + eq2->n[1] * biquad_size /* response of ch-1 */
402	0	};
403
404	0	size_t size =
405	0	eq_cfg_hdr_size + /* sof_eq_iir_config header */
406	0	EQ2_NUM_CHANNELS * sizeof(uint32_t) + /* assign_response[channels] */
407	0	response_config_size[0] + /* 1st response config data */
408	0	response_config_size[1]; /* 2nd response config data */
409
410	0	struct sof_eq_iir_config* eq_config =
411	0	(struct sof_eq_iir_config*)calloc(1, size);
412	0	if (!eq_config) {
413	0	return -ENOMEM;
414	0	}
415
416		/* Fill sof_eq_iir_config header. */
417	0	eq_config->size = size;
418	0	eq_config->channels_in_config = EQ2_NUM_CHANNELS;
419	0	eq_config->number_of_responses = EQ2_NUM_CHANNELS;
420
421		/* Fill assign_response[channels]. */
422	0	eq_config->data[0] = 0; /* assign response-0 to ch-0 */
423	0	eq_config->data[1] = 1; /* assign response-1 to ch-1 */
424
425		/* Fill config data per response. */
426	0	struct sof_eq_iir_header* eq_hdr =
427	0	(struct sof_eq_iir_header*)(&eq_config->data[2]);
428	0	int ret;
429	0	for (int channel = 0; channel < EQ2_NUM_CHANNELS; channel++) {
430		/* Fill the header information. */
431	0	eq_hdr->num_sections = eq2->n[channel];
432	0	eq_hdr->num_sections_in_series = eq2->n[channel];
433
434		/* Fill sof_eq_iir_biquad for biquads in one channel. */
435	0	ret = eq2_convert_channel_response(eq2, eq_hdr->biquads, channel);
436	0	if (ret < 0) {
437	0	free(eq_config);
438	0	return ret;
439	0	}
440
441		/* Move the address to the next sof_eq_iir_header element. */
442	0	eq_hdr = (struct sof_eq_iir_header*)((uint8_t*)eq_hdr +
443	0	response_config_size[channel]);
444	0	}
445
446	0	*config = (uint32_t*)eq_config;
447	0	*config_size = size;
448	0	return 0;
449	0	}