Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/dom/media/webaudio/AudioNodeEngineSSE2.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* This Source Code Form is subject to the terms of the Mozilla Public
3
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
4
 * You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6
#include "AudioNodeEngineSSE2.h"
7
#include "AlignmentUtils.h"
8
#include <emmintrin.h>
9
10
11
namespace mozilla {
12
void
13
AudioBufferAddWithScale_SSE(const float* aInput,
14
                            float aScale,
15
                            float* aOutput,
16
                            uint32_t aSize)
17
0
{
18
0
  __m128 vin0, vin1, vin2, vin3,
19
0
         vscaled0, vscaled1, vscaled2, vscaled3,
20
0
         vout0, vout1, vout2, vout3,
21
0
         vgain;
22
0
23
0
  ASSERT_ALIGNED16(aInput);
24
0
  ASSERT_ALIGNED16(aOutput);
25
0
  ASSERT_MULTIPLE16(aSize);
26
0
27
0
  vgain = _mm_load1_ps(&aScale);
28
0
29
0
  for (unsigned i = 0; i < aSize; i+=16) {
30
0
    vin0 = _mm_load_ps(&aInput[i]);
31
0
    vin1 = _mm_load_ps(&aInput[i + 4]);
32
0
    vin2 = _mm_load_ps(&aInput[i + 8]);
33
0
    vin3 = _mm_load_ps(&aInput[i + 12]);
34
0
35
0
    vscaled0 = _mm_mul_ps(vin0, vgain);
36
0
    vscaled1 = _mm_mul_ps(vin1, vgain);
37
0
    vscaled2 = _mm_mul_ps(vin2, vgain);
38
0
    vscaled3 = _mm_mul_ps(vin3, vgain);
39
0
40
0
    vin0 = _mm_load_ps(&aOutput[i]);
41
0
    vin1 = _mm_load_ps(&aOutput[i + 4]);
42
0
    vin2 = _mm_load_ps(&aOutput[i + 8]);
43
0
    vin3 = _mm_load_ps(&aOutput[i + 12]);
44
0
45
0
    vout0 = _mm_add_ps(vin0, vscaled0);
46
0
    vout1 = _mm_add_ps(vin1, vscaled1);
47
0
    vout2 = _mm_add_ps(vin2, vscaled2);
48
0
    vout3 = _mm_add_ps(vin3, vscaled3);
49
0
50
0
    _mm_store_ps(&aOutput[i], vout0);
51
0
    _mm_store_ps(&aOutput[i + 4], vout1);
52
0
    _mm_store_ps(&aOutput[i + 8], vout2);
53
0
    _mm_store_ps(&aOutput[i + 12], vout3);
54
0
  }
55
0
}
56
57
void
58
AudioBlockCopyChannelWithScale_SSE(const float* aInput,
59
                                   float aScale,
60
                                   float* aOutput)
61
0
{
62
0
  __m128 vin0, vin1, vin2, vin3,
63
0
         vout0, vout1, vout2, vout3;
64
0
65
0
  ASSERT_ALIGNED16(aInput);
66
0
  ASSERT_ALIGNED16(aOutput);
67
0
68
0
  __m128 vgain = _mm_load1_ps(&aScale);
69
0
70
0
  for (unsigned i = 0 ; i < WEBAUDIO_BLOCK_SIZE; i+=16) {
71
0
    vin0 = _mm_load_ps(&aInput[i]);
72
0
    vin1 = _mm_load_ps(&aInput[i + 4]);
73
0
    vin2 = _mm_load_ps(&aInput[i + 8]);
74
0
    vin3 = _mm_load_ps(&aInput[i + 12]);
75
0
    vout0 = _mm_mul_ps(vin0, vgain);
76
0
    vout1 = _mm_mul_ps(vin1, vgain);
77
0
    vout2 = _mm_mul_ps(vin2, vgain);
78
0
    vout3 = _mm_mul_ps(vin3, vgain);
79
0
    _mm_store_ps(&aOutput[i], vout0);
80
0
    _mm_store_ps(&aOutput[i + 4], vout1);
81
0
    _mm_store_ps(&aOutput[i + 8], vout2);
82
0
    _mm_store_ps(&aOutput[i + 12], vout3);
83
0
  }
84
0
}
85
86
void
87
AudioBlockCopyChannelWithScale_SSE(const float aInput[WEBAUDIO_BLOCK_SIZE],
88
                                   const float aScale[WEBAUDIO_BLOCK_SIZE],
89
                                   float aOutput[WEBAUDIO_BLOCK_SIZE])
90
0
{
91
0
  __m128 vin0, vin1, vin2, vin3,
92
0
         vscaled0, vscaled1, vscaled2, vscaled3,
93
0
         vout0, vout1, vout2, vout3;
94
0
95
0
  ASSERT_ALIGNED16(aInput);
96
0
  ASSERT_ALIGNED16(aScale);
97
0
  ASSERT_ALIGNED16(aOutput);
98
0
99
0
  for (unsigned i = 0 ; i < WEBAUDIO_BLOCK_SIZE; i+=16) {
100
0
    vscaled0 = _mm_load_ps(&aScale[i]);
101
0
    vscaled1 = _mm_load_ps(&aScale[i+4]);
102
0
    vscaled2 = _mm_load_ps(&aScale[i+8]);
103
0
    vscaled3 = _mm_load_ps(&aScale[i+12]);
104
0
105
0
    vin0 = _mm_load_ps(&aInput[i]);
106
0
    vin1 = _mm_load_ps(&aInput[i + 4]);
107
0
    vin2 = _mm_load_ps(&aInput[i + 8]);
108
0
    vin3 = _mm_load_ps(&aInput[i + 12]);
109
0
110
0
    vout0 = _mm_mul_ps(vin0, vscaled0);
111
0
    vout1 = _mm_mul_ps(vin1, vscaled1);
112
0
    vout2 = _mm_mul_ps(vin2, vscaled2);
113
0
    vout3 = _mm_mul_ps(vin3, vscaled3);
114
0
115
0
    _mm_store_ps(&aOutput[i], vout0);
116
0
    _mm_store_ps(&aOutput[i + 4], vout1);
117
0
    _mm_store_ps(&aOutput[i + 8], vout2);
118
0
    _mm_store_ps(&aOutput[i + 12], vout3);
119
0
  }
120
0
}
121
122
void
123
AudioBufferInPlaceScale_SSE(float* aBlock,
124
                            float aScale,
125
                            uint32_t aSize)
126
0
{
127
0
  __m128 vout0, vout1, vout2, vout3,
128
0
         vin0, vin1, vin2, vin3;
129
0
130
0
  ASSERT_ALIGNED16(aBlock);
131
0
  ASSERT_MULTIPLE16(aSize);
132
0
133
0
  __m128 vgain = _mm_load1_ps(&aScale);
134
0
135
0
  for (unsigned i = 0; i < aSize; i+=16) {
136
0
    vin0 = _mm_load_ps(&aBlock[i]);
137
0
    vin1 = _mm_load_ps(&aBlock[i + 4]);
138
0
    vin2 = _mm_load_ps(&aBlock[i + 8]);
139
0
    vin3 = _mm_load_ps(&aBlock[i + 12]);
140
0
    vout0 = _mm_mul_ps(vin0, vgain);
141
0
    vout1 = _mm_mul_ps(vin1, vgain);
142
0
    vout2 = _mm_mul_ps(vin2, vgain);
143
0
    vout3 = _mm_mul_ps(vin3, vgain);
144
0
    _mm_store_ps(&aBlock[i], vout0);
145
0
    _mm_store_ps(&aBlock[i + 4], vout1);
146
0
    _mm_store_ps(&aBlock[i + 8], vout2);
147
0
    _mm_store_ps(&aBlock[i + 12], vout3);
148
0
  }
149
0
}
150
151
void
152
AudioBufferInPlaceScale_SSE(float* aBlock,
153
                            float* aScale,
154
                            uint32_t aSize)
155
0
{
156
0
  __m128 vout0, vout1, vout2, vout3,
157
0
         vgain0, vgain1, vgain2, vgain3,
158
0
         vin0, vin1, vin2, vin3;
159
0
160
0
  ASSERT_ALIGNED16(aBlock);
161
0
  ASSERT_MULTIPLE16(aSize);
162
0
163
0
  for (unsigned i = 0; i < aSize; i+=16) {
164
0
    vin0 = _mm_load_ps(&aBlock[i]);
165
0
    vin1 = _mm_load_ps(&aBlock[i + 4]);
166
0
    vin2 = _mm_load_ps(&aBlock[i + 8]);
167
0
    vin3 = _mm_load_ps(&aBlock[i + 12]);
168
0
    vgain0 = _mm_load_ps(&aScale[i]);
169
0
    vgain1 = _mm_load_ps(&aScale[i + 4]);
170
0
    vgain2 = _mm_load_ps(&aScale[i + 8]);
171
0
    vgain3 = _mm_load_ps(&aScale[i + 12]);
172
0
    vout0 = _mm_mul_ps(vin0, vgain0);
173
0
    vout1 = _mm_mul_ps(vin1, vgain1);
174
0
    vout2 = _mm_mul_ps(vin2, vgain2);
175
0
    vout3 = _mm_mul_ps(vin3, vgain3);
176
0
    _mm_store_ps(&aBlock[i], vout0);
177
0
    _mm_store_ps(&aBlock[i + 4], vout1);
178
0
    _mm_store_ps(&aBlock[i + 8], vout2);
179
0
    _mm_store_ps(&aBlock[i + 12], vout3);
180
0
  }
181
0
}
182
183
void
184
AudioBlockPanStereoToStereo_SSE(const float aInputL[WEBAUDIO_BLOCK_SIZE],
185
                                const float aInputR[WEBAUDIO_BLOCK_SIZE],
186
                                float aGainL, float aGainR, bool aIsOnTheLeft,
187
                                float aOutputL[WEBAUDIO_BLOCK_SIZE],
188
                                float aOutputR[WEBAUDIO_BLOCK_SIZE])
189
0
{
190
0
  __m128 vinl0, vinr0, vinl1, vinr1,
191
0
         vout0, vout1,
192
0
         vscaled0, vscaled1,
193
0
         vgainl, vgainr;
194
0
195
0
  ASSERT_ALIGNED16(aInputL);
196
0
  ASSERT_ALIGNED16(aInputR);
197
0
  ASSERT_ALIGNED16(aOutputL);
198
0
  ASSERT_ALIGNED16(aOutputR);
199
0
200
0
  vgainl = _mm_load1_ps(&aGainL);
201
0
  vgainr = _mm_load1_ps(&aGainR);
202
0
203
0
  if (aIsOnTheLeft) {
204
0
    for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) {
205
0
      vinl0 = _mm_load_ps(&aInputL[i]);
206
0
      vinr0 = _mm_load_ps(&aInputR[i]);
207
0
      vinl1 = _mm_load_ps(&aInputL[i+4]);
208
0
      vinr1 = _mm_load_ps(&aInputR[i+4]);
209
0
210
0
      /* left channel : aOutputL  = aInputL + aInputR * gainL */
211
0
      vscaled0 = _mm_mul_ps(vinr0, vgainl);
212
0
      vscaled1 = _mm_mul_ps(vinr1, vgainl);
213
0
      vout0 = _mm_add_ps(vscaled0, vinl0);
214
0
      vout1 = _mm_add_ps(vscaled1, vinl1);
215
0
      _mm_store_ps(&aOutputL[i], vout0);
216
0
      _mm_store_ps(&aOutputL[i+4], vout1);
217
0
218
0
      /* right channel : aOutputR = aInputR * gainR */
219
0
      vscaled0 = _mm_mul_ps(vinr0, vgainr);
220
0
      vscaled1 = _mm_mul_ps(vinr1, vgainr);
221
0
      _mm_store_ps(&aOutputR[i], vscaled0);
222
0
      _mm_store_ps(&aOutputR[i+4], vscaled1);
223
0
    }
224
0
  } else {
225
0
    for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) {
226
0
      vinl0 = _mm_load_ps(&aInputL[i]);
227
0
      vinr0 = _mm_load_ps(&aInputR[i]);
228
0
      vinl1 = _mm_load_ps(&aInputL[i+4]);
229
0
      vinr1 = _mm_load_ps(&aInputR[i+4]);
230
0
231
0
      /* left channel : aInputL * gainL */
232
0
      vscaled0 = _mm_mul_ps(vinl0, vgainl);
233
0
      vscaled1 = _mm_mul_ps(vinl1, vgainl);
234
0
      _mm_store_ps(&aOutputL[i], vscaled0);
235
0
      _mm_store_ps(&aOutputL[i+4], vscaled1);
236
0
237
0
      /* right channel: aOutputR = aInputR + aInputL * gainR */
238
0
      vscaled0 = _mm_mul_ps(vinl0, vgainr);
239
0
      vscaled1 = _mm_mul_ps(vinl1, vgainr);
240
0
      vout0 = _mm_add_ps(vscaled0, vinr0);
241
0
      vout1 = _mm_add_ps(vscaled1, vinr1);
242
0
      _mm_store_ps(&aOutputR[i], vout0);
243
0
      _mm_store_ps(&aOutputR[i+4], vout1);
244
0
    }
245
0
  }
246
0
}
247
248
void BufferComplexMultiply_SSE(const float* aInput,
249
                               const float* aScale,
250
                               float* aOutput,
251
                               uint32_t aSize)
252
0
{
253
0
  unsigned i;
254
0
  __m128 in0, in1, in2, in3,
255
0
         outreal0, outreal1, outreal2, outreal3,
256
0
         outimag0, outimag1, outimag2, outimag3;
257
0
258
0
  ASSERT_ALIGNED16(aInput);
259
0
  ASSERT_ALIGNED16(aScale);
260
0
  ASSERT_ALIGNED16(aOutput);
261
0
  ASSERT_MULTIPLE16(aSize);
262
0
263
0
  for (i = 0; i < aSize * 2; i += 16) {
264
0
    in0 = _mm_load_ps(&aInput[i]);
265
0
    in1 = _mm_load_ps(&aInput[i + 4]);
266
0
    in2 = _mm_load_ps(&aInput[i + 8]);
267
0
    in3 = _mm_load_ps(&aInput[i + 12]);
268
0
269
0
    outreal0 = _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(2, 0, 2, 0));
270
0
    outimag0 = _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(3, 1, 3, 1));
271
0
    outreal2 = _mm_shuffle_ps(in2, in3, _MM_SHUFFLE(2, 0, 2, 0));
272
0
    outimag2 = _mm_shuffle_ps(in2, in3, _MM_SHUFFLE(3, 1, 3, 1));
273
0
274
0
    in0 = _mm_load_ps(&aScale[i]);
275
0
    in1 = _mm_load_ps(&aScale[i + 4]);
276
0
    in2 = _mm_load_ps(&aScale[i + 8]);
277
0
    in3 = _mm_load_ps(&aScale[i + 12]);
278
0
279
0
    outreal1 = _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(2, 0, 2, 0));
280
0
    outimag1 = _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(3, 1, 3, 1));
281
0
    outreal3 = _mm_shuffle_ps(in2, in3, _MM_SHUFFLE(2, 0, 2, 0));
282
0
    outimag3 = _mm_shuffle_ps(in2, in3, _MM_SHUFFLE(3, 1, 3, 1));
283
0
284
0
    in0 = _mm_sub_ps(_mm_mul_ps(outreal0, outreal1),
285
0
                     _mm_mul_ps(outimag0, outimag1));
286
0
    in1 = _mm_add_ps(_mm_mul_ps(outreal0, outimag1),
287
0
                     _mm_mul_ps(outimag0, outreal1));
288
0
    in2 = _mm_sub_ps(_mm_mul_ps(outreal2, outreal3),
289
0
                     _mm_mul_ps(outimag2, outimag3));
290
0
    in3 = _mm_add_ps(_mm_mul_ps(outreal2, outimag3),
291
0
                     _mm_mul_ps(outimag2, outreal3));
292
0
293
0
    outreal0 = _mm_unpacklo_ps(in0, in1);
294
0
    outreal1 = _mm_unpackhi_ps(in0, in1);
295
0
    outreal2 = _mm_unpacklo_ps(in2, in3);
296
0
    outreal3 = _mm_unpackhi_ps(in2, in3);
297
0
298
0
    _mm_store_ps(&aOutput[i], outreal0);
299
0
    _mm_store_ps(&aOutput[i + 4], outreal1);
300
0
    _mm_store_ps(&aOutput[i + 8], outreal2);
301
0
    _mm_store_ps(&aOutput[i + 12], outreal3);
302
0
  }
303
0
}
304
305
float
306
AudioBufferSumOfSquares_SSE(const float* aInput, uint32_t aLength)
307
0
{
308
0
  unsigned i;
309
0
  __m128 in0, in1, in2, in3,
310
0
         acc0, acc1, acc2, acc3;
311
0
  float out[4];
312
0
313
0
  ASSERT_ALIGNED16(aInput);
314
0
  ASSERT_MULTIPLE16(aLength);
315
0
316
0
  acc0 = _mm_setzero_ps();
317
0
  acc1 = _mm_setzero_ps();
318
0
  acc2 = _mm_setzero_ps();
319
0
  acc3 = _mm_setzero_ps();
320
0
321
0
  for (i = 0; i < aLength; i+=16) {
322
0
    in0 = _mm_load_ps(&aInput[i]);
323
0
    in1 = _mm_load_ps(&aInput[i + 4]);
324
0
    in2 = _mm_load_ps(&aInput[i + 8]);
325
0
    in3 = _mm_load_ps(&aInput[i + 12]);
326
0
327
0
    in0 = _mm_mul_ps(in0, in0);
328
0
    in1 = _mm_mul_ps(in1, in1);
329
0
    in2 = _mm_mul_ps(in2, in2);
330
0
    in3 = _mm_mul_ps(in3, in3);
331
0
332
0
    acc0 = _mm_add_ps(acc0, in0);
333
0
    acc1 = _mm_add_ps(acc1, in1);
334
0
    acc2 = _mm_add_ps(acc2, in2);
335
0
    acc3 = _mm_add_ps(acc3, in3);
336
0
  }
337
0
338
0
  acc0 = _mm_add_ps(acc0, acc1);
339
0
  acc0 = _mm_add_ps(acc0, acc2);
340
0
  acc0 = _mm_add_ps(acc0, acc3);
341
0
342
0
  _mm_store_ps(out, acc0);
343
0
344
0
  return out[0] + out[1] + out[2] + out[3];
345
0
}
346
347
}