/src/mozilla-central/dom/media/webaudio/blink/HRTFPanner.cpp
/*
 * Copyright (C) 2010, Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "HRTFPanner.h"
#include "HRTFDatabaseLoader.h"

#include "FFTConvolver.h"
#include "HRTFDatabase.h"
#include "AudioBlock.h"

using namespace std;
using namespace mozilla;
using dom::ChannelInterpretation;

namespace WebCore {

// 2 milliseconds is used as an upper bound on the inter-aural delay taken
// from any HRTFKernel in the default HRTFDatabase. The delay values used in
// pan() are asserted against this bound.
const float MaxDelayTimeSeconds = 0.002f;
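// Back-of-envelope capacity check (sample rates here are illustrative, not
// taken from this file): at 44.1 kHz the delay line allocated in the
// constructor holds 0.002 * 44100 = 88.2 frames' worth of audio; at 48 kHz,
// 96 frames.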

const int UninitializedAzimuth = -1;
const unsigned RenderingQuantum = WEBAUDIO_BLOCK_SIZE;

HRTFPanner::HRTFPanner(float sampleRate, already_AddRefed<HRTFDatabaseLoader> databaseLoader)
    : m_databaseLoader(databaseLoader)
    , m_sampleRate(sampleRate)
    , m_crossfadeSelection(CrossfadeSelection1)
    , m_azimuthIndex1(UninitializedAzimuth)
    , m_azimuthIndex2(UninitializedAzimuth)
    // m_elevation1 and m_elevation2 are initialized in pan()
    , m_crossfadeX(0)
    , m_crossfadeIncr(0)
    , m_convolverL1(HRTFElevation::fftSizeForSampleRate(sampleRate))
    , m_convolverR1(m_convolverL1.fftSize())
    , m_convolverL2(m_convolverL1.fftSize())
    , m_convolverR2(m_convolverL1.fftSize())
    , m_delayLine(MaxDelayTimeSeconds * sampleRate)
{
    MOZ_ASSERT(m_databaseLoader);
    MOZ_COUNT_CTOR(HRTFPanner);
}

HRTFPanner::~HRTFPanner()
{
    MOZ_COUNT_DTOR(HRTFPanner);
}

size_t HRTFPanner::sizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
{
    size_t amount = aMallocSizeOf(this);

    // NB: m_databaseLoader can be shared, so it is not measured here
    amount += m_convolverL1.sizeOfExcludingThis(aMallocSizeOf);
    amount += m_convolverR1.sizeOfExcludingThis(aMallocSizeOf);
    amount += m_convolverL2.sizeOfExcludingThis(aMallocSizeOf);
    amount += m_convolverR2.sizeOfExcludingThis(aMallocSizeOf);
    amount += m_delayLine.SizeOfExcludingThis(aMallocSizeOf);

    return amount;
}

void HRTFPanner::reset()
{
    m_azimuthIndex1 = UninitializedAzimuth;
    m_azimuthIndex2 = UninitializedAzimuth;
    // m_elevation1 and m_elevation2 are initialized in pan()
    m_crossfadeSelection = CrossfadeSelection1;
    m_crossfadeX = 0.0f;
    m_crossfadeIncr = 0.0f;
    m_convolverL1.reset();
    m_convolverR1.reset();
    m_convolverL2.reset();
    m_convolverR2.reset();
    m_delayLine.Reset();
}

int HRTFPanner::calculateDesiredAzimuthIndexAndBlend(double azimuth, double& azimuthBlend)
{
    // Convert the azimuth angle from the range -180 -> +180 into the range 0 -> 360.
    // The azimuth index may then be calculated from this positive value.
    if (azimuth < 0)
        azimuth += 360.0;

    int numberOfAzimuths = HRTFDatabase::numberOfAzimuths();
    const double angleBetweenAzimuths = 360.0 / numberOfAzimuths;

    // Calculate the azimuth index and the blend (0 -> 1) for interpolation.
    double desiredAzimuthIndexFloat = azimuth / angleBetweenAzimuths;
    int desiredAzimuthIndex = static_cast<int>(desiredAzimuthIndexFloat);
    azimuthBlend = desiredAzimuthIndexFloat - static_cast<double>(desiredAzimuthIndex);
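
    // Worked example (the azimuth count here is hypothetical, not read from
    // HRTFDatabase): with numberOfAzimuths == 72 the spacing is 5 degrees, so
    // azimuth == 272 gives desiredAzimuthIndexFloat == 54.4, hence index 54
    // and azimuthBlend == 0.4, i.e. interpolate 40% of the way from kernel 54
    // toward kernel 55.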

    // We don't immediately start using this azimuth index; instead we approach
    // it from the last index we rendered at. This minimizes the clicks and
    // graininess that would otherwise occur with moving sources.
    desiredAzimuthIndex = max(0, desiredAzimuthIndex);
    desiredAzimuthIndex = min(numberOfAzimuths - 1, desiredAzimuthIndex);
    return desiredAzimuthIndex;
}

void HRTFPanner::pan(double desiredAzimuth, double elevation, const AudioBlock* inputBus, AudioBlock* outputBus)
{
#ifdef DEBUG
    unsigned numInputChannels =
        inputBus->IsNull() ? 0 : inputBus->ChannelCount();

    MOZ_ASSERT(numInputChannels <= 2);
    MOZ_ASSERT(inputBus->GetDuration() == WEBAUDIO_BLOCK_SIZE);
#endif

    bool isOutputGood = outputBus && outputBus->ChannelCount() == 2 && outputBus->GetDuration() == WEBAUDIO_BLOCK_SIZE;
    MOZ_ASSERT(isOutputGood);

    if (!isOutputGood) {
        if (outputBus)
            outputBus->SetNull(outputBus->GetDuration());
        return;
    }

    HRTFDatabase* database = m_databaseLoader->database();
    if (!database) { // not yet loaded
        outputBus->SetNull(outputBus->GetDuration());
        return;
    }

    // The IRCAM HRTF azimuth values from the loaded database are reversed
    // from the panner's notion of azimuth.
    double azimuth = -desiredAzimuth;

    bool isAzimuthGood = azimuth >= -180.0 && azimuth <= 180.0;
    MOZ_ASSERT(isAzimuthGood);
    if (!isAzimuthGood) {
        outputBus->SetNull(outputBus->GetDuration());
        return;
    }

    // Normally, we'll just be dealing with mono sources.
    // If we have a stereo input, implement stereo panning with left source
    // processed by left HRTF, and right source by right HRTF.

    // Get destination pointers.
    float* destinationL =
        static_cast<float*>(const_cast<void*>(outputBus->mChannelData[0]));
    float* destinationR =
        static_cast<float*>(const_cast<void*>(outputBus->mChannelData[1]));

    double azimuthBlend;
    int desiredAzimuthIndex = calculateDesiredAzimuthIndexAndBlend(azimuth, azimuthBlend);

    // Initially snap azimuth and elevation values to first values encountered.
    if (m_azimuthIndex1 == UninitializedAzimuth) {
        m_azimuthIndex1 = desiredAzimuthIndex;
        m_elevation1 = elevation;
    }
    if (m_azimuthIndex2 == UninitializedAzimuth) {
        m_azimuthIndex2 = desiredAzimuthIndex;
        m_elevation2 = elevation;
    }

    // Cross-fade / transition over a period of around 45 milliseconds.
    // This is an empirical value tuned to be a reasonable trade-off between
    // smoothness and speed.
    const double fadeFrames = sampleRate() <= 48000 ? 2048 : 4096;
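    // Sanity arithmetic (illustrative): 2048 / 44100 ~= 46 ms and
    // 2048 / 48000 ~= 43 ms; above 48 kHz, 4096 / 96000 ~= 43 ms, so the
    // fade stays near 45 ms in wall-clock time across sample rates.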

    // Check for azimuth and elevation changes, initiating a cross-fade if needed.
    if (!m_crossfadeX && m_crossfadeSelection == CrossfadeSelection1) {
        if (desiredAzimuthIndex != m_azimuthIndex1 || elevation != m_elevation1) {
            // Cross-fade from 1 -> 2
            m_crossfadeIncr = 1 / fadeFrames;
            m_azimuthIndex2 = desiredAzimuthIndex;
            m_elevation2 = elevation;
        }
    }
    if (m_crossfadeX == 1 && m_crossfadeSelection == CrossfadeSelection2) {
        if (desiredAzimuthIndex != m_azimuthIndex2 || elevation != m_elevation2) {
            // Cross-fade from 2 -> 1
            m_crossfadeIncr = -1 / fadeFrames;
            m_azimuthIndex1 = desiredAzimuthIndex;
            m_elevation1 = elevation;
        }
    }

    // Get the HRTFKernels and interpolated delays.
    HRTFKernel* kernelL1;
    HRTFKernel* kernelR1;
    HRTFKernel* kernelL2;
    HRTFKernel* kernelR2;
    double frameDelayL1;
    double frameDelayR1;
    double frameDelayL2;
    double frameDelayR2;
    database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex1, m_elevation1, kernelL1, kernelR1, frameDelayL1, frameDelayR1);
    database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex2, m_elevation2, kernelL2, kernelR2, frameDelayL2, frameDelayR2);

    bool areKernelsGood = kernelL1 && kernelR1 && kernelL2 && kernelR2;
    MOZ_ASSERT(areKernelsGood);
    if (!areKernelsGood) {
        outputBus->SetNull(outputBus->GetDuration());
        return;
    }

    MOZ_ASSERT(frameDelayL1 / sampleRate() < MaxDelayTimeSeconds && frameDelayR1 / sampleRate() < MaxDelayTimeSeconds);
    MOZ_ASSERT(frameDelayL2 / sampleRate() < MaxDelayTimeSeconds && frameDelayR2 / sampleRate() < MaxDelayTimeSeconds);

    // Crossfade inter-aural delays based on transitions.
    float frameDelaysL[WEBAUDIO_BLOCK_SIZE];
    float frameDelaysR[WEBAUDIO_BLOCK_SIZE];
    {
        float x = m_crossfadeX;
        float incr = m_crossfadeIncr;
        for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) {
            frameDelaysL[i] = (1 - x) * frameDelayL1 + x * frameDelayL2;
            frameDelaysR[i] = (1 - x) * frameDelayR1 + x * frameDelayR2;
            x += incr;
        }
    }

    // First run through delay lines for inter-aural time difference.
    m_delayLine.Write(*inputBus);
    // "Speakers" means a mono input is read into both outputs (with possibly
    // different delays).
    m_delayLine.ReadChannel(frameDelaysL, outputBus, 0,
                            ChannelInterpretation::Speakers);
    m_delayLine.ReadChannel(frameDelaysR, outputBus, 1,
                            ChannelInterpretation::Speakers);
    m_delayLine.NextBlock();
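    // For intuition (typical human figures, not values from this database):
    // a source hard to one side reaches the far ear roughly 0.6-0.7 ms later
    // than the near ear, i.e. around 30 frames at 48 kHz, comfortably within
    // the 2 ms delay-line bound.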

    bool needsCrossfading = m_crossfadeIncr;

    const float* convolutionDestinationL1;
    const float* convolutionDestinationR1;
    const float* convolutionDestinationL2;
    const float* convolutionDestinationR2;

    // Now do the convolutions.
    // Note that we avoid doing convolutions on both sets of convolvers if we're not currently cross-fading.

    if (m_crossfadeSelection == CrossfadeSelection1 || needsCrossfading) {
        convolutionDestinationL1 =
            m_convolverL1.process(kernelL1->fftFrame(), destinationL);
        convolutionDestinationR1 =
            m_convolverR1.process(kernelR1->fftFrame(), destinationR);
    }

    if (m_crossfadeSelection == CrossfadeSelection2 || needsCrossfading) {
        convolutionDestinationL2 =
            m_convolverL2.process(kernelL2->fftFrame(), destinationL);
        convolutionDestinationR2 =
            m_convolverR2.process(kernelR2->fftFrame(), destinationR);
    }

    if (needsCrossfading) {
        // Apply linear cross-fade.
        float x = m_crossfadeX;
        float incr = m_crossfadeIncr;
        for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) {
            destinationL[i] = (1 - x) * convolutionDestinationL1[i] + x * convolutionDestinationL2[i];
            destinationR[i] = (1 - x) * convolutionDestinationR1[i] + x * convolutionDestinationR2[i];
            x += incr;
        }
        // Update cross-fade value from local.
        m_crossfadeX = x;
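
        // Termination logic below: m_crossfadeX advances by m_crossfadeIncr
        // once per frame, so once it lands within one increment of its target
        // (1 when fading up, 0 when fading down) the next step would
        // overshoot; snap to the target and stop incrementing.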
        if (m_crossfadeIncr > 0 && fabs(m_crossfadeX - 1) < m_crossfadeIncr) {
            // We've fully made the crossfade transition from 1 -> 2.
            m_crossfadeSelection = CrossfadeSelection2;
            m_crossfadeX = 1;
            m_crossfadeIncr = 0;
        } else if (m_crossfadeIncr < 0 && fabs(m_crossfadeX) < -m_crossfadeIncr) {
            // We've fully made the crossfade transition from 2 -> 1.
            m_crossfadeSelection = CrossfadeSelection1;
            m_crossfadeX = 0;
            m_crossfadeIncr = 0;
        }
    } else {
        const float* sourceL;
        const float* sourceR;
        if (m_crossfadeSelection == CrossfadeSelection1) {
            sourceL = convolutionDestinationL1;
            sourceR = convolutionDestinationR1;
        } else {
            sourceL = convolutionDestinationL2;
            sourceR = convolutionDestinationR2;
        }
        PodCopy(destinationL, sourceL, WEBAUDIO_BLOCK_SIZE);
        PodCopy(destinationR, sourceR, WEBAUDIO_BLOCK_SIZE);
    }
}

int HRTFPanner::maxTailFrames() const
{
    // Although the ideal tail time would be the length of the impulse
    // response, there is additional tail time from the approximations in the
    // implementation. Because HRTFPanner is implemented with a delay line and
    // an FFTConvolver, the tail time of the HRTFPanner is the sum of the tail
    // time of the delay line and the tail time of the FFTConvolver. The FFT
    // size of the convolver is fftSize(), half of which is latency, but this
    // is aligned with blocks and so is reduced by the one block which is
    // processed immediately.
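    // Illustrative figures (fftSize() and the delay-line length depend on the
    // sample rate and are assumptions here, not values from this file): with
    // fftSize() == 512, WEBAUDIO_BLOCK_SIZE == 128 and a delay line of 89
    // frames, the tail is 89 + 512/2 + (512/2 - 128) = 473 frames.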
    return m_delayLine.MaxDelayTicks() +
           m_convolverL1.fftSize()/2 + m_convolverL1.latencyFrames();
}

} // namespace WebCore