/*
 * Copyright (C) 2010, Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "HRTFPanner.h"
#include "HRTFDatabaseLoader.h"

#include "AudioBlock.h"
#include "FFTConvolver.h"
#include "HRTFDatabase.h"

using namespace std;
using namespace mozilla;
using dom::ChannelInterpretation;

namespace WebCore {

// The value of 2 milliseconds is larger than the largest delay which exists in
// any HRTFKernel from the default HRTFDatabase (0.00136 seconds).
// The delay values used in pan() are asserted against this value.
const double MaxDelayTimeSeconds = 0.002;
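// (For illustration: this constant sizes the delay line in the constructor at
// MaxDelayTimeSeconds * sampleRate ticks, e.g. 0.002 * 48000 = 96 frames at
// 48 kHz.)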

const int UninitializedAzimuth = -1;
const unsigned RenderingQuantum = WEBAUDIO_BLOCK_SIZE;

HRTFPanner::HRTFPanner(float sampleRate, already_AddRefed<HRTFDatabaseLoader> databaseLoader)
    : m_databaseLoader(databaseLoader)
    , m_sampleRate(sampleRate)
    , m_crossfadeSelection(CrossfadeSelection1)
    , m_azimuthIndex1(UninitializedAzimuth)
    , m_azimuthIndex2(UninitializedAzimuth)
    // m_elevation1 and m_elevation2 are initialized in pan()
    , m_crossfadeX(0)
    , m_crossfadeIncr(0)
    , m_convolverL1(HRTFElevation::fftSizeForSampleRate(sampleRate))
    , m_convolverR1(m_convolverL1.fftSize())
    , m_convolverL2(m_convolverL1.fftSize())
    , m_convolverR2(m_convolverL1.fftSize())
    , m_delayLine(MaxDelayTimeSeconds * sampleRate, 1.0)
{
    MOZ_ASSERT(m_databaseLoader);
    MOZ_COUNT_CTOR(HRTFPanner);
}

HRTFPanner::~HRTFPanner()
{
    MOZ_COUNT_DTOR(HRTFPanner);
}

size_t HRTFPanner::sizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
{
    size_t amount = aMallocSizeOf(this);

    // NB: m_databaseLoader can be shared, so it is not measured here
    amount += m_convolverL1.sizeOfExcludingThis(aMallocSizeOf);
    amount += m_convolverR1.sizeOfExcludingThis(aMallocSizeOf);
    amount += m_convolverL2.sizeOfExcludingThis(aMallocSizeOf);
    amount += m_convolverR2.sizeOfExcludingThis(aMallocSizeOf);
    amount += m_delayLine.SizeOfExcludingThis(aMallocSizeOf);

    return amount;
}

void HRTFPanner::reset()
{
    m_azimuthIndex1 = UninitializedAzimuth;
    m_azimuthIndex2 = UninitializedAzimuth;
    // m_elevation1 and m_elevation2 are initialized in pan()
    m_crossfadeSelection = CrossfadeSelection1;
    m_crossfadeX = 0.0f;
    m_crossfadeIncr = 0.0f;
    m_convolverL1.reset();
    m_convolverR1.reset();
    m_convolverL2.reset();
    m_convolverR2.reset();
    m_delayLine.Reset();
}

int HRTFPanner::calculateDesiredAzimuthIndexAndBlend(double azimuth, double& azimuthBlend)
{
    // Convert the azimuth angle from the range -180 -> +180 into the range 0 -> 360.
    // The azimuth index may then be calculated from this positive value.
    if (azimuth < 0)
        azimuth += 360.0;

    HRTFDatabase* database = m_databaseLoader->database();
    MOZ_ASSERT(database);

    int numberOfAzimuths = database->numberOfAzimuths();
    const double angleBetweenAzimuths = 360.0 / numberOfAzimuths;

    // Calculate the azimuth index and the blend (0 -> 1) for interpolation.
    double desiredAzimuthIndexFloat = azimuth / angleBetweenAzimuths;
    int desiredAzimuthIndex = static_cast<int>(desiredAzimuthIndexFloat);
    azimuthBlend = desiredAzimuthIndexFloat - static_cast<double>(desiredAzimuthIndex);
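    // (Illustrative example, assuming a hypothetical database with 24
    // azimuths, i.e. 15 degrees apart: azimuth = 100 gives
    // desiredAzimuthIndexFloat = 6.67, so index 6 with blend ~0.67. The actual
    // count depends on the loaded database.)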

    // We don't immediately start using this azimuth index, but instead approach this index from the last index we rendered at.
    // This minimizes the clicks and graininess which would otherwise occur with moving sources.
    desiredAzimuthIndex = max(0, desiredAzimuthIndex);
    desiredAzimuthIndex = min(numberOfAzimuths - 1, desiredAzimuthIndex);
    return desiredAzimuthIndex;
}

void HRTFPanner::pan(double desiredAzimuth, double elevation, const AudioBlock* inputBus, AudioBlock* outputBus)
{
#ifdef DEBUG
    unsigned numInputChannels =
        inputBus->IsNull() ? 0 : inputBus->ChannelCount();

    MOZ_ASSERT(numInputChannels <= 2);
    MOZ_ASSERT(inputBus->GetDuration() == WEBAUDIO_BLOCK_SIZE);
#endif

    bool isOutputGood = outputBus && outputBus->ChannelCount() == 2 && outputBus->GetDuration() == WEBAUDIO_BLOCK_SIZE;
    MOZ_ASSERT(isOutputGood);

    if (!isOutputGood) {
        if (outputBus)
            outputBus->SetNull(outputBus->GetDuration());
        return;
    }

    HRTFDatabase* database = m_databaseLoader->database();
    if (!database) { // not yet loaded
        outputBus->SetNull(outputBus->GetDuration());
        return;
    }

    // The IRCAM HRTF azimuth values in the loaded database are reversed from the panner's notion of azimuth.
    double azimuth = -desiredAzimuth;

    bool isAzimuthGood = azimuth >= -180.0 && azimuth <= 180.0;
    MOZ_ASSERT(isAzimuthGood);
    if (!isAzimuthGood) {
        outputBus->SetNull(outputBus->GetDuration());
        return;
    }

    // Normally, we'll just be dealing with mono sources.
    // If we have a stereo input, implement stereo panning with left source processed by left HRTF, and right source by right HRTF.

    // Get destination pointers.
    float* destinationL =
        static_cast<float*>(const_cast<void*>(outputBus->mChannelData[0]));
    float* destinationR =
        static_cast<float*>(const_cast<void*>(outputBus->mChannelData[1]));

    double azimuthBlend;
    int desiredAzimuthIndex = calculateDesiredAzimuthIndexAndBlend(azimuth, azimuthBlend);

    // Initially snap azimuth and elevation values to first values encountered.
    if (m_azimuthIndex1 == UninitializedAzimuth) {
        m_azimuthIndex1 = desiredAzimuthIndex;
        m_elevation1 = elevation;
    }
    if (m_azimuthIndex2 == UninitializedAzimuth) {
        m_azimuthIndex2 = desiredAzimuthIndex;
        m_elevation2 = elevation;
    }

    // Cross-fade / transition over a period of around 45 milliseconds.
    // This is an empirical value tuned to be a reasonable trade-off between
    // smoothness and speed.
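    // (For illustration: 2048 frames at 44.1 kHz is ~46 ms, and 4096 frames
    // at 96 kHz is ~43 ms, so both branches stay near the 45 ms target.)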
    const double fadeFrames = sampleRate() <= 48000 ? 2048 : 4096;

    // Check for azimuth and elevation changes, initiating a cross-fade if needed.
    if (!m_crossfadeX && m_crossfadeSelection == CrossfadeSelection1) {
        if (desiredAzimuthIndex != m_azimuthIndex1 || elevation != m_elevation1) {
            // Cross-fade from 1 -> 2
            m_crossfadeIncr = 1 / fadeFrames;
            m_azimuthIndex2 = desiredAzimuthIndex;
            m_elevation2 = elevation;
        }
    }
    if (m_crossfadeX == 1 && m_crossfadeSelection == CrossfadeSelection2) {
        if (desiredAzimuthIndex != m_azimuthIndex2 || elevation != m_elevation2) {
            // Cross-fade from 2 -> 1
            m_crossfadeIncr = -1 / fadeFrames;
            m_azimuthIndex1 = desiredAzimuthIndex;
            m_elevation1 = elevation;
        }
    }

    // Get the HRTFKernels and interpolated delays.
    HRTFKernel* kernelL1;
    HRTFKernel* kernelR1;
    HRTFKernel* kernelL2;
    HRTFKernel* kernelR2;
    double frameDelayL1;
    double frameDelayR1;
    double frameDelayL2;
    double frameDelayR2;
    database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex1, m_elevation1, kernelL1, kernelR1, frameDelayL1, frameDelayR1);
    database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex2, m_elevation2, kernelL2, kernelR2, frameDelayL2, frameDelayR2);

    bool areKernelsGood = kernelL1 && kernelR1 && kernelL2 && kernelR2;
    MOZ_ASSERT(areKernelsGood);
    if (!areKernelsGood) {
        outputBus->SetNull(outputBus->GetDuration());
        return;
    }

    MOZ_ASSERT(frameDelayL1 / sampleRate() < MaxDelayTimeSeconds && frameDelayR1 / sampleRate() < MaxDelayTimeSeconds);
    MOZ_ASSERT(frameDelayL2 / sampleRate() < MaxDelayTimeSeconds && frameDelayR2 / sampleRate() < MaxDelayTimeSeconds);

    // Crossfade inter-aural delays based on transitions.
    double frameDelaysL[WEBAUDIO_BLOCK_SIZE];
    double frameDelaysR[WEBAUDIO_BLOCK_SIZE];
    {
        float x = m_crossfadeX;
        float incr = m_crossfadeIncr;
        for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) {
            frameDelaysL[i] = (1 - x) * frameDelayL1 + x * frameDelayL2;
            frameDelaysR[i] = (1 - x) * frameDelayR1 + x * frameDelayR2;
            x += incr;
        }
    }

    // First run through delay lines for inter-aural time difference.
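    // (As an aside: typical maximum inter-aural time differences for a human
    // head are roughly 0.6-0.7 ms, comfortably below MaxDelayTimeSeconds.)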
    m_delayLine.Write(*inputBus);
    // "Speakers" means a mono input is read into both outputs (with possibly
    // different delays).
    m_delayLine.ReadChannel(frameDelaysL, outputBus, 0,
                            ChannelInterpretation::Speakers);
    m_delayLine.ReadChannel(frameDelaysR, outputBus, 1,
                            ChannelInterpretation::Speakers);
    m_delayLine.NextBlock();

    bool needsCrossfading = m_crossfadeIncr != 0;

    const float* convolutionDestinationL1;
    const float* convolutionDestinationR1;
    const float* convolutionDestinationL2;
    const float* convolutionDestinationR2;

    // Now do the convolutions.
    // Note that we avoid doing convolutions on both sets of convolvers if we're not currently cross-fading.

    if (m_crossfadeSelection == CrossfadeSelection1 || needsCrossfading) {
        convolutionDestinationL1 =
            m_convolverL1.process(kernelL1->fftFrame(), destinationL);
        convolutionDestinationR1 =
            m_convolverR1.process(kernelR1->fftFrame(), destinationR);
    }

    if (m_crossfadeSelection == CrossfadeSelection2 || needsCrossfading) {
        convolutionDestinationL2 =
            m_convolverL2.process(kernelL2->fftFrame(), destinationL);
        convolutionDestinationR2 =
            m_convolverR2.process(kernelR2->fftFrame(), destinationR);
    }

    if (needsCrossfading) {
        // Apply linear cross-fade.
        float x = m_crossfadeX;
        float incr = m_crossfadeIncr;
        for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) {
            destinationL[i] = (1 - x) * convolutionDestinationL1[i] + x * convolutionDestinationL2[i];
            destinationR[i] = (1 - x) * convolutionDestinationR1[i] + x * convolutionDestinationR2[i];
            x += incr;
        }
        // Update cross-fade value from local.
        m_crossfadeX = x;

        if (m_crossfadeIncr > 0 && fabs(m_crossfadeX - 1) < m_crossfadeIncr) {
            // We've fully made the crossfade transition from 1 -> 2.
            m_crossfadeSelection = CrossfadeSelection2;
            m_crossfadeX = 1;
            m_crossfadeIncr = 0;
        } else if (m_crossfadeIncr < 0 && fabs(m_crossfadeX) < -m_crossfadeIncr) {
            // We've fully made the crossfade transition from 2 -> 1.
            m_crossfadeSelection = CrossfadeSelection1;
            m_crossfadeX = 0;
            m_crossfadeIncr = 0;
        }
    } else {
        const float* sourceL;
        const float* sourceR;
        if (m_crossfadeSelection == CrossfadeSelection1) {
            sourceL = convolutionDestinationL1;
            sourceR = convolutionDestinationR1;
        } else {
            sourceL = convolutionDestinationL2;
            sourceR = convolutionDestinationR2;
        }
        PodCopy(destinationL, sourceL, WEBAUDIO_BLOCK_SIZE);
        PodCopy(destinationR, sourceR, WEBAUDIO_BLOCK_SIZE);
    }
}

int HRTFPanner::maxTailFrames() const
{
    // Although the ideal tail time would be the length of the impulse
    // response, there is additional tail time from the approximations in the
    // implementation. Because HRTFPanner is implemented with a delay line
    // and an FFTConvolver, the tail time of the HRTFPanner is the sum of the
    // maximum delay-line delay and the tail time of the FFTConvolver. The
    // convolver's FFTs have length fftSize(), half of which is latency, but
    // this latency is aligned with blocks and so is reduced by the one block
    // which is processed immediately.
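    // (For illustration: at 48 kHz the delay line alone can contribute up to
    // roughly MaxDelayTimeSeconds * 48000 = 96 frames of tail.)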
    return m_delayLine.MaxDelayTicks() +
           m_convolverL1.fftSize()/2 + m_convolverL1.latencyFrames();
}

} // namespace WebCore