Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 : * You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
6 : #ifndef MOZILLA_AUDIOSEGMENT_H_
7 : #define MOZILLA_AUDIOSEGMENT_H_
8 :
9 : #include "MediaSegment.h"
10 : #include "AudioSampleFormat.h"
11 : #include "AudioChannelFormat.h"
12 : #include "SharedBuffer.h"
13 : #include "WebAudioUtils.h"
14 : #ifdef MOZILLA_INTERNAL_API
15 : #include "mozilla/TimeStamp.h"
16 : #endif
17 : #include <float.h>
18 :
19 : namespace mozilla {
20 :
21 : template<typename T>
22 0 : class SharedChannelArrayBuffer : public ThreadSharedObject {
23 : public:
24 0 : explicit SharedChannelArrayBuffer(nsTArray<nsTArray<T> >* aBuffers)
25 0 : {
26 0 : mBuffers.SwapElements(*aBuffers);
27 0 : }
28 :
29 0 : size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override
30 : {
31 0 : size_t amount = 0;
32 0 : amount += mBuffers.ShallowSizeOfExcludingThis(aMallocSizeOf);
33 0 : for (size_t i = 0; i < mBuffers.Length(); i++) {
34 0 : amount += mBuffers[i].ShallowSizeOfExcludingThis(aMallocSizeOf);
35 : }
36 :
37 0 : return amount;
38 : }
39 :
40 0 : size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override
41 : {
42 0 : return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
43 : }
44 :
45 : nsTArray<nsTArray<T> > mBuffers;
46 : };
47 :
48 : class AudioMixer;
49 :
50 : /**
51 : * For auto-arrays etc, guess this as the common number of channels.
52 : */
53 : const int GUESS_AUDIO_CHANNELS = 2;
54 :
55 : // We ensure that the graph advances in steps that are multiples of the Web
56 : // Audio block size
57 : const uint32_t WEBAUDIO_BLOCK_SIZE_BITS = 7;
58 : const uint32_t WEBAUDIO_BLOCK_SIZE = 1 << WEBAUDIO_BLOCK_SIZE_BITS;
59 :
60 : template <typename SrcT, typename DestT>
61 : static void
62 0 : InterleaveAndConvertBuffer(const SrcT* const* aSourceChannels,
63 : uint32_t aLength, float aVolume,
64 : uint32_t aChannels,
65 : DestT* aOutput)
66 : {
67 0 : DestT* output = aOutput;
68 0 : for (size_t i = 0; i < aLength; ++i) {
69 0 : for (size_t channel = 0; channel < aChannels; ++channel) {
70 0 : float v = AudioSampleToFloat(aSourceChannels[channel][i])*aVolume;
71 0 : *output = FloatToAudioSample<DestT>(v);
72 0 : ++output;
73 : }
74 : }
75 0 : }
76 :
// Split an interleaved source buffer into per-channel planar buffers,
// converting each sample from SrcT to DestT along the way.
template <typename SrcT, typename DestT>
static void
DeinterleaveAndConvertBuffer(const SrcT* aSourceBuffer,
                             uint32_t aFrames, uint32_t aChannels,
                             DestT** aOutput)
{
  // The interleaved source stores frame j's sample for channel i at
  // index j*aChannels + i.
  for (size_t channel = 0; channel < aChannels; ++channel) {
    for (size_t frame = 0; frame < aFrames; ++frame) {
      ConvertAudioSample(aSourceBuffer[frame * aChannels + channel],
                         aOutput[channel][frame]);
    }
  }
}
92 :
// Provides a shared all-zero sample buffer that can stand in for a silent
// channel (e.g. when up-mixing to more channels than a chunk has).
class SilentChannel
{
public:
  static const int AUDIO_PROCESSING_FRAMES = 640; /* > 10ms of 48KHz audio */
  // Backing storage for the zero channel, sized for the largest sample type.
  static const uint8_t gZeroChannel[MAX_AUDIO_SAMPLE_SIZE*AUDIO_PROCESSING_FRAMES];
  // We take advantage of the fact that zero in float and zero in int have the
  // same all-zeros bit layout.
  template<typename T>
  static const T* ZeroChannel();
};
103 :
104 :
/**
 * Given an array of input channels (aChannelData), downmix to aOutputChannels,
 * interleave the channel data. A total of aOutputChannels*aDuration
 * interleaved samples will be copied to a channel buffer in aOutput.
 * If the input already has exactly aOutputChannels channels, no downmix is
 * performed and the data is interleaved/converted directly.
 */
template <typename SrcT, typename DestT>
void
DownmixAndInterleave(const nsTArray<const SrcT*>& aChannelData,
                     int32_t aDuration, float aVolume, uint32_t aOutputChannels,
                     DestT* aOutput)
{

  if (aChannelData.Length() == aOutputChannels) {
    // Channel counts already match; interleave (and convert) in one pass.
    InterleaveAndConvertBuffer(aChannelData.Elements(),
                               aDuration, aVolume, aOutputChannels, aOutput);
  } else {
    // Downmix into temporary planar buffers first, then interleave.
    // The AutoTArray inline sizes avoid heap allocation for typical short
    // buffers (<= AUDIO_PROCESSING_FRAMES frames, GUESS_AUDIO_CHANNELS
    // channels); larger requests fall back to the heap via SetLength.
    AutoTArray<SrcT*,GUESS_AUDIO_CHANNELS> outputChannelData;
    AutoTArray<SrcT, SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS> outputBuffers;
    outputChannelData.SetLength(aOutputChannels);
    outputBuffers.SetLength(aDuration * aOutputChannels);
    for (uint32_t i = 0; i < aOutputChannels; i++) {
      // Each output channel gets a contiguous aDuration-sample slice of
      // outputBuffers.
      outputChannelData[i] = outputBuffers.Elements() + aDuration * i;
    }
    AudioChannelsDownMix(aChannelData,
                         outputChannelData.Elements(),
                         aOutputChannels,
                         aDuration);
    InterleaveAndConvertBuffer(outputChannelData.Elements(),
                               aDuration, aVolume, aOutputChannels, aOutput);
  }
}
136 :
137 : /**
138 : * An AudioChunk represents a multi-channel buffer of audio samples.
139 : * It references an underlying ThreadSharedObject which manages the lifetime
140 : * of the buffer. An AudioChunk maintains its own duration and channel data
141 : * pointers so it can represent a subinterval of a buffer without copying.
142 : * An AudioChunk can store its individual channels anywhere; it maintains
143 : * separate pointers to each channel's buffer.
144 : */
145 0 : struct AudioChunk {
146 : typedef mozilla::AudioSampleFormat SampleFormat;
147 :
148 0 : AudioChunk() : mPrincipalHandle(PRINCIPAL_HANDLE_NONE) {}
149 :
150 : // Generic methods
151 0 : void SliceTo(StreamTime aStart, StreamTime aEnd)
152 : {
153 0 : MOZ_ASSERT(aStart >= 0 && aStart < aEnd && aEnd <= mDuration,
154 : "Slice out of bounds");
155 0 : if (mBuffer) {
156 0 : MOZ_ASSERT(aStart < INT32_MAX, "Can't slice beyond 32-bit sample lengths");
157 0 : for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
158 0 : mChannelData[channel] = AddAudioSampleOffset(mChannelData[channel],
159 : mBufferFormat, int32_t(aStart));
160 : }
161 : }
162 0 : mDuration = aEnd - aStart;
163 0 : }
164 0 : StreamTime GetDuration() const { return mDuration; }
165 0 : bool CanCombineWithFollowing(const AudioChunk& aOther) const
166 : {
167 0 : if (aOther.mBuffer != mBuffer) {
168 0 : return false;
169 : }
170 0 : if (mBuffer) {
171 0 : NS_ASSERTION(aOther.mBufferFormat == mBufferFormat,
172 : "Wrong metadata about buffer");
173 0 : NS_ASSERTION(aOther.mChannelData.Length() == mChannelData.Length(),
174 : "Mismatched channel count");
175 0 : if (mDuration > INT32_MAX) {
176 0 : return false;
177 : }
178 0 : for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
179 0 : if (aOther.mChannelData[channel] != AddAudioSampleOffset(mChannelData[channel],
180 0 : mBufferFormat, int32_t(mDuration))) {
181 0 : return false;
182 : }
183 : }
184 : }
185 0 : return true;
186 : }
187 0 : bool IsNull() const { return mBuffer == nullptr; }
188 0 : void SetNull(StreamTime aDuration)
189 : {
190 0 : mBuffer = nullptr;
191 0 : mChannelData.Clear();
192 0 : mDuration = aDuration;
193 0 : mVolume = 1.0f;
194 0 : mBufferFormat = AUDIO_FORMAT_SILENCE;
195 0 : mPrincipalHandle = PRINCIPAL_HANDLE_NONE;
196 0 : }
197 :
198 0 : size_t ChannelCount() const { return mChannelData.Length(); }
199 :
200 : bool IsMuted() const { return mVolume == 0.0f; }
201 :
202 0 : size_t SizeOfExcludingThisIfUnshared(MallocSizeOf aMallocSizeOf) const
203 : {
204 0 : return SizeOfExcludingThis(aMallocSizeOf, true);
205 : }
206 :
207 0 : size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf, bool aUnshared) const
208 : {
209 0 : size_t amount = 0;
210 :
211 : // Possibly owned:
212 : // - mBuffer - Can hold data that is also in the decoded audio queue. If it
213 : // is not shared, or unshared == false it gets counted.
214 0 : if (mBuffer && (!aUnshared || !mBuffer->IsShared())) {
215 0 : amount += mBuffer->SizeOfIncludingThis(aMallocSizeOf);
216 : }
217 :
218 : // Memory in the array is owned by mBuffer.
219 0 : amount += mChannelData.ShallowSizeOfExcludingThis(aMallocSizeOf);
220 0 : return amount;
221 : }
222 :
223 : template<typename T>
224 0 : const nsTArray<const T*>& ChannelData()
225 : {
226 0 : MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat);
227 0 : return *reinterpret_cast<nsTArray<const T*>*>(&mChannelData);
228 : }
229 :
230 0 : PrincipalHandle GetPrincipalHandle() const { return mPrincipalHandle; }
231 :
232 : StreamTime mDuration; // in frames within the buffer
233 : RefPtr<ThreadSharedObject> mBuffer; // the buffer object whose lifetime is managed; null means data is all zeroes
234 : nsTArray<const void*> mChannelData; // one pointer per channel; empty if and only if mBuffer is null
235 : float mVolume; // volume multiplier to apply (1.0f if mBuffer is nonnull)
236 : SampleFormat mBufferFormat; // format of frames in mBuffer (only meaningful if mBuffer is nonnull)
237 : #ifdef MOZILLA_INTERNAL_API
238 : mozilla::TimeStamp mTimeStamp; // time at which this has been fetched from the MediaEngine
239 : #endif
240 : // principalHandle for the data in this chunk.
241 : // This can be compared to an nsIPrincipal* when back on main thread.
242 : PrincipalHandle mPrincipalHandle;
243 : };
244 :
/**
 * A list of audio samples consisting of a sequence of slices of SharedBuffers.
 * The audio rate is determined by the track, not stored in this class.
 */
class AudioSegment : public MediaSegmentBase<AudioSegment, AudioChunk> {
public:
  typedef mozilla::AudioSampleFormat SampleFormat;

  AudioSegment() : MediaSegmentBase<AudioSegment, AudioChunk>(AUDIO) {}

  // Resample the whole segment in place.
  // NOTE(review): aResampler is presumably configured for this segment's
  // channel count and rates elsewhere; each channel index is passed straight
  // through to SpeexResamplerProcess — confirm against callers.
  template<typename T>
  void Resample(SpeexResamplerState* aResampler, uint32_t aInRate, uint32_t aOutRate)
  {
    // Recomputed as we go, since each chunk's duration changes.
    mDuration = 0;
#ifdef DEBUG
    uint32_t segmentChannelCount = ChannelCount();
#endif

    for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
      // Per-chunk scratch space; the resampled data is re-wrapped in a new
      // SharedChannelArrayBuffer below, replacing the chunk's old buffer.
      AutoTArray<nsTArray<T>, GUESS_AUDIO_CHANNELS> output;
      AutoTArray<const T*, GUESS_AUDIO_CHANNELS> bufferPtrs;
      AudioChunk& c = *ci;
      // If this chunk is null, don't bother resampling, just alter its duration
      if (c.IsNull()) {
        c.mDuration = (c.mDuration * aOutRate) / aInRate;
        mDuration += c.mDuration;
        continue;
      }
      uint32_t channels = c.mChannelData.Length();
      MOZ_ASSERT(channels == segmentChannelCount);
      output.SetLength(channels);
      bufferPtrs.SetLength(channels);
      uint32_t inFrames = c.mDuration;
      // Round up to allocate; the last frame may not be used.
      // The assertion guards against 32-bit overflow in mDuration * aOutRate.
      NS_ASSERTION((UINT32_MAX - aInRate + 1) / c.mDuration >= aOutRate,
                   "Dropping samples");
      uint32_t outSize = (c.mDuration * aOutRate + aInRate - 1) / aInRate;
      for (uint32_t i = 0; i < channels; i++) {
        T* out = output[i].AppendElements(outSize);
        uint32_t outFrames = outSize;

        const T* in = static_cast<const T*>(c.mChannelData[i]);
        dom::WebAudioUtils::SpeexResamplerProcess(aResampler, i,
                                                  in, &inFrames,
                                                  out, &outFrames);
        // The resampler must consume the whole chunk.
        MOZ_ASSERT(inFrames == c.mDuration);

        bufferPtrs[i] = out;
        // Trim to the number of frames actually produced.
        output[i].SetLength(outFrames);
      }
      MOZ_ASSERT(channels > 0);
      c.mDuration = output[0].Length();
      // Hand ownership of the resampled data to the chunk; `output` is
      // emptied by the SharedChannelArrayBuffer constructor's swap.
      c.mBuffer = new mozilla::SharedChannelArrayBuffer<T>(&output);
      for (uint32_t i = 0; i < channels; i++) {
        c.mChannelData[i] = bufferPtrs[i];
      }
      mDuration += c.mDuration;
    }
  }

  void ResampleChunks(SpeexResamplerState* aResampler,
                      uint32_t aInRate,
                      uint32_t aOutRate);

  // Append a chunk of float32 audio. Takes a reference to aBuffer, the shared
  // owner of the channel data; the aChannelData pointers must point into
  // memory kept alive by aBuffer.
  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
                    const nsTArray<const float*>& aChannelData,
                    int32_t aDuration, const PrincipalHandle& aPrincipalHandle)
  {
    AudioChunk* chunk = AppendChunk(aDuration);
    chunk->mBuffer = aBuffer;
    for (uint32_t channel = 0; channel < aChannelData.Length(); ++channel) {
      chunk->mChannelData.AppendElement(aChannelData[channel]);
    }
    chunk->mVolume = 1.0f;
    chunk->mBufferFormat = AUDIO_FORMAT_FLOAT32;
#ifdef MOZILLA_INTERNAL_API
    chunk->mTimeStamp = TimeStamp::Now();
#endif
    chunk->mPrincipalHandle = aPrincipalHandle;
  }
  // Same as above, for 16-bit signed integer samples.
  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
                    const nsTArray<const int16_t*>& aChannelData,
                    int32_t aDuration, const PrincipalHandle& aPrincipalHandle)
  {
    AudioChunk* chunk = AppendChunk(aDuration);
    chunk->mBuffer = aBuffer;
    for (uint32_t channel = 0; channel < aChannelData.Length(); ++channel) {
      chunk->mChannelData.AppendElement(aChannelData[channel]);
    }
    chunk->mVolume = 1.0f;
    chunk->mBufferFormat = AUDIO_FORMAT_S16;
#ifdef MOZILLA_INTERNAL_API
    chunk->mTimeStamp = TimeStamp::Now();
#endif
    chunk->mPrincipalHandle = aPrincipalHandle;
  }
  // Consumes aChunk, and returns a pointer to the persistent copy of aChunk
  // in the segment. aChunk is left with no buffer/channel data.
  AudioChunk* AppendAndConsumeChunk(AudioChunk* aChunk)
  {
    AudioChunk* chunk = AppendChunk(aChunk->mDuration);
    chunk->mBuffer = aChunk->mBuffer.forget();
    chunk->mChannelData.SwapElements(aChunk->mChannelData);
    chunk->mVolume = aChunk->mVolume;
    chunk->mBufferFormat = aChunk->mBufferFormat;
#ifdef MOZILLA_INTERNAL_API
    chunk->mTimeStamp = TimeStamp::Now();
#endif
    chunk->mPrincipalHandle = aChunk->mPrincipalHandle;
    return chunk;
  }
  void ApplyVolume(float aVolume);
  // Mix the segment into a mixer, interleaved. This is useful to output a
  // segment to a system audio callback. It up or down mixes to aChannelCount
  // channels.
  void WriteTo(uint64_t aID, AudioMixer& aMixer, uint32_t aChannelCount,
               uint32_t aSampleRate);
  // Mix the segment into a mixer, keeping it planar, up or down mixing to
  // aChannelCount channels.
  void Mix(AudioMixer& aMixer, uint32_t aChannelCount, uint32_t aSampleRate);

  // Channel count of the first non-silent chunk, or 0 if all chunks are
  // silent (or the segment is empty).
  int ChannelCount() {
    NS_WARNING_ASSERTION(
      !mChunks.IsEmpty(),
      "Cannot query channel count on a AudioSegment with no chunks.");
    // Find the first chunk that has non-zero channels. A chunk that has zero
    // channels is just silence and we can simply discard it.
    for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
      if (ci->ChannelCount()) {
        return ci->ChannelCount();
      }
    }
    return 0;
  }

  // True when every chunk is null (the whole segment is silence).
  bool IsNull() const {
    // const_cast because ChunkIterator has no const flavor here; iteration
    // does not mutate the segment.
    for (ChunkIterator ci(*const_cast<AudioSegment*>(this)); !ci.IsEnded();
         ci.Next()) {
      if (!ci->IsNull()) {
        return false;
      }
    }
    return true;
  }

  static Type StaticType() { return AUDIO; }

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override
  {
    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  }
};
398 :
399 : template<typename SrcT>
400 0 : void WriteChunk(AudioChunk& aChunk,
401 : uint32_t aOutputChannels,
402 : AudioDataValue* aOutputBuffer)
403 : {
404 0 : AutoTArray<const SrcT*,GUESS_AUDIO_CHANNELS> channelData;
405 :
406 0 : channelData = aChunk.ChannelData<SrcT>();
407 :
408 0 : if (channelData.Length() < aOutputChannels) {
409 : // Up-mix. Note that this might actually make channelData have more
410 : // than aOutputChannels temporarily.
411 0 : AudioChannelsUpMix(&channelData, aOutputChannels, SilentChannel::ZeroChannel<SrcT>());
412 : }
413 0 : if (channelData.Length() > aOutputChannels) {
414 : // Down-mix.
415 0 : DownmixAndInterleave(channelData, aChunk.mDuration,
416 : aChunk.mVolume, aOutputChannels, aOutputBuffer);
417 : } else {
418 0 : InterleaveAndConvertBuffer(channelData.Elements(),
419 0 : aChunk.mDuration, aChunk.mVolume,
420 : aOutputChannels,
421 : aOutputBuffer);
422 : }
423 0 : }
424 :
425 :
426 :
427 : } // namespace mozilla
428 :
429 : #endif /* MOZILLA_AUDIOSEGMENT_H_ */
|