Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #include "nsPrintfCString.h"
8 : #include "MediaQueue.h"
9 : #include "AudioSink.h"
10 : #include "VideoUtils.h"
11 : #include "AudioConverter.h"
12 :
13 : #include "mozilla/CheckedInt.h"
14 : #include "mozilla/DebugOnly.h"
15 : #include "mozilla/IntegerPrintfMacros.h"
16 : #include "MediaPrefs.h"
17 :
18 : namespace mozilla {
19 :
20 : extern LazyLogModule gMediaDecoderLog;
21 : #define SINK_LOG(msg, ...) \
22 : MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, ("AudioSink=%p " msg, this, ##__VA_ARGS__))
23 : #define SINK_LOG_V(msg, ...) \
24 : MOZ_LOG(gMediaDecoderLog, LogLevel::Verbose, ("AudioSink=%p " msg, this, ##__VA_ARGS__))
25 :
26 : namespace media {
27 :
// The amount of audio frames that is used to fuzz rounding errors when
// deciding whether a gap in the audio timeline is real (see
// NotifyAudioNeeded).
static const int64_t AUDIO_FUZZ_FRAMES = 1;

// Amount of audio, in microseconds (not frames), that we aim to keep
// processed ahead of playback to absorb processing latency.
static const int32_t LOW_AUDIO_USECS = 300000;
33 :
34 0 : AudioSink::AudioSink(AbstractThread* aThread,
35 : MediaQueue<AudioData>& aAudioQueue,
36 : const TimeUnit& aStartTime,
37 : const AudioInfo& aInfo,
38 0 : dom::AudioChannel aChannel)
39 : : mStartTime(aStartTime)
40 : , mInfo(aInfo)
41 : , mChannel(aChannel)
42 : , mPlaying(true)
43 : , mMonitor("AudioSink")
44 : , mWritten(0)
45 : , mErrored(false)
46 : , mPlaybackComplete(false)
47 : , mOwnerThread(aThread)
48 : , mProcessedQueueLength(0)
49 : , mFramesParsed(0)
50 : , mIsAudioDataAudible(false)
51 0 : , mAudioQueue(aAudioQueue)
52 : {
53 0 : bool resampling = MediaPrefs::AudioSinkResampling();
54 :
55 0 : if (resampling) {
56 0 : mOutputRate = MediaPrefs::AudioSinkResampleRate();
57 0 : } else if (mInfo.mRate == 44100 || mInfo.mRate == 48000) {
58 : // The original rate is of good quality and we want to minimize unecessary
59 : // resampling. The common scenario being that the sampling rate is one or
60 : // the other, this allows to minimize audio quality regression and hoping
61 : // content provider want change from those rates mid-stream.
62 0 : mOutputRate = mInfo.mRate;
63 : } else {
64 : // We will resample all data to match cubeb's preferred sampling rate.
65 0 : mOutputRate = AudioStream::GetPreferredRate();
66 : }
67 0 : MOZ_DIAGNOSTIC_ASSERT(mOutputRate, "output rate can't be 0.");
68 :
69 0 : bool monoAudioEnabled = MediaPrefs::MonoAudio();
70 :
71 0 : mOutputChannels =
72 : monoAudioEnabled
73 0 : ? 1
74 0 : : (MediaPrefs::AudioSinkForceStereo() ? 2 : mInfo.mChannels);
75 0 : }
76 :
77 0 : AudioSink::~AudioSink()
78 : {
79 0 : }
80 :
81 : RefPtr<GenericPromise>
82 0 : AudioSink::Init(const PlaybackParams& aParams)
83 : {
84 0 : MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
85 :
86 0 : mAudioQueueListener = mAudioQueue.PushEvent().Connect(
87 0 : mOwnerThread, this, &AudioSink::OnAudioPushed);
88 0 : mAudioQueueFinishListener = mAudioQueue.FinishEvent().Connect(
89 0 : mOwnerThread, this, &AudioSink::NotifyAudioNeeded);
90 0 : mProcessedQueueListener = mProcessedQueue.PopEvent().Connect(
91 0 : mOwnerThread, this, &AudioSink::OnAudioPopped);
92 :
93 : // To ensure at least one audio packet will be popped from AudioQueue and
94 : // ready to be played.
95 0 : NotifyAudioNeeded();
96 0 : RefPtr<GenericPromise> p = mEndPromise.Ensure(__func__);
97 0 : nsresult rv = InitializeAudioStream(aParams);
98 0 : if (NS_FAILED(rv)) {
99 0 : mEndPromise.Reject(rv, __func__);
100 : }
101 0 : return p;
102 : }
103 :
104 : TimeUnit
105 0 : AudioSink::GetPosition()
106 : {
107 : int64_t tmp;
108 0 : if (mAudioStream &&
109 0 : (tmp = mAudioStream->GetPosition()) >= 0) {
110 0 : TimeUnit pos = TimeUnit::FromMicroseconds(tmp);
111 0 : NS_ASSERTION(pos >= mLastGoodPosition,
112 : "AudioStream position shouldn't go backward");
113 : // Update the last good position when we got a good one.
114 0 : if (pos >= mLastGoodPosition) {
115 0 : mLastGoodPosition = pos;
116 : }
117 : }
118 :
119 0 : return mStartTime + mLastGoodPosition;
120 : }
121 :
122 : bool
123 0 : AudioSink::HasUnplayedFrames()
124 : {
125 : // Experimentation suggests that GetPositionInFrames() is zero-indexed,
126 : // so we need to add 1 here before comparing it to mWritten.
127 : int64_t total;
128 : {
129 0 : MonitorAutoLock mon(mMonitor);
130 0 : total = mWritten + (mCursor.get() ? mCursor->Available() : 0);
131 : }
132 0 : return mProcessedQueue.GetSize() ||
133 0 : (mAudioStream && mAudioStream->GetPositionInFrames() + 1 < total);
134 : }
135 :
// Tear down the sink: stop listening to queue events, shut down the
// underlying AudioStream, drop any queued processed audio, and resolve the
// end promise. Must run on the owner thread.
void
AudioSink::Shutdown()
{
  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());

  // Disconnect first so no more NotifyAudioNeeded work is scheduled while we
  // tear down.
  mAudioQueueListener.Disconnect();
  mAudioQueueFinishListener.Disconnect();
  mProcessedQueueListener.Disconnect();

  if (mAudioStream) {
    mAudioStream->Shutdown();
    mAudioStream = nullptr;
  }
  // Drop pending processed packets and mark the queue finished so Ended()
  // reports completion.
  mProcessedQueue.Reset();
  mProcessedQueue.Finish();
  // Resolve only if Drained() (or a previous Shutdown) hasn't already.
  mEndPromise.ResolveIfExists(true, __func__);
}
153 :
154 : void
155 0 : AudioSink::SetVolume(double aVolume)
156 : {
157 0 : if (mAudioStream) {
158 0 : mAudioStream->SetVolume(aVolume);
159 : }
160 0 : }
161 :
162 : void
163 0 : AudioSink::SetPlaybackRate(double aPlaybackRate)
164 : {
165 0 : MOZ_ASSERT(aPlaybackRate != 0, "Don't set the playbackRate to 0 on AudioStream");
166 0 : if (mAudioStream) {
167 0 : mAudioStream->SetPlaybackRate(aPlaybackRate);
168 : }
169 0 : }
170 :
171 : void
172 0 : AudioSink::SetPreservesPitch(bool aPreservesPitch)
173 : {
174 0 : if (mAudioStream) {
175 0 : mAudioStream->SetPreservesPitch(aPreservesPitch);
176 : }
177 0 : }
178 :
179 : void
180 0 : AudioSink::SetPlaying(bool aPlaying)
181 : {
182 0 : if (!mAudioStream || mPlaying == aPlaying || mPlaybackComplete) {
183 0 : return;
184 : }
185 : // pause/resume AudioStream as necessary.
186 0 : if (!aPlaying) {
187 0 : mAudioStream->Pause();
188 0 : } else if (aPlaying) {
189 0 : mAudioStream->Resume();
190 : }
191 0 : mPlaying = aPlaying;
192 : }
193 :
194 : nsresult
195 0 : AudioSink::InitializeAudioStream(const PlaybackParams& aParams)
196 : {
197 0 : mAudioStream = new AudioStream(*this);
198 : // When AudioQueue is empty, there is no way to know the channel layout of
199 : // the coming audio data, so we use the predefined channel map instead.
200 : uint32_t channelMap = mConverter
201 0 : ? mConverter->OutputConfig().Layout().Map()
202 0 : : AudioStream::GetPreferredChannelMap(mOutputChannels);
203 : // The layout map used here is already processed by mConverter with
204 : // mOutputChannels into SMPTE format, so there is no need to worry if
205 : // MediaPrefs::MonoAudio() or MediaPrefs::AudioSinkForceStereo() is applied.
206 0 : nsresult rv = mAudioStream->Init(mOutputChannels, channelMap, mOutputRate, mChannel);
207 0 : if (NS_FAILED(rv)) {
208 0 : mAudioStream->Shutdown();
209 0 : mAudioStream = nullptr;
210 0 : return rv;
211 : }
212 :
213 : // Set playback params before calling Start() so they can take effect
214 : // as soon as the 1st DataCallback of the AudioStream fires.
215 0 : mAudioStream->SetVolume(aParams.mVolume);
216 0 : mAudioStream->SetPlaybackRate(aParams.mPlaybackRate);
217 0 : mAudioStream->SetPreservesPitch(aParams.mPreservesPitch);
218 0 : mAudioStream->Start();
219 :
220 0 : return NS_OK;
221 : }
222 :
223 : TimeUnit
224 0 : AudioSink::GetEndTime() const
225 : {
226 : int64_t written;
227 : {
228 0 : MonitorAutoLock mon(mMonitor);
229 0 : written = mWritten;
230 : }
231 0 : TimeUnit played = FramesToTimeUnit(written, mOutputRate) + mStartTime;
232 0 : if (!played.IsValid()) {
233 0 : NS_WARNING("Int overflow calculating audio end time");
234 0 : return TimeUnit::Zero();
235 : }
236 : // As we may be resampling, rounding errors may occur. Ensure we never get
237 : // past the original end time.
238 0 : return std::min(mLastEndTime, played);
239 : }
240 :
// Hand up to aFrames of processed audio to the AudioStream's data callback.
// Returns an empty chunk when no processed data is available. The ordering of
// peek / bookkeeping / pop below is deliberate — see the inline comments.
UniquePtr<AudioStream::Chunk>
AudioSink::PopFrames(uint32_t aFrames)
{
  // Chunk wraps a slice of an AudioData packet, keeping the packet alive
  // (via the RefPtr member) while the AudioStream consumes the samples.
  class Chunk : public AudioStream::Chunk {
  public:
    Chunk(AudioData* aBuffer, uint32_t aFrames, AudioDataValue* aData)
      : mBuffer(aBuffer), mFrames(aFrames), mData(aData) {}
    Chunk() : mFrames(0), mData(nullptr) {}
    const AudioDataValue* Data() const { return mData; }
    uint32_t Frames() const { return mFrames; }
    uint32_t Channels() const { return mBuffer ? mBuffer->mChannels: 0; }
    uint32_t Rate() const { return mBuffer ? mBuffer->mRate : 0; }
    AudioDataValue* GetWritable() const { return mData; }
  private:
    const RefPtr<AudioData> mBuffer;
    const uint32_t mFrames;
    AudioDataValue* const mData;
  };

  bool needPopping = false;
  if (!mCurrentData) {
    // No data in the queue. Return an empty chunk.
    if (!mProcessedQueue.GetSize()) {
      return MakeUnique<Chunk>();
    }

    // We need to update our values prior popping the processed queue in
    // order to prevent the pop event to fire too early (prior
    // mProcessedQueueLength being updated) or prevent HasUnplayedFrames
    // to incorrectly return true during the time interval between when
    // mProcessedQueue is read and mWritten is updated.
    needPopping = true;
    mCurrentData = mProcessedQueue.PeekFront();
    {
      // mCursor is read by HasUnplayedFrames() under the same monitor.
      MonitorAutoLock mon(mMonitor);
      mCursor = MakeUnique<AudioBufferCursor>(mCurrentData->mAudioData.get(),
                                              mCurrentData->mChannels,
                                              mCurrentData->mFrames);
    }
    MOZ_ASSERT(mCurrentData->mFrames > 0);
    mProcessedQueueLength -=
      FramesToUsecs(mCurrentData->mFrames, mOutputRate).value();
  }

  auto framesToPop = std::min(aFrames, mCursor->Available());

  SINK_LOG_V("playing audio at time=%" PRId64 " offset=%u length=%u",
             mCurrentData->mTime.ToMicroseconds(),
             mCurrentData->mFrames - mCursor->Available(), framesToPop);

  UniquePtr<AudioStream::Chunk> chunk =
    MakeUnique<Chunk>(mCurrentData, framesToPop, mCursor->Ptr());

  {
    // Account for the frames handed out and advance the cursor atomically
    // with respect to HasUnplayedFrames()/GetEndTime().
    MonitorAutoLock mon(mMonitor);
    mWritten += framesToPop;
    mCursor->Advance(framesToPop);
  }

  // All frames are popped. Reset mCurrentData so we can pop new elements from
  // the audio queue in next calls to PopFrames().
  if (!mCursor->Available()) {
    mCurrentData = nullptr;
  }

  if (needPopping) {
    // We can now safely pop the audio packet from the processed queue.
    // This will fire the popped event, triggering a call to NotifyAudioNeeded.
    RefPtr<AudioData> releaseMe = mProcessedQueue.PopFront();
    CheckIsAudible(releaseMe);
  }

  return chunk;
}
315 :
316 : bool
317 0 : AudioSink::Ended() const
318 : {
319 : // Return true when error encountered so AudioStream can start draining.
320 0 : return mProcessedQueue.IsFinished() || mErrored;
321 : }
322 :
323 : void
324 0 : AudioSink::Drained()
325 : {
326 0 : SINK_LOG("Drained");
327 0 : mPlaybackComplete = true;
328 0 : mEndPromise.ResolveIfExists(true, __func__);
329 0 : }
330 :
331 : void
332 0 : AudioSink::CheckIsAudible(const AudioData* aData)
333 : {
334 0 : MOZ_ASSERT(aData);
335 :
336 0 : bool isAudible = aData->IsAudible();
337 0 : if (isAudible != mIsAudioDataAudible) {
338 0 : mIsAudioDataAudible = isAudible;
339 0 : mAudibleEvent.Notify(mIsAudioDataAudible);
340 : }
341 0 : }
342 :
343 : void
344 0 : AudioSink::OnAudioPopped(const RefPtr<AudioData>& aSample)
345 : {
346 0 : SINK_LOG_V("AudioStream has used an audio packet.");
347 0 : NotifyAudioNeeded();
348 0 : }
349 :
350 : void
351 0 : AudioSink::OnAudioPushed(const RefPtr<AudioData>& aSample)
352 : {
353 0 : SINK_LOG_V("One new audio packet available.");
354 0 : NotifyAudioNeeded();
355 0 : }
356 :
// Pull decoded packets off mAudioQueue, convert them to the output
// rate/channel configuration, insert silence across timeline gaps, and push
// the results onto mProcessedQueue. Stops once the processed queue holds
// enough data (LOW_AUDIO_USECS worth and at least 2 packets), unless the
// input queue is finished, in which case everything is drained.
void
AudioSink::NotifyAudioNeeded()
{
  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn(),
             "Not called from the owner's thread");

  // Always ensure we have at least two processed packets pending to allow
  // for processing latency.
  while (mAudioQueue.GetSize() && (mAudioQueue.IsFinished() ||
                                   mProcessedQueueLength < LOW_AUDIO_USECS ||
                                   mProcessedQueue.GetSize() < 2)) {
    RefPtr<AudioData> data = mAudioQueue.PopFront();

    // Ignore the element with 0 frames and try next.
    if (!data->mFrames) {
      continue;
    }

    // (Re)create the converter whenever the input format changes.
    if (!mConverter ||
        (data->mRate != mConverter->InputConfig().Rate() ||
         data->mChannels != mConverter->InputConfig().Channels())) {
      SINK_LOG_V("Audio format changed from %u@%uHz to %u@%uHz",
                 mConverter? mConverter->InputConfig().Channels() : 0,
                 mConverter ? mConverter->InputConfig().Rate() : 0,
                 data->mChannels, data->mRate);

      // Flush whatever the old converter still holds before replacing it.
      DrainConverter();

      // mFramesParsed indicates the current playtime in frames at the current
      // input sampling rate. Recalculate it per the new sampling rate.
      if (mFramesParsed) {
        // We minimize overflow.
        // NOTE(review): mConverter is dereferenced here; this appears safe
        // because mFramesParsed only becomes non-zero after a first packet
        // has created mConverter below — TODO confirm.
        uint32_t oldRate = mConverter->InputConfig().Rate();
        uint32_t newRate = data->mRate;
        CheckedInt64 result = SaferMultDiv(mFramesParsed, newRate, oldRate);
        if (!result.isValid()) {
          NS_WARNING("Int overflow in AudioSink");
          mErrored = true;
          return;
        }
        mFramesParsed = result.value();
      }

      mConverter =
        MakeUnique<AudioConverter>(
          AudioConfig(data->mChannels, data->mRate),
          AudioConfig(mOutputChannels, mOutputRate));
    }

    // See if there's a gap in the audio. If there is, push silence into the
    // audio hardware, so we can play across the gap.
    // Calculate the timestamp of the next chunk of audio in numbers of
    // samples.
    CheckedInt64 sampleTime =
      TimeUnitToFrames(data->mTime - mStartTime, data->mRate);
    // Calculate the number of frames that have been pushed onto the audio hardware.
    CheckedInt64 missingFrames = sampleTime - mFramesParsed;

    if (!missingFrames.isValid()) {
      NS_WARNING("Int overflow in AudioSink");
      mErrored = true;
      return;
    }

    if (missingFrames.value() > AUDIO_FUZZ_FRAMES) {
      // The next audio packet begins some time after the end of the last packet
      // we pushed to the audio hardware. We must push silence into the audio
      // hardware so that the next audio packet begins playback at the correct
      // time.
      missingFrames = std::min<int64_t>(INT32_MAX, missingFrames.value());
      mFramesParsed += missingFrames.value();

      // We need to calculate how many frames are missing at the output rate.
      missingFrames =
        SaferMultDiv(missingFrames.value(), mOutputRate, data->mRate);
      if (!missingFrames.isValid()) {
        NS_WARNING("Int overflow in AudioSink");
        mErrored = true;
        return;
      }

      // We need to insert silence, first use drained frames if any.
      missingFrames -= DrainConverter(missingFrames.value());
      // Insert silence if still needed.
      if (missingFrames.value()) {
        AlignedAudioBuffer silenceData(missingFrames.value() * mOutputChannels);
        if (!silenceData) {
          NS_WARNING("OOM in AudioSink");
          mErrored = true;
          return;
        }
        RefPtr<AudioData> silence = CreateAudioFromBuffer(Move(silenceData), data);
        PushProcessedAudio(silence);
      }
    }

    mLastEndTime = data->GetEndTime();
    mFramesParsed += data->mFrames;

    // Convert only when the input and output configurations actually differ.
    if (mConverter->InputConfig() != mConverter->OutputConfig()) {
      // We must ensure that the size in the buffer contains exactly the number
      // of frames, in case one of the audio producers over allocated the
      // buffer.
      AlignedAudioBuffer buffer(Move(data->mAudioData));
      buffer.SetLength(size_t(data->mFrames) * data->mChannels);

      AlignedAudioBuffer convertedData =
        mConverter->Process(AudioSampleBuffer(Move(buffer))).Forget();
      data = CreateAudioFromBuffer(Move(convertedData), data);
    }
    // Remember the last pushed packet; DrainConverter() uses it as a
    // reference for timestamps/offsets of drained output.
    if (PushProcessedAudio(data)) {
      mLastProcessedPacket = Some(data);
    }
  }

  if (mAudioQueue.IsFinished()) {
    // We have reached the end of the data, drain the resampler.
    DrainConverter();
    mProcessedQueue.Finish();
  }
}
477 :
478 : uint32_t
479 0 : AudioSink::PushProcessedAudio(AudioData* aData)
480 : {
481 0 : if (!aData || !aData->mFrames) {
482 0 : return 0;
483 : }
484 0 : mProcessedQueue.Push(aData);
485 0 : mProcessedQueueLength += FramesToUsecs(aData->mFrames, mOutputRate).value();
486 0 : return aData->mFrames;
487 : }
488 :
489 : already_AddRefed<AudioData>
490 0 : AudioSink::CreateAudioFromBuffer(AlignedAudioBuffer&& aBuffer,
491 : AudioData* aReference)
492 : {
493 0 : uint32_t frames = aBuffer.Length() / mOutputChannels;
494 0 : if (!frames) {
495 0 : return nullptr;
496 : }
497 0 : auto duration = FramesToTimeUnit(frames, mOutputRate);
498 0 : if (!duration.IsValid()) {
499 0 : NS_WARNING("Int overflow in AudioSink");
500 0 : mErrored = true;
501 0 : return nullptr;
502 : }
503 : RefPtr<AudioData> data =
504 : new AudioData(aReference->mOffset,
505 : aReference->mTime,
506 : duration,
507 : frames,
508 : Move(aBuffer),
509 : mOutputChannels,
510 0 : mOutputRate);
511 0 : return data.forget();
512 : }
513 :
// Flush any samples buffered inside mConverter (e.g. resampler latency) into
// the processed queue, capped at aMaxFrames. Returns the number of frames
// pushed. Uses mLastProcessedPacket as the timing reference and consumes it,
// so at most one drain happens per pushed packet.
uint32_t
AudioSink::DrainConverter(uint32_t aMaxFrames)
{
  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());

  if (!mConverter || !mLastProcessedPacket || !aMaxFrames) {
    // nothing to drain.
    return 0;
  }

  RefPtr<AudioData> lastPacket = mLastProcessedPacket.ref();
  mLastProcessedPacket.reset();

  // To drain we simply provide an empty packet to the audio converter.
  AlignedAudioBuffer convertedData =
    mConverter->Process(AudioSampleBuffer(AlignedAudioBuffer())).Forget();

  uint32_t frames = convertedData.Length() / mOutputChannels;
  // Clamp to aMaxFrames by shrinking the buffer.
  if (!convertedData.SetLength(std::min(frames, aMaxFrames) * mOutputChannels)) {
    // This can never happen as we were reducing the length of convertData.
    mErrored = true;
    return 0;
  }

  RefPtr<AudioData> data =
    CreateAudioFromBuffer(Move(convertedData), lastPacket);
  if (!data) {
    return 0;
  }
  // NOTE(review): this pushes directly rather than via PushProcessedAudio(),
  // so mProcessedQueueLength is not credited for drained frames — presumably
  // intentional (callers immediately subtract the returned count); confirm.
  mProcessedQueue.Push(data);
  return data->mFrames;
}
546 :
547 : } // namespace media
548 : } // namespace mozilla
|