Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 : * You can obtain one at http://mozilla.org/MPL/2.0/. */
5 : #include "OpusTrackEncoder.h"
6 : #include "nsString.h"
7 : #include "GeckoProfiler.h"
8 : #include "mozilla/CheckedInt.h"
9 :
10 : #include <opus/opus.h>
11 :
12 : #undef LOG
13 : #ifdef MOZ_WIDGET_GONK
14 : #include <android/log.h>
15 : #define LOG(args...) __android_log_print(ANDROID_LOG_INFO, "MediaEncoder", ## args);
16 : #else
17 : #define LOG(args, ...)
18 : #endif
19 :
20 : namespace mozilla {
21 :
// Opus itself can carry up to 8 channels (and up to 255 with multitrack
// audio). This encoder accepts that many input channels, but only produces
// mono or stereo output; anything wider is downmixed.
static constexpr int MAX_SUPPORTED_AUDIO_CHANNELS = 8;

// http://www.opus-codec.org/docs/html_api-1.0.2/group__opus__encoder.html
// Per the "opus_encoder_init" section, the input signal must have 1 or 2
// channels.
static constexpr int MAX_CHANNELS = 2;

// Size, in bytes, of the output buffer handed to Opus for one encoded packet.
static constexpr int MAX_DATA_BYTES = 4096;

// http://tools.ietf.org/html/draft-ietf-codec-oggopus-00#section-4
// Second paragraph: "The granule position of an audio data page is in units
// of PCM audio samples at a fixed rate of 48 kHz."
static constexpr int kOpusSamplingRate = 48000;

// Duration of one Opus frame in milliseconds; it must be 2.5, 5, 10, 20, 40
// or 60 ms.
static constexpr int kFrameDurationMs = 20;

// Input sampling rates (Hz) that the Opus encoder accepts directly. Input at
// any other rate will be resampled to 48 kHz.
static constexpr int kOpusSupportedInputSamplingRates[] =
  {8000, 12000, 16000, 24000, 48000};
46 :
47 : namespace {
48 :
49 : // An endian-neutral serialization of integers. Serializing T in little endian
50 : // format to aOutput, where T is a 16 bits or 32 bits integer.
51 : template<typename T>
52 : static void
53 0 : SerializeToBuffer(T aValue, nsTArray<uint8_t>* aOutput)
54 : {
55 0 : for (uint32_t i = 0; i < sizeof(T); i++) {
56 0 : aOutput->AppendElement((uint8_t)(0x000000ff & (aValue >> (i * 8))));
57 : }
58 0 : }
59 :
60 : static inline void
61 0 : SerializeToBuffer(const nsCString& aComment, nsTArray<uint8_t>* aOutput)
62 : {
63 : // Format of serializing a string to buffer is, the length of string (32 bits,
64 : // little endian), and the string.
65 0 : SerializeToBuffer((uint32_t)(aComment.Length()), aOutput);
66 0 : aOutput->AppendElements(aComment.get(), aComment.Length());
67 0 : }
68 :
69 :
70 : static void
71 0 : SerializeOpusIdHeader(uint8_t aChannelCount, uint16_t aPreskip,
72 : uint32_t aInputSampleRate, nsTArray<uint8_t>* aOutput)
73 : {
74 : // The magic signature, null terminator has to be stripped off from strings.
75 : static const uint8_t magic[] = "OpusHead";
76 0 : aOutput->AppendElements(magic, sizeof(magic) - 1);
77 :
78 : // The version must always be 1 (8 bits, unsigned).
79 0 : aOutput->AppendElement(1);
80 :
81 : // Number of output channels (8 bits, unsigned).
82 0 : aOutput->AppendElement(aChannelCount);
83 :
84 : // Number of samples (at 48 kHz) to discard from the decoder output when
85 : // starting playback (16 bits, unsigned, little endian).
86 0 : SerializeToBuffer(aPreskip, aOutput);
87 :
88 : // The sampling rate of input source (32 bits, unsigned, little endian).
89 0 : SerializeToBuffer(aInputSampleRate, aOutput);
90 :
91 : // Output gain, an encoder should set this field to zero (16 bits, signed,
92 : // little endian).
93 0 : SerializeToBuffer((int16_t)0, aOutput);
94 :
95 : // Channel mapping family. Family 0 allows only 1 or 2 channels (8 bits,
96 : // unsigned).
97 0 : aOutput->AppendElement(0);
98 0 : }
99 :
100 : static void
101 0 : SerializeOpusCommentHeader(const nsCString& aVendor,
102 : const nsTArray<nsCString>& aComments,
103 : nsTArray<uint8_t>* aOutput)
104 : {
105 : // The magic signature, null terminator has to be stripped off.
106 : static const uint8_t magic[] = "OpusTags";
107 0 : aOutput->AppendElements(magic, sizeof(magic) - 1);
108 :
109 : // The vendor; Should append in the following order:
110 : // vendor string length (32 bits, unsigned, little endian)
111 : // vendor string.
112 0 : SerializeToBuffer(aVendor, aOutput);
113 :
114 : // Add comments; Should append in the following order:
115 : // comment list length (32 bits, unsigned, little endian)
116 : // comment #0 string length (32 bits, unsigned, little endian)
117 : // comment #0 string
118 : // comment #1 string length (32 bits, unsigned, little endian)
119 : // comment #1 string ...
120 0 : SerializeToBuffer((uint32_t)aComments.Length(), aOutput);
121 0 : for (uint32_t i = 0; i < aComments.Length(); ++i) {
122 0 : SerializeToBuffer(aComments[i], aOutput);
123 : }
124 0 : }
125 :
126 : } // Anonymous namespace.
127 :
128 0 : OpusTrackEncoder::OpusTrackEncoder()
129 : : AudioTrackEncoder()
130 : , mEncoder(nullptr)
131 : , mLookahead(0)
132 : , mResampler(nullptr)
133 0 : , mOutputTimeStamp(0)
134 : {
135 0 : }
136 :
137 0 : OpusTrackEncoder::~OpusTrackEncoder()
138 : {
139 0 : if (mEncoder) {
140 0 : opus_encoder_destroy(mEncoder);
141 : }
142 0 : if (mResampler) {
143 0 : speex_resampler_destroy(mResampler);
144 0 : mResampler = nullptr;
145 : }
146 0 : }
147 :
148 : nsresult
149 0 : OpusTrackEncoder::Init(int aChannels, int aSamplingRate)
150 : {
151 : // This monitor is used to wake up other methods that are waiting for encoder
152 : // to be completely initialized.
153 0 : ReentrantMonitorAutoEnter mon(mReentrantMonitor);
154 :
155 0 : NS_ENSURE_TRUE((aChannels <= MAX_SUPPORTED_AUDIO_CHANNELS) && (aChannels > 0),
156 : NS_ERROR_FAILURE);
157 :
158 : // This version of encoder API only support 1 or 2 channels,
159 : // So set the mChannels less or equal 2 and
160 : // let InterleaveTrackData downmix pcm data.
161 0 : mChannels = aChannels > MAX_CHANNELS ? MAX_CHANNELS : aChannels;
162 :
163 : // Reject non-audio sample rates.
164 0 : NS_ENSURE_TRUE(aSamplingRate >= 8000, NS_ERROR_INVALID_ARG);
165 0 : NS_ENSURE_TRUE(aSamplingRate <= 192000, NS_ERROR_INVALID_ARG);
166 :
167 : // According to www.opus-codec.org, creating an opus encoder requires the
168 : // sampling rate of source signal be one of 8000, 12000, 16000, 24000, or
169 : // 48000. If this constraint is not satisfied, we resample the input to 48kHz.
170 0 : nsTArray<int> supportedSamplingRates;
171 0 : supportedSamplingRates.AppendElements(kOpusSupportedInputSamplingRates,
172 0 : ArrayLength(kOpusSupportedInputSamplingRates));
173 0 : if (!supportedSamplingRates.Contains(aSamplingRate)) {
174 : int error;
175 0 : mResampler = speex_resampler_init(mChannels,
176 : aSamplingRate,
177 : kOpusSamplingRate,
178 : SPEEX_RESAMPLER_QUALITY_DEFAULT,
179 : &error);
180 :
181 0 : if (error != RESAMPLER_ERR_SUCCESS) {
182 0 : return NS_ERROR_FAILURE;
183 : }
184 : }
185 0 : mSamplingRate = aSamplingRate;
186 0 : NS_ENSURE_TRUE(mSamplingRate > 0, NS_ERROR_FAILURE);
187 :
188 0 : int error = 0;
189 0 : mEncoder = opus_encoder_create(GetOutputSampleRate(), mChannels,
190 : OPUS_APPLICATION_AUDIO, &error);
191 :
192 :
193 0 : mInitialized = (error == OPUS_OK);
194 :
195 0 : if (mAudioBitrate) {
196 0 : opus_encoder_ctl(mEncoder, OPUS_SET_BITRATE(static_cast<int>(mAudioBitrate)));
197 : }
198 :
199 0 : mReentrantMonitor.NotifyAll();
200 :
201 0 : return error == OPUS_OK ? NS_OK : NS_ERROR_FAILURE;
202 : }
203 :
204 : int
205 0 : OpusTrackEncoder::GetOutputSampleRate()
206 : {
207 0 : return mResampler ? kOpusSamplingRate : mSamplingRate;
208 : }
209 :
210 : int
211 0 : OpusTrackEncoder::GetPacketDuration()
212 : {
213 0 : return GetOutputSampleRate() * kFrameDurationMs / 1000;
214 : }
215 :
216 : already_AddRefed<TrackMetadataBase>
217 0 : OpusTrackEncoder::GetMetadata()
218 : {
219 0 : AUTO_PROFILER_LABEL("OpusTrackEncoder::GetMetadata", OTHER);
220 : {
221 : // Wait if mEncoder is not initialized.
222 0 : ReentrantMonitorAutoEnter mon(mReentrantMonitor);
223 0 : while (!mCanceled && !mInitialized) {
224 0 : mReentrantMonitor.Wait();
225 : }
226 : }
227 :
228 0 : if (mCanceled || mEncodingComplete) {
229 0 : return nullptr;
230 : }
231 :
232 0 : RefPtr<OpusMetadata> meta = new OpusMetadata();
233 0 : meta->mChannels = mChannels;
234 0 : meta->mSamplingFrequency = mSamplingRate;
235 :
236 0 : mLookahead = 0;
237 0 : int error = opus_encoder_ctl(mEncoder, OPUS_GET_LOOKAHEAD(&mLookahead));
238 0 : if (error != OPUS_OK) {
239 0 : mLookahead = 0;
240 : }
241 :
242 : // The ogg time stamping and pre-skip is always timed at 48000.
243 0 : SerializeOpusIdHeader(mChannels, mLookahead * (kOpusSamplingRate /
244 0 : GetOutputSampleRate()), mSamplingRate,
245 0 : &meta->mIdHeader);
246 :
247 0 : nsCString vendor;
248 0 : vendor.AppendASCII(opus_get_version_string());
249 :
250 0 : nsTArray<nsCString> comments;
251 0 : comments.AppendElement(NS_LITERAL_CSTRING("ENCODER=Mozilla" MOZ_APP_UA_VERSION));
252 :
253 0 : SerializeOpusCommentHeader(vendor, comments,
254 0 : &meta->mCommentHeader);
255 :
256 0 : return meta.forget();
257 : }
258 :
259 : nsresult
260 0 : OpusTrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData)
261 : {
262 0 : AUTO_PROFILER_LABEL("OpusTrackEncoder::GetEncodedTrack", OTHER);
263 : {
264 0 : ReentrantMonitorAutoEnter mon(mReentrantMonitor);
265 : // Wait until initialized or cancelled.
266 0 : while (!mCanceled && !mInitialized) {
267 0 : mReentrantMonitor.Wait();
268 : }
269 0 : if (mCanceled || mEncodingComplete) {
270 0 : return NS_ERROR_FAILURE;
271 : }
272 : }
273 :
274 : // calculation below depends on the truth that mInitialized is true.
275 0 : MOZ_ASSERT(mInitialized);
276 :
277 0 : bool wait = true;
278 0 : int result = 0;
279 : // Only wait once, then loop until we run out of packets of input data
280 0 : while (result >= 0 && !mEncodingComplete) {
281 : // re-sampled frames left last time which didn't fit into an Opus packet duration.
282 0 : const int framesLeft = mResampledLeftover.Length() / mChannels;
283 : // When framesLeft is 0, (GetPacketDuration() - framesLeft) is a multiple
284 : // of kOpusSamplingRate. There is not precision loss in the integer division
285 : // in computing framesToFetch. If frameLeft > 0, we need to add 1 to
286 : // framesToFetch to ensure there will be at least n frames after re-sampling.
287 0 : const int frameRoundUp = framesLeft ? 1 : 0;
288 :
289 0 : MOZ_ASSERT(GetPacketDuration() >= framesLeft);
290 : // Try to fetch m frames such that there will be n frames
291 : // where (n + frameLeft) >= GetPacketDuration() after re-sampling.
292 0 : const int framesToFetch = !mResampler ? GetPacketDuration()
293 0 : : (GetPacketDuration() - framesLeft) * mSamplingRate / kOpusSamplingRate
294 0 : + frameRoundUp;
295 : {
296 : // Move all the samples from mRawSegment to mSourceSegment. We only hold
297 : // the monitor in this block.
298 0 : ReentrantMonitorAutoEnter mon(mReentrantMonitor);
299 :
300 : // Wait until enough raw data, end of stream or cancelled.
301 0 : while (!mCanceled && mRawSegment.GetDuration() +
302 0 : mSourceSegment.GetDuration() < framesToFetch &&
303 0 : !mEndOfStream) {
304 0 : if (wait) {
305 0 : mReentrantMonitor.Wait();
306 0 : wait = false;
307 : } else {
308 0 : goto done; // nested while's...
309 : }
310 : }
311 :
312 0 : if (mCanceled) {
313 0 : return NS_ERROR_FAILURE;
314 : }
315 :
316 0 : mSourceSegment.AppendFrom(&mRawSegment);
317 :
318 : // Pad |mLookahead| samples to the end of source stream to prevent lost of
319 : // original data, the pcm duration will be calculated at rate 48K later.
320 0 : if (mEndOfStream && !mEosSetInEncoder) {
321 0 : mEosSetInEncoder = true;
322 0 : mSourceSegment.AppendNullData(mLookahead);
323 : }
324 : }
325 :
326 : // Start encoding data.
327 0 : AutoTArray<AudioDataValue, 9600> pcm;
328 0 : pcm.SetLength(GetPacketDuration() * mChannels);
329 0 : AudioSegment::ChunkIterator iter(mSourceSegment);
330 0 : int frameCopied = 0;
331 :
332 0 : while (!iter.IsEnded() && frameCopied < framesToFetch) {
333 0 : AudioChunk chunk = *iter;
334 :
335 : // Chunk to the required frame size.
336 0 : StreamTime frameToCopy = chunk.GetDuration();
337 0 : if (frameToCopy > framesToFetch - frameCopied) {
338 0 : frameToCopy = framesToFetch - frameCopied;
339 : }
340 : // Possible greatest value of framesToFetch = 3844: see
341 : // https://bugzilla.mozilla.org/show_bug.cgi?id=1349421#c8. frameToCopy
342 : // should not be able to exceed this value.
343 0 : MOZ_ASSERT(frameToCopy <= 3844, "frameToCopy exceeded expected range");
344 :
345 0 : if (!chunk.IsNull()) {
346 : // Append the interleaved data to the end of pcm buffer.
347 0 : AudioTrackEncoder::InterleaveTrackData(chunk, frameToCopy, mChannels,
348 0 : pcm.Elements() + frameCopied * mChannels);
349 : } else {
350 0 : CheckedInt<int> memsetLength = CheckedInt<int>(frameToCopy) *
351 0 : mChannels *
352 0 : sizeof(AudioDataValue);
353 0 : if (!memsetLength.isValid()) {
354 : // This should never happen, but we use a defensive check because
355 : // we really don't want a bad memset
356 0 : MOZ_ASSERT_UNREACHABLE("memsetLength invalid!");
357 : return NS_ERROR_FAILURE;
358 : }
359 0 : memset(pcm.Elements() + frameCopied * mChannels, 0,
360 0 : memsetLength.value());
361 : }
362 :
363 0 : frameCopied += frameToCopy;
364 0 : iter.Next();
365 : }
366 :
367 : // Possible greatest value of framesToFetch = 3844: see
368 : // https://bugzilla.mozilla.org/show_bug.cgi?id=1349421#c8. frameCopied
369 : // should not be able to exceed this value.
370 0 : MOZ_ASSERT(frameCopied <= 3844, "frameCopied exceeded expected range");
371 :
372 0 : RefPtr<EncodedFrame> audiodata = new EncodedFrame();
373 0 : audiodata->SetFrameType(EncodedFrame::OPUS_AUDIO_FRAME);
374 0 : int framesInPCM = frameCopied;
375 0 : if (mResampler) {
376 0 : AutoTArray<AudioDataValue, 9600> resamplingDest;
377 : // We want to consume all the input data, so we slightly oversize the
378 : // resampled data buffer so we can fit the output data in. We cannot really
379 : // predict the output frame count at each call.
380 0 : uint32_t outframes = frameCopied * kOpusSamplingRate / mSamplingRate + 1;
381 0 : uint32_t inframes = frameCopied;
382 :
383 0 : resamplingDest.SetLength(outframes * mChannels);
384 :
385 : #if MOZ_SAMPLE_TYPE_S16
386 : short* in = reinterpret_cast<short*>(pcm.Elements());
387 : short* out = reinterpret_cast<short*>(resamplingDest.Elements());
388 : speex_resampler_process_interleaved_int(mResampler, in, &inframes,
389 : out, &outframes);
390 : #else
391 0 : float* in = reinterpret_cast<float*>(pcm.Elements());
392 0 : float* out = reinterpret_cast<float*>(resamplingDest.Elements());
393 0 : speex_resampler_process_interleaved_float(mResampler, in, &inframes,
394 0 : out, &outframes);
395 : #endif
396 :
397 0 : MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());
398 0 : PodCopy(pcm.Elements(), mResampledLeftover.Elements(),
399 0 : mResampledLeftover.Length());
400 :
401 : uint32_t outframesToCopy = std::min(outframes,
402 0 : static_cast<uint32_t>(GetPacketDuration() - framesLeft));
403 :
404 0 : MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >=
405 : outframesToCopy * mChannels);
406 0 : PodCopy(pcm.Elements() + mResampledLeftover.Length(),
407 0 : resamplingDest.Elements(), outframesToCopy * mChannels);
408 0 : int frameLeftover = outframes - outframesToCopy;
409 0 : mResampledLeftover.SetLength(frameLeftover * mChannels);
410 0 : PodCopy(mResampledLeftover.Elements(),
411 0 : resamplingDest.Elements() + outframesToCopy * mChannels,
412 0 : mResampledLeftover.Length());
413 : // This is always at 48000Hz.
414 0 : framesInPCM = framesLeft + outframesToCopy;
415 0 : audiodata->SetDuration(framesInPCM);
416 : } else {
417 : // The ogg time stamping and pre-skip is always timed at 48000.
418 0 : audiodata->SetDuration(frameCopied * (kOpusSamplingRate / mSamplingRate));
419 : }
420 :
421 : // Remove the raw data which has been pulled to pcm buffer.
422 : // The value of frameCopied should equal to (or smaller than, if eos)
423 : // GetPacketDuration().
424 0 : mSourceSegment.RemoveLeading(frameCopied);
425 :
426 : // Has reached the end of input stream and all queued data has pulled for
427 : // encoding.
428 0 : if (mSourceSegment.GetDuration() == 0 && mEosSetInEncoder) {
429 0 : mEncodingComplete = true;
430 : LOG("[Opus] Done encoding.");
431 : }
432 :
433 0 : MOZ_ASSERT(mEosSetInEncoder || framesInPCM == GetPacketDuration());
434 :
435 : // Append null data to pcm buffer if the leftover data is not enough for
436 : // opus encoder.
437 0 : if (framesInPCM < GetPacketDuration() && mEosSetInEncoder) {
438 0 : PodZero(pcm.Elements() + framesInPCM * mChannels,
439 0 : (GetPacketDuration() - framesInPCM) * mChannels);
440 : }
441 0 : nsTArray<uint8_t> frameData;
442 : // Encode the data with Opus Encoder.
443 0 : frameData.SetLength(MAX_DATA_BYTES);
444 : // result is returned as opus error code if it is negative.
445 0 : result = 0;
446 : #ifdef MOZ_SAMPLE_TYPE_S16
447 : const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());
448 : result = opus_encode(mEncoder, pcmBuf, GetPacketDuration(),
449 : frameData.Elements(), MAX_DATA_BYTES);
450 : #else
451 0 : const float* pcmBuf = static_cast<float*>(pcm.Elements());
452 0 : result = opus_encode_float(mEncoder, pcmBuf, GetPacketDuration(),
453 0 : frameData.Elements(), MAX_DATA_BYTES);
454 : #endif
455 0 : frameData.SetLength(result >= 0 ? result : 0);
456 :
457 : if (result < 0) {
458 : LOG("[Opus] Fail to encode data! Result: %s.", opus_strerror(result));
459 : }
460 0 : if (mEncodingComplete) {
461 0 : if (mResampler) {
462 0 : speex_resampler_destroy(mResampler);
463 0 : mResampler = nullptr;
464 : }
465 0 : mResampledLeftover.SetLength(0);
466 : }
467 :
468 0 : audiodata->SwapInFrameData(frameData);
469 : // timestamp should be the time of the first sample
470 0 : audiodata->SetTimeStamp(mOutputTimeStamp);
471 0 : mOutputTimeStamp += FramesToUsecs(GetPacketDuration(), kOpusSamplingRate).value();
472 : LOG("[Opus] mOutputTimeStamp %lld.",mOutputTimeStamp);
473 0 : aData.AppendEncodedFrame(audiodata);
474 : }
475 : done:
476 0 : return result >= 0 ? NS_OK : NS_ERROR_FAILURE;
477 : }
478 :
479 : } // namespace mozilla
|