Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #include "AudioConverter.h"
8 : #include <string.h>
9 : #include <speex/speex_resampler.h>
10 : #include <cmath>
11 :
12 : /*
13 : * Parts derived from MythTV AudioConvert Class
14 : * Created by Jean-Yves Avenard.
15 : *
16 : * Copyright (C) Bubblestuff Pty Ltd 2013
17 : * Copyright (C) foobum@gmail.com 2010
18 : */
19 :
20 : namespace mozilla {
21 :
22 0 : AudioConverter::AudioConverter(const AudioConfig& aIn, const AudioConfig& aOut)
23 : : mIn(aIn)
24 : , mOut(aOut)
25 0 : , mResampler(nullptr)
26 : {
27 0 : MOZ_DIAGNOSTIC_ASSERT(aIn.Format() == aOut.Format() &&
28 : aIn.Interleaved() == aOut.Interleaved(),
29 : "No format or rate conversion is supported at this stage");
30 0 : MOZ_DIAGNOSTIC_ASSERT(aOut.Channels() <= 2 ||
31 : aIn.Channels() == aOut.Channels(),
32 : "Only down/upmixing to mono or stereo is supported at this stage");
33 0 : MOZ_DIAGNOSTIC_ASSERT(aOut.Interleaved(), "planar audio format not supported");
34 0 : mIn.Layout().MappingTable(mOut.Layout(), mChannelOrderMap);
35 0 : if (aIn.Rate() != aOut.Rate()) {
36 0 : RecreateResampler();
37 : }
38 0 : }
39 :
40 0 : AudioConverter::~AudioConverter()
41 : {
42 0 : if (mResampler) {
43 0 : speex_resampler_destroy(mResampler);
44 0 : mResampler = nullptr;
45 : }
46 0 : }
47 :
48 : bool
49 0 : AudioConverter::CanWorkInPlace() const
50 : {
51 0 : bool needDownmix = mIn.Channels() > mOut.Channels();
52 0 : bool needUpmix = mIn.Channels() < mOut.Channels();
53 : bool canDownmixInPlace =
54 0 : mIn.Channels() * AudioConfig::SampleSize(mIn.Format()) >=
55 0 : mOut.Channels() * AudioConfig::SampleSize(mOut.Format());
56 0 : bool needResample = mIn.Rate() != mOut.Rate();
57 0 : bool canResampleInPlace = mIn.Rate() >= mOut.Rate();
58 : // We should be able to work in place if 1s of audio input takes less space
59 : // than 1s of audio output. However, as we downmix before resampling we can't
60 : // perform any upsampling in place (e.g. if incoming rate >= outgoing rate)
61 0 : return !needUpmix && (!needDownmix || canDownmixInPlace) &&
62 0 : (!needResample || canResampleInPlace);
63 : }
64 :
65 : size_t
66 0 : AudioConverter::ProcessInternal(void* aOut, const void* aIn, size_t aFrames)
67 : {
68 0 : if (mIn.Channels() > mOut.Channels()) {
69 0 : return DownmixAudio(aOut, aIn, aFrames);
70 0 : } else if (mIn.Channels() < mOut.Channels()) {
71 0 : return UpmixAudio(aOut, aIn, aFrames);
72 0 : } else if (mIn.Layout() != mOut.Layout() && CanReorderAudio()) {
73 0 : ReOrderInterleavedChannels(aOut, aIn, aFrames);
74 0 : } else if (aIn != aOut) {
75 0 : memmove(aOut, aIn, FramesOutToBytes(aFrames));
76 : }
77 0 : return aFrames;
78 : }
79 :
80 : // Reorder interleaved channels.
81 : // Can work in place (e.g aOut == aIn).
82 : template <class AudioDataType>
83 : void
84 0 : _ReOrderInterleavedChannels(AudioDataType* aOut, const AudioDataType* aIn,
85 : uint32_t aFrames, uint32_t aChannels,
86 : const uint8_t* aChannelOrderMap)
87 : {
88 0 : MOZ_DIAGNOSTIC_ASSERT(aChannels <= MAX_AUDIO_CHANNELS);
89 : AudioDataType val[MAX_AUDIO_CHANNELS];
90 0 : for (uint32_t i = 0; i < aFrames; i++) {
91 0 : for (uint32_t j = 0; j < aChannels; j++) {
92 0 : val[j] = aIn[aChannelOrderMap[j]];
93 : }
94 0 : for (uint32_t j = 0; j < aChannels; j++) {
95 0 : aOut[j] = val[j];
96 : }
97 0 : aOut += aChannels;
98 0 : aIn += aChannels;
99 : }
100 0 : }
101 :
102 : void
103 0 : AudioConverter::ReOrderInterleavedChannels(void* aOut, const void* aIn,
104 : size_t aFrames) const
105 : {
106 0 : MOZ_DIAGNOSTIC_ASSERT(mIn.Channels() == mOut.Channels());
107 :
108 0 : if (mOut.Channels() == 1 || mOut.Layout() == mIn.Layout()) {
109 : // If channel count is 1, planar and non-planar formats are the same and
110 : // there's nothing to reorder.
111 0 : if (aOut != aIn) {
112 0 : memmove(aOut, aIn, FramesOutToBytes(aFrames));
113 : }
114 0 : return;
115 : }
116 :
117 0 : uint32_t bits = AudioConfig::FormatToBits(mOut.Format());
118 0 : switch (bits) {
119 : case 8:
120 0 : _ReOrderInterleavedChannels((uint8_t*)aOut, (const uint8_t*)aIn,
121 0 : aFrames, mIn.Channels(), mChannelOrderMap);
122 0 : break;
123 : case 16:
124 0 : _ReOrderInterleavedChannels((int16_t*)aOut,(const int16_t*)aIn,
125 0 : aFrames, mIn.Channels(), mChannelOrderMap);
126 0 : break;
127 : default:
128 0 : MOZ_DIAGNOSTIC_ASSERT(AudioConfig::SampleSize(mOut.Format()) == 4);
129 0 : _ReOrderInterleavedChannels((int32_t*)aOut,(const int32_t*)aIn,
130 0 : aFrames, mIn.Channels(), mChannelOrderMap);
131 0 : break;
132 : }
133 : }
134 :
// Saturates a 32-bit value to the signed 16-bit range [-32768, 32767].
static inline int16_t clipTo15(int32_t aX)
{
  if (aX < -32768) {
    return -32768;
  }
  if (aX > 32767) {
    return 32767;
  }
  return static_cast<int16_t>(aX);
}
139 :
140 : size_t
141 0 : AudioConverter::DownmixAudio(void* aOut, const void* aIn, size_t aFrames) const
142 : {
143 0 : MOZ_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 ||
144 : mIn.Format() == AudioConfig::FORMAT_FLT);
145 0 : MOZ_ASSERT(mIn.Channels() >= mOut.Channels());
146 0 : MOZ_ASSERT(mIn.Layout() == AudioConfig::ChannelLayout(mIn.Channels()),
147 : "Can only downmix input data in SMPTE layout");
148 0 : MOZ_ASSERT(mOut.Layout() == AudioConfig::ChannelLayout(2) ||
149 : mOut.Layout() == AudioConfig::ChannelLayout(1));
150 :
151 0 : uint32_t channels = mIn.Channels();
152 :
153 0 : if (channels == 1 && mOut.Channels() == 1) {
154 0 : if (aOut != aIn) {
155 0 : memmove(aOut, aIn, FramesOutToBytes(aFrames));
156 : }
157 0 : return aFrames;
158 : }
159 :
160 0 : if (channels > 2) {
161 0 : if (mIn.Format() == AudioConfig::FORMAT_FLT) {
162 : // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows 5-8.
163 : static const float dmatrix[6][8][2]= {
164 : /*3*/{{0.5858f,0},{0,0.5858f},{0.4142f,0.4142f}},
165 : /*4*/{{0.4226f,0},{0,0.4226f},{0.366f, 0.2114f},{0.2114f,0.366f}},
166 : /*5*/{{0.6510f,0},{0,0.6510f},{0.4600f,0.4600f},{0.5636f,0.3254f},{0.3254f,0.5636f}},
167 : /*6*/{{0.5290f,0},{0,0.5290f},{0.3741f,0.3741f},{0.3741f,0.3741f},{0.4582f,0.2645f},{0.2645f,0.4582f}},
168 : /*7*/{{0.4553f,0},{0,0.4553f},{0.3220f,0.3220f},{0.3220f,0.3220f},{0.2788f,0.2788f},{0.3943f,0.2277f},{0.2277f,0.3943f}},
169 : /*8*/{{0.3886f,0},{0,0.3886f},{0.2748f,0.2748f},{0.2748f,0.2748f},{0.3366f,0.1943f},{0.1943f,0.3366f},{0.3366f,0.1943f},{0.1943f,0.3366f}},
170 : };
171 : // Re-write the buffer with downmixed data
172 0 : const float* in = static_cast<const float*>(aIn);
173 0 : float* out = static_cast<float*>(aOut);
174 0 : for (uint32_t i = 0; i < aFrames; i++) {
175 0 : float sampL = 0.0;
176 0 : float sampR = 0.0;
177 0 : for (uint32_t j = 0; j < channels; j++) {
178 0 : sampL += in[i*mIn.Channels()+j]*dmatrix[mIn.Channels()-3][j][0];
179 0 : sampR += in[i*mIn.Channels()+j]*dmatrix[mIn.Channels()-3][j][1];
180 : }
181 0 : *out++ = sampL;
182 0 : *out++ = sampR;
183 : }
184 0 : } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
185 : // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows 5-8.
186 : // Coefficients in Q14.
187 : static const int16_t dmatrix[6][8][2]= {
188 : /*3*/{{9598, 0},{0, 9598},{6786,6786}},
189 : /*4*/{{6925, 0},{0, 6925},{5997,3462},{3462,5997}},
190 : /*5*/{{10663,0},{0, 10663},{7540,7540},{9234,5331},{5331,9234}},
191 : /*6*/{{8668, 0},{0, 8668},{6129,6129},{6129,6129},{7507,4335},{4335,7507}},
192 : /*7*/{{7459, 0},{0, 7459},{5275,5275},{5275,5275},{4568,4568},{6460,3731},{3731,6460}},
193 : /*8*/{{6368, 0},{0, 6368},{4502,4502},{4502,4502},{5514,3184},{3184,5514},{5514,3184},{3184,5514}}
194 : };
195 : // Re-write the buffer with downmixed data
196 0 : const int16_t* in = static_cast<const int16_t*>(aIn);
197 0 : int16_t* out = static_cast<int16_t*>(aOut);
198 0 : for (uint32_t i = 0; i < aFrames; i++) {
199 0 : int32_t sampL = 0;
200 0 : int32_t sampR = 0;
201 0 : for (uint32_t j = 0; j < channels; j++) {
202 0 : sampL+=in[i*channels+j]*dmatrix[channels-3][j][0];
203 0 : sampR+=in[i*channels+j]*dmatrix[channels-3][j][1];
204 : }
205 0 : *out++ = clipTo15((sampL + 8192)>>14);
206 0 : *out++ = clipTo15((sampR + 8192)>>14);
207 : }
208 : } else {
209 0 : MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
210 : }
211 :
212 : // If we are to continue downmixing to mono, start working on the output
213 : // buffer.
214 0 : aIn = aOut;
215 0 : channels = 2;
216 : }
217 :
218 0 : if (mOut.Channels() == 1) {
219 0 : if (mIn.Format() == AudioConfig::FORMAT_FLT) {
220 0 : const float* in = static_cast<const float*>(aIn);
221 0 : float* out = static_cast<float*>(aOut);
222 0 : for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
223 0 : float sample = 0.0;
224 : // The sample of the buffer would be interleaved.
225 0 : sample = (in[fIdx*channels] + in[fIdx*channels + 1]) * 0.5;
226 0 : *out++ = sample;
227 : }
228 0 : } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
229 0 : const int16_t* in = static_cast<const int16_t*>(aIn);
230 0 : int16_t* out = static_cast<int16_t*>(aOut);
231 0 : for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
232 0 : int32_t sample = 0.0;
233 : // The sample of the buffer would be interleaved.
234 0 : sample = (in[fIdx*channels] + in[fIdx*channels + 1]) * 0.5;
235 0 : *out++ = sample;
236 : }
237 : } else {
238 0 : MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
239 : }
240 : }
241 0 : return aFrames;
242 : }
243 :
244 : size_t
245 0 : AudioConverter::ResampleAudio(void* aOut, const void* aIn, size_t aFrames)
246 : {
247 0 : if (!mResampler) {
248 0 : return 0;
249 : }
250 0 : uint32_t outframes = ResampleRecipientFrames(aFrames);
251 0 : uint32_t inframes = aFrames;
252 :
253 : int error;
254 0 : if (mOut.Format() == AudioConfig::FORMAT_FLT) {
255 0 : const float* in = reinterpret_cast<const float*>(aIn);
256 0 : float* out = reinterpret_cast<float*>(aOut);
257 : error =
258 0 : speex_resampler_process_interleaved_float(mResampler, in, &inframes,
259 0 : out, &outframes);
260 0 : } else if (mOut.Format() == AudioConfig::FORMAT_S16) {
261 0 : const int16_t* in = reinterpret_cast<const int16_t*>(aIn);
262 0 : int16_t* out = reinterpret_cast<int16_t*>(aOut);
263 : error =
264 0 : speex_resampler_process_interleaved_int(mResampler, in, &inframes,
265 0 : out, &outframes);
266 : } else {
267 0 : MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
268 : error = RESAMPLER_ERR_ALLOC_FAILED;
269 : }
270 0 : MOZ_ASSERT(error == RESAMPLER_ERR_SUCCESS);
271 0 : if (error != RESAMPLER_ERR_SUCCESS) {
272 0 : speex_resampler_destroy(mResampler);
273 0 : mResampler = nullptr;
274 0 : return 0;
275 : }
276 0 : MOZ_ASSERT(inframes == aFrames, "Some frames will be dropped");
277 0 : return outframes;
278 : }
279 :
280 : void
281 0 : AudioConverter::RecreateResampler()
282 : {
283 0 : if (mResampler) {
284 0 : speex_resampler_destroy(mResampler);
285 : }
286 : int error;
287 0 : mResampler = speex_resampler_init(mOut.Channels(),
288 : mIn.Rate(),
289 : mOut.Rate(),
290 : SPEEX_RESAMPLER_QUALITY_DEFAULT,
291 : &error);
292 :
293 0 : if (error == RESAMPLER_ERR_SUCCESS) {
294 0 : speex_resampler_skip_zeros(mResampler);
295 : } else {
296 0 : NS_WARNING("Failed to initialize resampler.");
297 0 : mResampler = nullptr;
298 : }
299 0 : }
300 :
301 : size_t
302 0 : AudioConverter::DrainResampler(void* aOut)
303 : {
304 0 : if (!mResampler) {
305 0 : return 0;
306 : }
307 0 : int frames = speex_resampler_get_input_latency(mResampler);
308 0 : AlignedByteBuffer buffer(FramesOutToBytes(frames));
309 0 : if (!buffer) {
310 : // OOM
311 0 : return 0;
312 : }
313 0 : frames = ResampleAudio(aOut, buffer.Data(), frames);
314 : // Tore down the resampler as it's easier than handling follow-up.
315 0 : RecreateResampler();
316 0 : return frames;
317 : }
318 :
319 : size_t
320 0 : AudioConverter::UpmixAudio(void* aOut, const void* aIn, size_t aFrames) const
321 : {
322 0 : MOZ_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 ||
323 : mIn.Format() == AudioConfig::FORMAT_FLT);
324 0 : MOZ_ASSERT(mIn.Channels() < mOut.Channels());
325 0 : MOZ_ASSERT(mIn.Channels() == 1, "Can only upmix mono for now");
326 0 : MOZ_ASSERT(mOut.Channels() == 2, "Can only upmix to stereo for now");
327 :
328 0 : if (mOut.Channels() != 2) {
329 0 : return 0;
330 : }
331 :
332 : // Upmix mono to stereo.
333 : // This is a very dumb mono to stereo upmixing, power levels are preserved
334 : // following the calculation: left = right = -3dB*mono.
335 0 : if (mIn.Format() == AudioConfig::FORMAT_FLT) {
336 0 : const float m3db = std::sqrt(0.5); // -3dB = sqrt(1/2)
337 0 : const float* in = static_cast<const float*>(aIn);
338 0 : float* out = static_cast<float*>(aOut);
339 0 : for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
340 0 : float sample = in[fIdx] * m3db;
341 : // The samples of the buffer would be interleaved.
342 0 : *out++ = sample;
343 0 : *out++ = sample;
344 : }
345 0 : } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
346 0 : const int16_t* in = static_cast<const int16_t*>(aIn);
347 0 : int16_t* out = static_cast<int16_t*>(aOut);
348 0 : for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
349 0 : int16_t sample = ((int32_t)in[fIdx] * 11585) >> 14; // close enough to i*sqrt(0.5)
350 : // The samples of the buffer would be interleaved.
351 0 : *out++ = sample;
352 0 : *out++ = sample;
353 : }
354 : } else {
355 0 : MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
356 : }
357 :
358 0 : return aFrames;
359 : }
360 :
361 : size_t
362 0 : AudioConverter::ResampleRecipientFrames(size_t aFrames) const
363 : {
364 0 : if (!aFrames && mIn.Rate() != mOut.Rate()) {
365 : // The resampler will be drained, account for frames currently buffered
366 : // in the resampler.
367 0 : if (!mResampler) {
368 0 : return 0;
369 : }
370 0 : return speex_resampler_get_output_latency(mResampler);
371 : } else {
372 0 : return (uint64_t)aFrames * mOut.Rate() / mIn.Rate() + 1;
373 : }
374 : }
375 :
376 : size_t
377 0 : AudioConverter::FramesOutToSamples(size_t aFrames) const
378 : {
379 0 : return aFrames * mOut.Channels();
380 : }
381 :
382 : size_t
383 0 : AudioConverter::SamplesInToFrames(size_t aSamples) const
384 : {
385 0 : return aSamples / mIn.Channels();
386 : }
387 :
388 : size_t
389 0 : AudioConverter::FramesOutToBytes(size_t aFrames) const
390 : {
391 0 : return FramesOutToSamples(aFrames) * AudioConfig::SampleSize(mOut.Format());
392 : }
393 : } // namespace mozilla
|