Line data Source code
1 : /*
2 : * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "webrtc/modules/audio_processing/audio_buffer.h"
12 :
13 : #include "webrtc/base/checks.h"
14 : #include "webrtc/common_audio/include/audio_util.h"
15 : #include "webrtc/common_audio/resampler/push_sinc_resampler.h"
16 : #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
17 : #include "webrtc/common_audio/channel_buffer.h"
18 : #include "webrtc/modules/audio_processing/common.h"
19 :
20 : namespace webrtc {
21 : namespace {
22 :
// Per-channel frame counts used by NumBandsFromSamplesPerChannel(): buffers of
// 320 or 480 frames are divided into bands of kSamplesPer16kHzChannel frames.
// NOTE(review): presumably each value is one 10 ms chunk at the named sample
// rate — confirm against the callers.
23 : const size_t kSamplesPer16kHzChannel = 160;
24 : const size_t kSamplesPer32kHzChannel = 320;
25 : const size_t kSamplesPer48kHzChannel = 480;
26 :
// Returns the index of the keyboard channel in the caller's channel-pointer
// array, which is |stream_config.num_channels()|.
// Must only be called when |stream_config.has_keyboard()| is true; otherwise
// RTC_NOTREACHED() fires and 0 is returned.
27 0 : int KeyboardChannelIndex(const StreamConfig& stream_config) {
28 0 : if (!stream_config.has_keyboard()) {
29 0 : RTC_NOTREACHED();
30 0 : return 0;
31 : }
32 :
33 0 : return stream_config.num_channels();
34 : }
35 :
// Returns the number of frequency bands for a buffer holding |num_frames|
// frames per channel: |num_frames| / kSamplesPer16kHzChannel when |num_frames|
// equals kSamplesPer32kHzChannel or kSamplesPer48kHzChannel (i.e. 2 or 3), and
// 1 for every other size.
36 0 : size_t NumBandsFromSamplesPerChannel(size_t num_frames) {
37 0 : size_t num_bands = 1;
38 0 : if (num_frames == kSamplesPer32kHzChannel ||
39 : num_frames == kSamplesPer48kHzChannel) {
40 0 : num_bands = rtc::CheckedDivExact(num_frames, kSamplesPer16kHzChannel);
41 : }
42 0 : return num_bands;
43 : }
44 :
45 : } // namespace
46 :
// Constructs a buffer that takes |num_input_channels| channels of
// |input_num_frames| frames as input, stores |num_process_channels| channels
// of |process_num_frames| frames internally, and produces |output_num_frames|
// frames per channel on output.
//
// All five arguments must be greater than zero and |num_process_channels| must
// not exceed |num_input_channels| (debug-checked below).
//
// NOTE(review): the initializers of num_bands_, num_split_frames_ and data_
// read other members (proc_num_frames_, num_bands_, num_proc_channels_), so
// they rely on the member declaration order in the header — confirm there.
47 0 : AudioBuffer::AudioBuffer(size_t input_num_frames,
48 : size_t num_input_channels,
49 : size_t process_num_frames,
50 : size_t num_process_channels,
51 0 : size_t output_num_frames)
52 : : input_num_frames_(input_num_frames),
53 : num_input_channels_(num_input_channels),
54 : proc_num_frames_(process_num_frames),
55 : num_proc_channels_(num_process_channels),
56 : output_num_frames_(output_num_frames),
57 : num_channels_(num_process_channels),
58 0 : num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)),
59 0 : num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)),
60 : mixed_low_pass_valid_(false),
61 : reference_copied_(false),
62 : activity_(AudioFrame::kVadUnknown),
63 : keyboard_data_(NULL),
64 0 : data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)) {
65 0 : RTC_DCHECK_GT(input_num_frames_, 0);
66 0 : RTC_DCHECK_GT(proc_num_frames_, 0);
67 0 : RTC_DCHECK_GT(output_num_frames_, 0);
68 0 : RTC_DCHECK_GT(num_input_channels_, 0);
69 0 : RTC_DCHECK_GT(num_proc_channels_, 0);
70 0 : RTC_DCHECK_LE(num_proc_channels_, num_input_channels_);
71 :
// Resampling state is only created when the input or output frame count
// differs from the processing frame count.
72 0 : if (input_num_frames_ != proc_num_frames_ ||
73 0 : output_num_frames_ != proc_num_frames_) {
74 : // Create an intermediate buffer for resampling.
75 0 : process_buffer_.reset(new ChannelBuffer<float>(proc_num_frames_,
76 0 : num_proc_channels_));
77 :
// One input resampler per processing channel (input -> processing size).
78 0 : if (input_num_frames_ != proc_num_frames_) {
79 0 : for (size_t i = 0; i < num_proc_channels_; ++i) {
80 0 : input_resamplers_.push_back(std::unique_ptr<PushSincResampler>(
81 0 : new PushSincResampler(input_num_frames_, proc_num_frames_)));
82 : }
83 : }
84 :
// One output resampler per processing channel (processing -> output size).
85 0 : if (output_num_frames_ != proc_num_frames_) {
86 0 : for (size_t i = 0; i < num_proc_channels_; ++i) {
87 0 : output_resamplers_.push_back(std::unique_ptr<PushSincResampler>(
88 0 : new PushSincResampler(proc_num_frames_, output_num_frames_)));
89 : }
90 : }
91 : }
92 :
// The band-split buffer and the splitting filter exist only for multi-band
// sizes; with a single band both stay unset and the split_* accessors fall
// back to data_.
93 0 : if (num_bands_ > 1) {
94 0 : split_data_.reset(new IFChannelBuffer(proc_num_frames_,
95 0 : num_proc_channels_,
96 0 : num_bands_));
97 0 : splitting_filter_.reset(new SplittingFilter(num_proc_channels_,
98 : num_bands_,
99 0 : proc_num_frames_));
100 : }
101 0 : }
102 :
// Destructor with an empty body; nothing is released explicitly here.
// NOTE(review): the owned buffers are assigned via reset(), so they are
// presumably smart-pointer members that clean up themselves — confirm in the
// header.
103 0 : AudioBuffer::~AudioBuffer() {}
104 :
// Loads one chunk of deinterleaved float audio into the buffer.
//
// |data| is an array of per-channel pointers; |stream_config| must report
// input_num_frames_ frames and num_input_channels_ channels (debug-checked).
// Steps, in order:
//   1. Reset the per-chunk state via InitForNewData().
//   2. If the config has a keyboard channel, remember the caller's pointer to
//      it (the samples are not copied).
//   3. Downmix to mono when there are several input channels but only one
//      processing channel.
//   4. Resample to proc_num_frames_ when the input frame count differs.
//   5. Convert with FloatToFloatS16() into the float view of data_.
105 0 : void AudioBuffer::CopyFrom(const float* const* data,
106 : const StreamConfig& stream_config) {
107 0 : RTC_DCHECK_EQ(stream_config.num_frames(), input_num_frames_);
108 0 : RTC_DCHECK_EQ(stream_config.num_channels(), num_input_channels_);
109 0 : InitForNewData();
110 : // Initialized lazily because there's a different condition in
111 : // DeinterleaveFrom.
112 : const bool need_to_downmix =
113 0 : num_input_channels_ > 1 && num_proc_channels_ == 1;
114 0 : if (need_to_downmix && !input_buffer_) {
115 0 : input_buffer_.reset(
116 0 : new IFChannelBuffer(input_num_frames_, num_proc_channels_));
117 : }
118 :
119 0 : if (stream_config.has_keyboard()) {
120 0 : keyboard_data_ = data[KeyboardChannelIndex(stream_config)];
121 : }
122 :
123 : // Downmix.
// |data_ptr| always points at the output of the most recent stage.
124 0 : const float* const* data_ptr = data;
125 0 : if (need_to_downmix) {
126 0 : DownmixToMono<float, float>(data, input_num_frames_, num_input_channels_,
127 0 : input_buffer_->fbuf()->channels()[0]);
128 0 : data_ptr = input_buffer_->fbuf_const()->channels();
129 : }
130 :
131 : // Resample.
132 0 : if (input_num_frames_ != proc_num_frames_) {
133 0 : for (size_t i = 0; i < num_proc_channels_; ++i) {
134 0 : input_resamplers_[i]->Resample(data_ptr[i],
135 0 : input_num_frames_,
136 0 : process_buffer_->channels()[i],
137 0 : proc_num_frames_);
138 : }
139 0 : data_ptr = process_buffer_->channels();
140 : }
141 :
142 : // Convert to the S16 range.
143 0 : for (size_t i = 0; i < num_proc_channels_; ++i) {
144 0 : FloatToFloatS16(data_ptr[i],
145 0 : proc_num_frames_,
146 0 : data_->fbuf()->channels()[i]);
147 : }
148 0 : }
149 :
// Writes the buffer contents to the caller's deinterleaved float channels.
//
// |stream_config| must report output_num_frames_ frames, and its channel count
// must equal num_channels_ unless num_channels_ is 1 (debug-checked).
// The float view of data_ is converted with FloatS16ToFloat(); when the output
// frame count differs from the processing frame count the conversion goes
// through process_buffer_ and is then resampled into |data|. Any output
// channels beyond num_channels_ receive a copy of output channel 0.
150 0 : void AudioBuffer::CopyTo(const StreamConfig& stream_config,
151 : float* const* data) {
152 0 : RTC_DCHECK_EQ(stream_config.num_frames(), output_num_frames_);
153 0 : RTC_DCHECK(stream_config.num_channels() == num_channels_ ||
154 0 : num_channels_ == 1);
155 :
156 : // Convert to the float range.
157 0 : float* const* data_ptr = data;
158 0 : if (output_num_frames_ != proc_num_frames_) {
159 : // Convert to an intermediate buffer for subsequent resampling.
160 0 : data_ptr = process_buffer_->channels();
161 : }
162 0 : for (size_t i = 0; i < num_channels_; ++i) {
163 0 : FloatS16ToFloat(data_->fbuf()->channels()[i],
164 0 : proc_num_frames_,
165 0 : data_ptr[i]);
166 : }
167 :
168 : // Resample.
169 0 : if (output_num_frames_ != proc_num_frames_) {
170 0 : for (size_t i = 0; i < num_channels_; ++i) {
171 0 : output_resamplers_[i]->Resample(data_ptr[i],
172 0 : proc_num_frames_,
173 0 : data[i],
174 0 : output_num_frames_);
175 : }
176 : }
177 :
178 : // Upmix.
// Duplicates output channel 0 into every remaining output channel.
179 0 : for (size_t i = num_channels_; i < stream_config.num_channels(); ++i) {
180 0 : memcpy(data[i], data[0], output_num_frames_ * sizeof(**data));
181 : }
182 0 : }
183 :
// Resets the per-chunk state before new audio is loaded: clears the keyboard
// pointer, invalidates the cached low-pass mix and the low-pass reference,
// sets the VAD activity to kVadUnknown, and restores the channel count of this
// object, data_ and (if present) split_data_ to num_proc_channels_.
184 0 : void AudioBuffer::InitForNewData() {
185 0 : keyboard_data_ = NULL;
186 0 : mixed_low_pass_valid_ = false;
187 0 : reference_copied_ = false;
188 0 : activity_ = AudioFrame::kVadUnknown;
189 0 : num_channels_ = num_proc_channels_;
190 0 : data_->set_num_channels(num_proc_channels_);
191 0 : if (split_data_.get()) {
192 0 : split_data_->set_num_channels(num_proc_channels_);
193 : }
194 0 : }
195 :
// Returns read-only per-channel pointers into the int16 view of data_.
// Unlike channels(), this does not invalidate the cached low-pass mix.
196 0 : const int16_t* const* AudioBuffer::channels_const() const {
197 0 : return data_->ibuf_const()->channels();
198 : }
199 :
// Returns writable per-channel pointers into the int16 view of data_.
// The cached low-pass mix is marked invalid because the caller may modify the
// samples.
200 0 : int16_t* const* AudioBuffer::channels() {
201 0 : mixed_low_pass_valid_ = false;
202 0 : return data_->ibuf()->channels();
203 : }
204 :
// Returns read-only per-band int16 pointers for |channel|, taken from
// split_data_ when it exists and from data_ otherwise.
205 0 : const int16_t* const* AudioBuffer::split_bands_const(size_t channel) const {
206 0 : return split_data_.get() ?
207 0 : split_data_->ibuf_const()->bands(channel) :
208 0 : data_->ibuf_const()->bands(channel);
209 : }
210 :
// Returns writable per-band int16 pointers for |channel|, taken from
// split_data_ when it exists and from data_ otherwise. Invalidates the cached
// low-pass mix.
211 0 : int16_t* const* AudioBuffer::split_bands(size_t channel) {
212 0 : mixed_low_pass_valid_ = false;
213 0 : return split_data_.get() ?
214 0 : split_data_->ibuf()->bands(channel) :
215 0 : data_->ibuf()->bands(channel);
216 : }
217 :
// Returns read-only per-channel int16 pointers for |band|.
// Without split_data_ only kBand0To8kHz is available (served from data_);
// any other band yields nullptr.
218 0 : const int16_t* const* AudioBuffer::split_channels_const(Band band) const {
219 0 : if (split_data_.get()) {
220 0 : return split_data_->ibuf_const()->channels(band);
221 : } else {
222 0 : return band == kBand0To8kHz ? data_->ibuf_const()->channels() : nullptr;
223 : }
224 : }
225 :
// Returns writable per-channel int16 pointers for |band| and invalidates the
// cached low-pass mix.
// Without split_data_ only kBand0To8kHz is available (served from data_);
// any other band yields nullptr.
226 0 : int16_t* const* AudioBuffer::split_channels(Band band) {
227 0 : mixed_low_pass_valid_ = false;
228 0 : if (split_data_.get()) {
229 0 : return split_data_->ibuf()->channels(band);
230 : } else {
231 0 : return band == kBand0To8kHz ? data_->ibuf()->channels() : nullptr;
232 : }
233 : }
234 :
// Returns the writable int16 view of data_ and invalidates the cached
// low-pass mix.
235 0 : ChannelBuffer<int16_t>* AudioBuffer::data() {
236 0 : mixed_low_pass_valid_ = false;
237 0 : return data_->ibuf();
238 : }
239 :
// Returns the read-only int16 view of data_.
240 0 : const ChannelBuffer<int16_t>* AudioBuffer::data() const {
241 0 : return data_->ibuf_const();
242 : }
243 :
// Returns the writable int16 view of split_data_, or of data_ when no split
// buffer exists. Invalidates the cached low-pass mix.
244 0 : ChannelBuffer<int16_t>* AudioBuffer::split_data() {
245 0 : mixed_low_pass_valid_ = false;
246 0 : return split_data_.get() ? split_data_->ibuf() : data_->ibuf();
247 : }
248 :
// Returns the read-only int16 view of split_data_, or of data_ when no split
// buffer exists.
249 0 : const ChannelBuffer<int16_t>* AudioBuffer::split_data() const {
250 0 : return split_data_.get() ? split_data_->ibuf_const() : data_->ibuf_const();
251 : }
252 :
// Returns read-only per-channel pointers into the float view of data_.
253 0 : const float* const* AudioBuffer::channels_const_f() const {
254 0 : return data_->fbuf_const()->channels();
255 : }
256 :
// Returns writable per-channel pointers into the float view of data_ and
// invalidates the cached low-pass mix.
257 0 : float* const* AudioBuffer::channels_f() {
258 0 : mixed_low_pass_valid_ = false;
259 0 : return data_->fbuf()->channels();
260 : }
261 :
// Returns read-only per-band float pointers for |channel|, taken from
// split_data_ when it exists and from data_ otherwise.
262 0 : const float* const* AudioBuffer::split_bands_const_f(size_t channel) const {
263 0 : return split_data_.get() ?
264 0 : split_data_->fbuf_const()->bands(channel) :
265 0 : data_->fbuf_const()->bands(channel);
266 : }
267 :
// Returns writable per-band float pointers for |channel|, taken from
// split_data_ when it exists and from data_ otherwise. Invalidates the cached
// low-pass mix.
268 0 : float* const* AudioBuffer::split_bands_f(size_t channel) {
269 0 : mixed_low_pass_valid_ = false;
270 0 : return split_data_.get() ?
271 0 : split_data_->fbuf()->bands(channel) :
272 0 : data_->fbuf()->bands(channel);
273 : }
274 :
// Returns read-only per-channel float pointers for |band|.
// Without split_data_ only kBand0To8kHz is available (served from data_);
// any other band yields nullptr.
275 0 : const float* const* AudioBuffer::split_channels_const_f(Band band) const {
276 0 : if (split_data_.get()) {
277 0 : return split_data_->fbuf_const()->channels(band);
278 : } else {
279 0 : return band == kBand0To8kHz ? data_->fbuf_const()->channels() : nullptr;
280 : }
281 : }
282 :
// Returns writable per-channel float pointers for |band| and invalidates the
// cached low-pass mix.
// Without split_data_ only kBand0To8kHz is available (served from data_);
// any other band yields nullptr.
283 0 : float* const* AudioBuffer::split_channels_f(Band band) {
284 0 : mixed_low_pass_valid_ = false;
285 0 : if (split_data_.get()) {
286 0 : return split_data_->fbuf()->channels(band);
287 : } else {
288 0 : return band == kBand0To8kHz ? data_->fbuf()->channels() : nullptr;
289 : }
290 : }
291 :
// Returns the writable float view of data_ and invalidates the cached
// low-pass mix.
292 0 : ChannelBuffer<float>* AudioBuffer::data_f() {
293 0 : mixed_low_pass_valid_ = false;
294 0 : return data_->fbuf();
295 : }
296 :
// Returns the read-only float view of data_.
297 0 : const ChannelBuffer<float>* AudioBuffer::data_f() const {
298 0 : return data_->fbuf_const();
299 : }
300 :
// Returns the writable float view of split_data_, or of data_ when no split
// buffer exists. Invalidates the cached low-pass mix.
301 0 : ChannelBuffer<float>* AudioBuffer::split_data_f() {
302 0 : mixed_low_pass_valid_ = false;
303 0 : return split_data_.get() ? split_data_->fbuf() : data_->fbuf();
304 : }
305 :
// Returns the read-only float view of split_data_, or of data_ when no split
// buffer exists.
306 0 : const ChannelBuffer<float>* AudioBuffer::split_data_f() const {
307 0 : return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const();
308 : }
309 :
// Returns a mono int16 signal for the kBand0To8kHz band.
//
// With a single processing channel this is simply that channel's lowest band.
// Otherwise the lowest band of the num_channels_ current channels is downmixed
// into mixed_low_pass_channels_ (allocated on first use) and the result is
// cached until a mutable accessor or InitForNewData() clears
// mixed_low_pass_valid_.
// NOTE(review): the mono shortcut tests num_proc_channels_ while the downmix
// uses num_channels_ — confirm this asymmetry is intended.
310 0 : const int16_t* AudioBuffer::mixed_low_pass_data() {
311 0 : if (num_proc_channels_ == 1) {
312 0 : return split_bands_const(0)[kBand0To8kHz];
313 : }
314 :
315 0 : if (!mixed_low_pass_valid_) {
316 0 : if (!mixed_low_pass_channels_.get()) {
317 0 : mixed_low_pass_channels_.reset(
318 0 : new ChannelBuffer<int16_t>(num_split_frames_, 1));
319 : }
320 :
321 0 : DownmixToMono<int16_t, int32_t>(split_channels_const(kBand0To8kHz),
322 0 : num_split_frames_, num_channels_,
323 0 : mixed_low_pass_channels_->channels()[0]);
324 0 : mixed_low_pass_valid_ = true;
325 : }
326 0 : return mixed_low_pass_channels_->channels()[0];
327 : }
328 :
// Returns the low-pass reference samples saved for |channel| by
// CopyLowPassToReference(), or NULL when no reference has been copied since
// the last InitForNewData(). |channel| is not range-checked here.
329 0 : const int16_t* AudioBuffer::low_pass_reference(int channel) const {
330 0 : if (!reference_copied_) {
331 0 : return NULL;
332 : }
333 :
334 0 : return low_pass_reference_channels_->channels()[channel];
335 : }
336 :
// Returns the keyboard-channel pointer stored by CopyFrom(), or NULL when the
// last loaded chunk had none. The pointer refers to the caller's buffer passed
// to CopyFrom(); the samples are not copied.
337 0 : const float* AudioBuffer::keyboard_data() const {
338 0 : return keyboard_data_;
339 : }
340 :
// Stores the VAD activity for the current chunk; InterleaveTo() writes it to
// the outgoing frame.
341 0 : void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
342 0 : activity_ = activity;
343 0 : }
344 :
// Returns the VAD activity currently stored for this chunk.
345 0 : AudioFrame::VADActivity AudioBuffer::activity() const {
346 0 : return activity_;
347 : }
348 :
// Returns the number of channels currently in use. This starts at
// num_proc_channels_ for each chunk and can be changed by set_num_channels().
349 0 : size_t AudioBuffer::num_channels() const {
350 0 : return num_channels_;
351 : }
352 :
// Changes the number of channels in use for this object, data_ and (if
// present) split_data_. The value lasts until InitForNewData() restores
// num_proc_channels_.
// NOTE(review): presumably |num_channels| must not exceed the allocated
// channel count — confirm against IFChannelBuffer::set_num_channels.
353 0 : void AudioBuffer::set_num_channels(size_t num_channels) {
354 0 : num_channels_ = num_channels;
355 0 : data_->set_num_channels(num_channels);
356 0 : if (split_data_.get()) {
357 0 : split_data_->set_num_channels(num_channels);
358 : }
359 0 : }
360 :
// Returns the number of frames per channel at the processing size
// (proc_num_frames_), not the input or output size.
361 0 : size_t AudioBuffer::num_frames() const {
362 0 : return proc_num_frames_;
363 : }
364 :
// Returns the number of frames in each band, i.e. proc_num_frames_ divided by
// num_bands_ as computed in the constructor.
365 0 : size_t AudioBuffer::num_frames_per_band() const {
366 0 : return num_split_frames_;
367 : }
368 :
// Returns the number of frames in the keyboard channel, which equals the input
// frame count.
369 0 : size_t AudioBuffer::num_keyboard_frames() const {
370 : // We don't resample the keyboard channel.
371 0 : return input_num_frames_;
372 : }
373 :
// Returns the number of frequency bands chosen in the constructor from the
// processing frame count.
374 0 : size_t AudioBuffer::num_bands() const {
375 0 : return num_bands_;
376 : }
377 :
378 : // The resampler is only for supporting 48kHz to 16kHz in the reverse stream.
// Loads the interleaved int16 samples of |frame| into the buffer.
//
// |frame| must carry num_input_channels_ channels and input_num_frames_
// samples per channel (debug-checked). The per-chunk state is reset, the
// frame's VAD activity is adopted, and the samples are deinterleaved (or
// downmixed to mono when there is a single processing channel) either straight
// into data_ or, when resampling is needed, into input_buffer_ first.
// NOTE(review): the resampling step reads input_buffer_ through its float view
// right after it was written through its int16 view; this relies on
// IFChannelBuffer keeping the two views in sync — confirm in channel_buffer.h.
379 0 : void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
380 0 : RTC_DCHECK_EQ(frame->num_channels_, num_input_channels_);
381 0 : RTC_DCHECK_EQ(frame->samples_per_channel_, input_num_frames_);
382 0 : InitForNewData();
383 : // Initialized lazily because there's a different condition in CopyFrom.
384 0 : if ((input_num_frames_ != proc_num_frames_) && !input_buffer_) {
385 0 : input_buffer_.reset(
386 0 : new IFChannelBuffer(input_num_frames_, num_proc_channels_));
387 : }
388 0 : activity_ = frame->vad_activity_;
389 :
// Pick the deinterleave target: data_ directly when no resampling is needed,
// otherwise the intermediate input_buffer_.
390 : int16_t* const* deinterleaved;
391 0 : if (input_num_frames_ == proc_num_frames_) {
392 0 : deinterleaved = data_->ibuf()->channels();
393 : } else {
394 0 : deinterleaved = input_buffer_->ibuf()->channels();
395 : }
396 0 : if (num_proc_channels_ == 1) {
397 : // Downmix and deinterleave simultaneously.
398 0 : DownmixInterleavedToMono(frame->data_, input_num_frames_,
399 0 : num_input_channels_, deinterleaved[0]);
400 : } else {
401 0 : RTC_DCHECK_EQ(num_proc_channels_, num_input_channels_);
402 0 : Deinterleave(frame->data_,
403 0 : input_num_frames_,
404 0 : num_proc_channels_,
405 0 : deinterleaved);
406 : }
407 :
408 : // Resample.
409 0 : if (input_num_frames_ != proc_num_frames_) {
410 0 : for (size_t i = 0; i < num_proc_channels_; ++i) {
411 0 : input_resamplers_[i]->Resample(input_buffer_->fbuf_const()->channels()[i],
412 0 : input_num_frames_,
413 0 : data_->fbuf()->channels()[i],
414 0 : proc_num_frames_);
415 : }
416 : }
417 0 : }
418 :
// Writes the buffer back into |frame| as interleaved int16 samples.
//
// The stored VAD activity is always copied to |frame|. The sample data is
// written only when |data_changed| is true; in that case |frame| must expect
// output_num_frames_ samples per channel and either num_channels_ channels or,
// when num_channels_ is 1, any channel count (debug-checked). A mono buffer is
// upmixed to the frame's channel count.
// NOTE(review): output_buffer_ is allocated once with the num_channels_ in
// effect at that first call. If a later call runs with a larger num_channels_,
// the resampling loop below would index past its channels — confirm callers
// never grow num_channels_ between calls.
419 0 : void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) {
420 0 : frame->vad_activity_ = activity_;
421 0 : if (!data_changed) {
422 0 : return;
423 : }
424 :
425 0 : RTC_DCHECK(frame->num_channels_ == num_channels_ || num_channels_ == 1);
426 0 : RTC_DCHECK_EQ(frame->samples_per_channel_, output_num_frames_);
427 :
428 : // Resample if necessary.
// |data_ptr| ends up pointing at whichever buffer holds output-sized samples.
429 0 : IFChannelBuffer* data_ptr = data_.get();
430 0 : if (proc_num_frames_ != output_num_frames_) {
431 0 : if (!output_buffer_) {
432 0 : output_buffer_.reset(
433 0 : new IFChannelBuffer(output_num_frames_, num_channels_));
434 : }
435 0 : for (size_t i = 0; i < num_channels_; ++i) {
436 0 : output_resamplers_[i]->Resample(
437 0 : data_->fbuf()->channels()[i], proc_num_frames_,
438 0 : output_buffer_->fbuf()->channels()[i], output_num_frames_);
439 : }
440 0 : data_ptr = output_buffer_.get();
441 : }
442 :
443 0 : if (frame->num_channels_ == num_channels_) {
444 0 : Interleave(data_ptr->ibuf()->channels(), output_num_frames_, num_channels_,
445 0 : frame->data_);
446 : } else {
447 0 : UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0], output_num_frames_,
448 0 : frame->num_channels_, frame->data_);
449 : }
450 : }
451 :
// Saves a copy of the kBand0To8kHz band of every processing channel so that it
// can later be read through low_pass_reference().
//
// The reference buffer is allocated on first use with num_proc_channels_
// channels of num_split_frames_ frames and is fully overwritten on each call.
// The reallocation check compares against num_proc_channels_, the size the
// buffer is actually created with. Comparing against num_channels_ would
// reallocate on every call whenever set_num_channels() had made the two
// counts differ.
// NOTE(review): the copy loop reads split_bands_const(i) for all
// num_proc_channels_ channels even if num_channels_ is currently smaller —
// confirm that is valid for the underlying channel buffer.
452 0 : void AudioBuffer::CopyLowPassToReference() {
453 0 : reference_copied_ = true;
454 0 : if (!low_pass_reference_channels_.get() ||
455 0 : low_pass_reference_channels_->num_channels() != num_proc_channels_) {
456 0 : low_pass_reference_channels_.reset(
457 : new ChannelBuffer<int16_t>(num_split_frames_,
458 0 : num_proc_channels_));
459 : }
460 0 : for (size_t i = 0; i < num_proc_channels_; i++) {
461 0 : memcpy(low_pass_reference_channels_->channels()[i],
462 0 : split_bands_const(i)[kBand0To8kHz],
463 0 : low_pass_reference_channels_->num_frames_per_band() *
464 0 : sizeof(split_bands_const(i)[kBand0To8kHz][0]));
465 : }
466 0 : }
467 :
// Runs the splitting filter's Analysis() from data_ into split_data_.
// splitting_filter_ and split_data_ are only created when num_bands_ > 1, so
// this must not be called on a single-band buffer.
468 0 : void AudioBuffer::SplitIntoFrequencyBands() {
469 0 : splitting_filter_->Analysis(data_.get(), split_data_.get());
470 0 : }
471 :
// Runs the splitting filter's Synthesis() from split_data_ back into data_.
// splitting_filter_ and split_data_ are only created when num_bands_ > 1, so
// this must not be called on a single-band buffer.
472 0 : void AudioBuffer::MergeFrequencyBands() {
473 0 : splitting_filter_->Synthesis(split_data_.get(), data_.get());
474 0 : }
475 :
476 : } // namespace webrtc
|