Line data Source code
1 : /*
2 : * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"
12 :
13 : #include <algorithm>
14 :
15 : #include "webrtc/base/checks.h"
16 :
17 : namespace webrtc {
18 : namespace {
19 :
// Upper bound asserted (RTC_DCHECK_LE) on the |length| argument of
// ProcessChunk(). Combined with the length == sample_rate_hz / 100 assertion
// there, 320 samples corresponds to a 32000 Hz input.
20 : const size_t kMaxLength = 320;
// Channel count handed to resampler_.ResetIfNeeded() in ProcessChunk().
21 : const size_t kNumChannels = 1;
22 :
// Initial value of |last_voice_probability_| set by the constructor.
23 : const double kDefaultVoiceValue = 1.0;
// Value every per-chunk probability is filled with before the standalone and
// pitch-based detectors are invoked on a non-silent chunk.
24 : const double kNeutralProbability = 0.5;
// Value every per-chunk probability is set to when |features_.silence| is
// true.
25 : const double kLowProbability = 0.01;
26 :
27 : } // namespace
28 :
// Constructs the detector: |last_voice_probability_| starts at
// kDefaultVoiceValue (1.0) and |standalone_vad_| takes the object returned by
// StandaloneVad::Create().
// NOTE(review): the result of StandaloneVad::Create() is not checked here;
// ProcessChunk() dereferences |standalone_vad_| unconditionally, so confirm
// Create() cannot return null.
29 0 : VoiceActivityDetector::VoiceActivityDetector()
30 : : last_voice_probability_(kDefaultVoiceValue),
31 0 : standalone_vad_(StandaloneVad::Create()) {
32 0 : }
33 :
// Compiler-generated destructor, defined in this file rather than inline in
// the class.
// NOTE(review): presumably kept out of the header so that member types only
// need to be complete in this translation unit - confirm against the header.
34 : VoiceActivityDetector::~VoiceActivityDetector() = default;
35 :
// Processes one chunk of audio and refreshes the per-chunk results.
//
// Parameters:
//   audio          - pointer to the input samples (16-bit).
//   length         - number of samples in |audio|. Asserted to equal
//                    sample_rate_hz / 100 (i.e. 10 ms of audio) and to be at
//                    most kMaxLength.
//   sample_rate_hz - sample rate of |audio|. When it differs from
//                    kSampleRateHz the input is first pushed through
//                    |resampler_| into |resampled_|.
//
// Effects visible in this function:
//   - |chunkwise_voice_probabilities_| and |chunkwise_rms_| are resized to
//     features_.num_frames (so both become empty when that is zero).
//   - |last_voice_probability_| is overwritten with the last per-chunk
//     probability, but only when features_.num_frames > 0.
//   - Failures reported by ResetIfNeeded(), AddAudio(), GetActivity() and
//     VoicingProbability() are treated as fatal through RTC_CHECK_*.
//
36 : // Because ISAC has a different chunk length, it updates
37 : // |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data.
38 : // Otherwise it clears them.
39 0 : void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
40 : size_t length,
41 : int sample_rate_hz) {
42 0 : RTC_DCHECK_EQ(length, sample_rate_hz / 100);
43 0 : RTC_DCHECK_LE(length, kMaxLength);
44 : // Resample to the required rate.
// |resampled_ptr| keeps pointing at the caller's buffer unless resampling is
// needed, in which case it is redirected to the member buffer |resampled_|.
45 0 : const int16_t* resampled_ptr = audio;
46 0 : if (sample_rate_hz != kSampleRateHz) {
47 0 : RTC_CHECK_EQ(
48 : resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),
49 0 : 0);
// NOTE(review): |length| is passed both as the input size and as the final
// argument. The assertion after this block expects it to equal kLength10Ms,
// so Push() presumably overwrites it with the number of samples written to
// |resampled_| - confirm against Resampler::Push. The return value of Push()
// is not checked here, unlike the other calls in this function.
50 0 : resampler_.Push(audio, length, resampled_, kLength10Ms, length);
51 0 : resampled_ptr = resampled_;
52 : }
// From here on |length| must be kLength10Ms, whether or not resampling ran.
53 0 : RTC_DCHECK_EQ(length, kLength10Ms);
54 :
55 : // Each chunk needs to be passed into |standalone_vad_|, because internally it
56 : // buffers the audio and processes it all at once when GetActivity() is
57 : // called.
58 0 : RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);
59 :
// Fills |features_|; its num_frames, rms and silence fields drive everything
// below.
60 0 : audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);
61 :
// Size both result vectors to the number of frames reported in |features_|,
// then copy that many RMS values out of features_.rms.
62 0 : chunkwise_voice_probabilities_.resize(features_.num_frames);
63 0 : chunkwise_rms_.resize(features_.num_frames);
64 0 : std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),
65 0 : chunkwise_rms_.begin());
// With zero frames the vectors are empty and |last_voice_probability_| keeps
// its previous value.
66 0 : if (features_.num_frames > 0) {
67 0 : if (features_.silence) {
68 : // The other features are invalid, so set the voice probabilities to an
69 : // arbitrary low value.
70 0 : std::fill(chunkwise_voice_probabilities_.begin(),
71 0 : chunkwise_voice_probabilities_.end(), kLowProbability);
72 : } else {
// Seed every entry with kNeutralProbability, then hand the same buffer to the
// standalone detector followed by the pitch-based detector (presumably each
// updates the values in place - confirm against their headers). A negative
// return value from either call is fatal.
73 0 : std::fill(chunkwise_voice_probabilities_.begin(),
74 0 : chunkwise_voice_probabilities_.end(), kNeutralProbability);
75 0 : RTC_CHECK_GE(
76 : standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
77 : chunkwise_voice_probabilities_.size()),
78 0 : 0);
79 0 : RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
80 : features_, &chunkwise_voice_probabilities_[0]),
81 0 : 0);
82 : }
// Remember the probability of the final frame of this chunk; back() is safe
// because the vector holds num_frames > 0 entries in this branch.
83 0 : last_voice_probability_ = chunkwise_voice_probabilities_.back();
84 : }
85 0 : }
86 :
87 : } // namespace webrtc
|