Line data Source code
1 : /*
2 : * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "webrtc/modules/audio_processing/voice_detection_impl.h"
12 :
13 : #include "webrtc/base/constructormagic.h"
14 : #include "webrtc/common_audio/vad/include/webrtc_vad.h"
15 : #include "webrtc/modules/audio_processing/audio_buffer.h"
16 :
17 : namespace webrtc {
18 : class VoiceDetectionImpl::Vad {
19 : public:
20 0 : Vad() {
21 0 : state_ = WebRtcVad_Create();
22 0 : RTC_CHECK(state_);
23 0 : int error = WebRtcVad_Init(state_);
24 0 : RTC_DCHECK_EQ(0, error);
25 0 : }
26 0 : ~Vad() {
27 0 : WebRtcVad_Free(state_);
28 0 : }
29 0 : VadInst* state() { return state_; }
30 : private:
31 : VadInst* state_ = nullptr;
32 : RTC_DISALLOW_COPY_AND_ASSIGN(Vad);
33 : };
34 :
35 0 : VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit)
36 0 : : crit_(crit) {
37 0 : RTC_DCHECK(crit);
38 0 : }
39 :
40 0 : VoiceDetectionImpl::~VoiceDetectionImpl() {}
41 :
42 0 : void VoiceDetectionImpl::Initialize(int sample_rate_hz) {
43 0 : rtc::CritScope cs(crit_);
44 0 : sample_rate_hz_ = sample_rate_hz;
45 0 : std::unique_ptr<Vad> new_vad;
46 0 : if (enabled_) {
47 0 : new_vad.reset(new Vad());
48 : }
49 0 : vad_.swap(new_vad);
50 0 : using_external_vad_ = false;
51 0 : frame_size_samples_ =
52 0 : static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000;
53 0 : set_likelihood(likelihood_);
54 0 : }
55 :
56 0 : void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
57 0 : rtc::CritScope cs(crit_);
58 0 : if (!enabled_) {
59 0 : return;
60 : }
61 0 : if (using_external_vad_) {
62 0 : using_external_vad_ = false;
63 0 : return;
64 : }
65 :
66 0 : RTC_DCHECK_GE(160, audio->num_frames_per_band());
67 : // TODO(ajm): concatenate data in frame buffer here.
68 0 : int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
69 : audio->mixed_low_pass_data(),
70 0 : frame_size_samples_);
71 0 : if (vad_ret == 0) {
72 0 : stream_has_voice_ = false;
73 0 : audio->set_activity(AudioFrame::kVadPassive);
74 0 : } else if (vad_ret == 1) {
75 0 : stream_has_voice_ = true;
76 0 : audio->set_activity(AudioFrame::kVadActive);
77 : } else {
78 0 : RTC_NOTREACHED();
79 : }
80 : }
81 :
82 0 : int VoiceDetectionImpl::Enable(bool enable) {
83 0 : rtc::CritScope cs(crit_);
84 0 : if (enabled_ != enable) {
85 0 : enabled_ = enable;
86 0 : Initialize(sample_rate_hz_);
87 : }
88 0 : return AudioProcessing::kNoError;
89 : }
90 :
91 0 : bool VoiceDetectionImpl::is_enabled() const {
92 0 : rtc::CritScope cs(crit_);
93 0 : return enabled_;
94 : }
95 :
96 0 : int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
97 0 : rtc::CritScope cs(crit_);
98 0 : using_external_vad_ = true;
99 0 : stream_has_voice_ = has_voice;
100 0 : return AudioProcessing::kNoError;
101 : }
102 :
103 0 : bool VoiceDetectionImpl::stream_has_voice() const {
104 0 : rtc::CritScope cs(crit_);
105 : // TODO(ajm): enable this assertion?
106 : //RTC_DCHECK(using_external_vad_ || is_component_enabled());
107 0 : return stream_has_voice_;
108 : }
109 :
110 0 : int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
111 0 : rtc::CritScope cs(crit_);
112 0 : likelihood_ = likelihood;
113 0 : if (enabled_) {
114 0 : int mode = 2;
115 0 : switch (likelihood) {
116 : case VoiceDetection::kVeryLowLikelihood:
117 0 : mode = 3;
118 0 : break;
119 : case VoiceDetection::kLowLikelihood:
120 0 : mode = 2;
121 0 : break;
122 : case VoiceDetection::kModerateLikelihood:
123 0 : mode = 1;
124 0 : break;
125 : case VoiceDetection::kHighLikelihood:
126 0 : mode = 0;
127 0 : break;
128 : default:
129 0 : RTC_NOTREACHED();
130 0 : break;
131 : }
132 0 : int error = WebRtcVad_set_mode(vad_->state(), mode);
133 0 : RTC_DCHECK_EQ(0, error);
134 : }
135 0 : return AudioProcessing::kNoError;
136 : }
137 :
138 0 : VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
139 0 : rtc::CritScope cs(crit_);
140 0 : return likelihood_;
141 : }
142 :
143 0 : int VoiceDetectionImpl::set_frame_size_ms(int size) {
144 0 : rtc::CritScope cs(crit_);
145 0 : RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported.
146 0 : frame_size_ms_ = size;
147 0 : Initialize(sample_rate_hz_);
148 0 : return AudioProcessing::kNoError;
149 : }
150 :
151 0 : int VoiceDetectionImpl::frame_size_ms() const {
152 0 : rtc::CritScope cs(crit_);
153 0 : return frame_size_ms_;
154 : }
155 : } // namespace webrtc
|