Line data Source code
1 : /*
2 : * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "webrtc/modules/audio_processing/vad/pitch_based_vad.h"
12 :
13 : #include <math.h>
14 : #include <string.h>
15 :
16 : #include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h"
17 : #include "webrtc/modules/audio_processing/vad/common.h"
18 : #include "webrtc/modules/audio_processing/vad/noise_gmm_tables.h"
19 : #include "webrtc/modules/audio_processing/vad/voice_gmm_tables.h"
20 : #include "webrtc/modules/include/module_common_types.h"
21 :
22 : namespace webrtc {
23 :
24 : static_assert(kNoiseGmmDim == kVoiceGmmDim,
25 : "noise and voice gmm dimension not equal");
26 :
// Tuning constants. They have to stay in sync with the MATLAB counterparts,
// otherwise the unit-tests will not pass.
static constexpr int kPosteriorHistorySize = 500;  // 5 sec of 10 ms frames.
static constexpr double kInitialPriorProbability = 0.3;
static constexpr int kTransientWidthThreshold = 7;
static constexpr double kLowProbabilityThreshold = 0.2;
32 :
// Clamps |p| to the closed interval [0.01, 0.99]. Values already inside the
// interval are returned unchanged.
static double LimitProbability(double p) {
  const double kFloor = 0.01;
  const double kCeiling = 0.99;

  if (p < kFloor)
    return kFloor;
  if (p > kCeiling)
    return kCeiling;
  return p;
}
43 :
44 0 : PitchBasedVad::PitchBasedVad()
45 : : p_prior_(kInitialPriorProbability),
46 0 : circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) {
47 : // Setup noise GMM.
48 0 : noise_gmm_.dimension = kNoiseGmmDim;
49 0 : noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
50 0 : noise_gmm_.weight = kNoiseGmmWeights;
51 0 : noise_gmm_.mean = &kNoiseGmmMean[0][0];
52 0 : noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
53 :
54 : // Setup voice GMM.
55 0 : voice_gmm_.dimension = kVoiceGmmDim;
56 0 : voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
57 0 : voice_gmm_.weight = kVoiceGmmWeights;
58 0 : voice_gmm_.mean = &kVoiceGmmMean[0][0];
59 0 : voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
60 0 : }
61 :
62 0 : PitchBasedVad::~PitchBasedVad() {
63 0 : }
64 :
65 0 : int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
66 : double* p_combined) {
67 : double p;
68 : double gmm_features[3];
69 : double pdf_features_given_voice;
70 : double pdf_features_given_noise;
71 : // These limits are the same in matlab implementation 'VoicingProbGMM().'
72 0 : const double kLimLowLogPitchGain = -2.0;
73 0 : const double kLimHighLogPitchGain = -0.9;
74 0 : const double kLimLowSpectralPeak = 200;
75 0 : const double kLimHighSpectralPeak = 2000;
76 0 : const double kEps = 1e-12;
77 0 : for (size_t n = 0; n < features.num_frames; n++) {
78 0 : gmm_features[0] = features.log_pitch_gain[n];
79 0 : gmm_features[1] = features.spectral_peak[n];
80 0 : gmm_features[2] = features.pitch_lag_hz[n];
81 :
82 0 : pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
83 0 : pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
84 :
85 0 : if (features.spectral_peak[n] < kLimLowSpectralPeak ||
86 0 : features.spectral_peak[n] > kLimHighSpectralPeak ||
87 0 : features.log_pitch_gain[n] < kLimLowLogPitchGain) {
88 0 : pdf_features_given_voice = kEps * pdf_features_given_noise;
89 0 : } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
90 0 : pdf_features_given_noise = kEps * pdf_features_given_voice;
91 : }
92 :
93 0 : p = p_prior_ * pdf_features_given_voice /
94 0 : (pdf_features_given_voice * p_prior_ +
95 0 : pdf_features_given_noise * (1 - p_prior_));
96 :
97 0 : p = LimitProbability(p);
98 :
99 : // Combine pitch-based probability with standalone probability, before
100 : // updating prior probabilities.
101 0 : double prod_active = p * p_combined[n];
102 0 : double prod_inactive = (1 - p) * (1 - p_combined[n]);
103 0 : p_combined[n] = prod_active / (prod_active + prod_inactive);
104 :
105 0 : if (UpdatePrior(p_combined[n]) < 0)
106 0 : return -1;
107 : // Limit prior probability. With a zero prior probability the posterior
108 : // probability is always zero.
109 0 : p_prior_ = LimitProbability(p_prior_);
110 : }
111 0 : return 0;
112 : }
113 :
114 0 : int PitchBasedVad::UpdatePrior(double p) {
115 0 : circular_buffer_->Insert(p);
116 0 : if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
117 : kLowProbabilityThreshold) < 0)
118 0 : return -1;
119 0 : p_prior_ = circular_buffer_->Mean();
120 0 : return 0;
121 : }
122 :
123 : } // namespace webrtc
|