LCOV - code coverage report
Current view: top level - media/webrtc/trunk/webrtc/modules/audio_processing/vad - pitch_based_vad.cc (source / functions) Hit Total Coverage
Test: output.info Lines: 0 58 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 5 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
       3             :  *
       4             :  *  Use of this source code is governed by a BSD-style license
       5             :  *  that can be found in the LICENSE file in the root of the source
       6             :  *  tree. An additional intellectual property rights grant can be found
       7             :  *  in the file PATENTS.  All contributing project authors may
       8             :  *  be found in the AUTHORS file in the root of the source tree.
       9             :  */
      10             : 
      11             : #include "webrtc/modules/audio_processing/vad/pitch_based_vad.h"
      12             : 
      13             : #include <math.h>
      14             : #include <string.h>
      15             : 
      16             : #include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h"
      17             : #include "webrtc/modules/audio_processing/vad/common.h"
      18             : #include "webrtc/modules/audio_processing/vad/noise_gmm_tables.h"
      19             : #include "webrtc/modules/audio_processing/vad/voice_gmm_tables.h"
      20             : #include "webrtc/modules/include/module_common_types.h"
      21             : 
      22             : namespace webrtc {
      23             : 
      24             : static_assert(kNoiseGmmDim == kVoiceGmmDim,
      25             :               "noise and voice gmm dimension not equal");
      26             : 
      27             : // These values should match MATLAB counterparts for unit-tests to pass.
      28             : static const int kPosteriorHistorySize = 500;  // 5 sec of 10 ms frames.
      29             : static const double kInitialPriorProbability = 0.3;
      30             : static const int kTransientWidthThreshold = 7;
      31             : static const double kLowProbabilityThreshold = 0.2;
      32             : 
      // Clamps a probability to the closed interval [0.01, 0.99].
      //
      // |p| is the value to limit. Returns 0.99 when |p| is above 0.99, 0.01 when
      // |p| is below 0.01 or is NaN, and |p| itself otherwise.
      //
      // NaN needs explicit care: every ordered comparison with NaN is false, so a
      // plain "if above / else if below" chain would hand the NaN straight back.
      // The posterior computed in VoicingProbability() is 0/0 when both GMM
      // likelihoods are zero, and an unclamped NaN would then be written to the
      // caller's output and inserted into the posterior history.
      static double LimitProbability(double p) {
        const double kLimHigh = 0.99;
        const double kLimLow = 0.01;

        // Negated ">=" rather than "<" so that NaN also takes this branch.
        if (!(p >= kLimLow))
          return kLimLow;
        if (p > kLimHigh)
          return kLimHigh;
        return p;
      }
      43             : 
      44           0 : PitchBasedVad::PitchBasedVad()
      45             :     : p_prior_(kInitialPriorProbability),
      46           0 :       circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) {
      47             :   // Setup noise GMM.
      48           0 :   noise_gmm_.dimension = kNoiseGmmDim;
      49           0 :   noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
      50           0 :   noise_gmm_.weight = kNoiseGmmWeights;
      51           0 :   noise_gmm_.mean = &kNoiseGmmMean[0][0];
      52           0 :   noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
      53             : 
      54             :   // Setup voice GMM.
      55           0 :   voice_gmm_.dimension = kVoiceGmmDim;
      56           0 :   voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
      57           0 :   voice_gmm_.weight = kVoiceGmmWeights;
      58           0 :   voice_gmm_.mean = &kVoiceGmmMean[0][0];
      59           0 :   voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
      60           0 : }
      61             : 
      62           0 : PitchBasedVad::~PitchBasedVad() {
      63           0 : }
      64             : 
      65           0 : int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
      66             :                                       double* p_combined) {
      67             :   double p;
      68             :   double gmm_features[3];
      69             :   double pdf_features_given_voice;
      70             :   double pdf_features_given_noise;
      71             :   // These limits are the same in matlab implementation 'VoicingProbGMM().'
      72           0 :   const double kLimLowLogPitchGain = -2.0;
      73           0 :   const double kLimHighLogPitchGain = -0.9;
      74           0 :   const double kLimLowSpectralPeak = 200;
      75           0 :   const double kLimHighSpectralPeak = 2000;
      76           0 :   const double kEps = 1e-12;
      77           0 :   for (size_t n = 0; n < features.num_frames; n++) {
      78           0 :     gmm_features[0] = features.log_pitch_gain[n];
      79           0 :     gmm_features[1] = features.spectral_peak[n];
      80           0 :     gmm_features[2] = features.pitch_lag_hz[n];
      81             : 
      82           0 :     pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
      83           0 :     pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
      84             : 
      85           0 :     if (features.spectral_peak[n] < kLimLowSpectralPeak ||
      86           0 :         features.spectral_peak[n] > kLimHighSpectralPeak ||
      87           0 :         features.log_pitch_gain[n] < kLimLowLogPitchGain) {
      88           0 :       pdf_features_given_voice = kEps * pdf_features_given_noise;
      89           0 :     } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
      90           0 :       pdf_features_given_noise = kEps * pdf_features_given_voice;
      91             :     }
      92             : 
      93           0 :     p = p_prior_ * pdf_features_given_voice /
      94           0 :         (pdf_features_given_voice * p_prior_ +
      95           0 :          pdf_features_given_noise * (1 - p_prior_));
      96             : 
      97           0 :     p = LimitProbability(p);
      98             : 
      99             :     // Combine pitch-based probability with standalone probability, before
     100             :     // updating prior probabilities.
     101           0 :     double prod_active = p * p_combined[n];
     102           0 :     double prod_inactive = (1 - p) * (1 - p_combined[n]);
     103           0 :     p_combined[n] = prod_active / (prod_active + prod_inactive);
     104             : 
     105           0 :     if (UpdatePrior(p_combined[n]) < 0)
     106           0 :       return -1;
     107             :     // Limit prior probability. With a zero prior probability the posterior
     108             :     // probability is always zero.
     109           0 :     p_prior_ = LimitProbability(p_prior_);
     110             :   }
     111           0 :   return 0;
     112             : }
     113             : 
     114           0 : int PitchBasedVad::UpdatePrior(double p) {
     115           0 :   circular_buffer_->Insert(p);
     116           0 :   if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
     117             :                                         kLowProbabilityThreshold) < 0)
     118           0 :     return -1;
     119           0 :   p_prior_ = circular_buffer_->Mean();
     120           0 :   return 0;
     121             : }
     122             : 
     123             : }  // namespace webrtc

Generated by: LCOV version 1.13