LCOV - code coverage report
Current view: top level - media/webrtc/trunk/webrtc/common_audio/vad - vad_filterbank.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 106 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 5 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
       3             :  *
       4             :  *  Use of this source code is governed by a BSD-style license
       5             :  *  that can be found in the LICENSE file in the root of the source
       6             :  *  tree. An additional intellectual property rights grant can be found
       7             :  *  in the file PATENTS.  All contributing project authors may
       8             :  *  be found in the AUTHORS file in the root of the source tree.
       9             :  */
      10             : 
      11             : #include "webrtc/common_audio/vad/vad_filterbank.h"
      12             : 
      13             : #include "webrtc/base/checks.h"
      14             : #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
      15             : #include "webrtc/typedefs.h"
      16             : 
      17             : // Constants used in LogOfEnergy().
      18             : static const int16_t kLogConst = 24660;  // 160*log10(2) in Q9.
      19             : static const int16_t kLogEnergyIntPart = 14336;  // 14 in Q10
      20             : 
      21             : // Coefficients used by HighPassFilter, Q14.
      22             : static const int16_t kHpZeroCoefs[3] = { 6631, -13262, 6631 };
      23             : static const int16_t kHpPoleCoefs[3] = { 16384, -7756, 5620 };
      24             : 
      25             : // Allpass filter coefficients, upper and lower, in Q15.
      26             : // Upper: 0.64, Lower: 0.17
      27             : static const int16_t kAllPassCoefsQ15[2] = { 20972, 5571 };
      28             : 
      29             : // Adjustment for division with two in SplitFilter.
      30             : static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 };
      31             : 
      32             : // High pass filtering, with a cut-off frequency at 80 Hz, if the |data_in| is
      33             : // sampled at 500 Hz.
      34             : //
      35             : // - data_in      [i]   : Input audio data sampled at 500 Hz.
      36             : // - data_length  [i]   : Length of input and output data.
      37             : // - filter_state [i/o] : State of the filter.
      38             : // - data_out     [o]   : Output audio data in the frequency interval
      39             : //                        80 - 250 Hz.
      40           0 : static void HighPassFilter(const int16_t* data_in, size_t data_length,
      41             :                            int16_t* filter_state, int16_t* data_out) {
      42             :   size_t i;
      43           0 :   const int16_t* in_ptr = data_in;
      44           0 :   int16_t* out_ptr = data_out;
      45           0 :   int32_t tmp32 = 0;
      46             : 
      47             : 
      48             :   // The sum of the absolute values of the impulse response:
      49             :   // The zero/pole-filter has a max amplification of a single sample of: 1.4546
      50             :   // Impulse response: 0.4047 -0.6179 -0.0266  0.1993  0.1035  -0.0194
      51             :   // The all-zero section has a max amplification of a single sample of: 1.6189
      52             :   // Impulse response: 0.4047 -0.8094  0.4047  0       0        0
      53             :   // The all-pole section has a max amplification of a single sample of: 1.9931
      54             :   // Impulse response: 1.0000  0.4734 -0.1189 -0.2187 -0.0627   0.04532
      55             : 
      56           0 :   for (i = 0; i < data_length; i++) {
      57             :     // All-zero section (filter coefficients in Q14).
      58           0 :     tmp32 = kHpZeroCoefs[0] * *in_ptr;
      59           0 :     tmp32 += kHpZeroCoefs[1] * filter_state[0];
      60           0 :     tmp32 += kHpZeroCoefs[2] * filter_state[1];
      61           0 :     filter_state[1] = filter_state[0];
      62           0 :     filter_state[0] = *in_ptr++;
      63             : 
      64             :     // All-pole section (filter coefficients in Q14).
      65           0 :     tmp32 -= kHpPoleCoefs[1] * filter_state[2];
      66           0 :     tmp32 -= kHpPoleCoefs[2] * filter_state[3];
      67           0 :     filter_state[3] = filter_state[2];
      68           0 :     filter_state[2] = (int16_t) (tmp32 >> 14);
      69           0 :     *out_ptr++ = filter_state[2];
      70             :   }
      71           0 : }
      72             : 
      73             : // All pass filtering of |data_in|, used before splitting the signal into two
      74             : // frequency bands (low pass vs high pass).
      75             : // Note that |data_in| and |data_out| can NOT correspond to the same address.
      76             : //
      77             : // - data_in            [i]   : Input audio signal given in Q0.
      78             : // - data_length        [i]   : Length of input and output data.
      79             : // - filter_coefficient [i]   : Given in Q15.
      80             : // - filter_state       [i/o] : State of the filter given in Q(-1).
      81             : // - data_out           [o]   : Output audio signal given in Q(-1).
      82           0 : static void AllPassFilter(const int16_t* data_in, size_t data_length,
      83             :                           int16_t filter_coefficient, int16_t* filter_state,
      84             :                           int16_t* data_out) {
      85             :   // The filter can only cause overflow (in the w16 output variable)
      86             :   // if more than 4 consecutive input numbers are of maximum value and
      87             :   // has the the same sign as the impulse responses first taps.
      88             :   // First 6 taps of the impulse response:
      89             :   // 0.6399 0.5905 -0.3779 0.2418 -0.1547 0.0990
      90             : 
      91             :   size_t i;
      92           0 :   int16_t tmp16 = 0;
      93           0 :   int32_t tmp32 = 0;
      94           0 :   int32_t state32 = ((int32_t) (*filter_state) << 16);  // Q15
      95             : 
      96           0 :   for (i = 0; i < data_length; i++) {
      97           0 :     tmp32 = state32 + filter_coefficient * *data_in;
      98           0 :     tmp16 = (int16_t) (tmp32 >> 16);  // Q(-1)
      99           0 :     *data_out++ = tmp16;
     100           0 :     state32 = (*data_in << 14) - filter_coefficient * tmp16;  // Q14
     101           0 :     state32 <<= 1;  // Q15.
     102           0 :     data_in += 2;
     103             :   }
     104             : 
     105           0 :   *filter_state = (int16_t) (state32 >> 16);  // Q(-1)
     106           0 : }
     107             : 
     108             : // Splits |data_in| into |hp_data_out| and |lp_data_out| corresponding to
     109             : // an upper (high pass) part and a lower (low pass) part respectively.
     110             : //
     111             : // - data_in      [i]   : Input audio data to be split into two frequency bands.
     112             : // - data_length  [i]   : Length of |data_in|.
     113             : // - upper_state  [i/o] : State of the upper filter, given in Q(-1).
     114             : // - lower_state  [i/o] : State of the lower filter, given in Q(-1).
     115             : // - hp_data_out  [o]   : Output audio data of the upper half of the spectrum.
     116             : //                        The length is |data_length| / 2.
     117             : // - lp_data_out  [o]   : Output audio data of the lower half of the spectrum.
     118             : //                        The length is |data_length| / 2.
     119           0 : static void SplitFilter(const int16_t* data_in, size_t data_length,
     120             :                         int16_t* upper_state, int16_t* lower_state,
     121             :                         int16_t* hp_data_out, int16_t* lp_data_out) {
     122             :   size_t i;
     123           0 :   size_t half_length = data_length >> 1;  // Downsampling by 2.
     124             :   int16_t tmp_out;
     125             : 
     126             :   // All-pass filtering upper branch.
     127           0 :   AllPassFilter(&data_in[0], half_length, kAllPassCoefsQ15[0], upper_state,
     128             :                 hp_data_out);
     129             : 
     130             :   // All-pass filtering lower branch.
     131           0 :   AllPassFilter(&data_in[1], half_length, kAllPassCoefsQ15[1], lower_state,
     132             :                 lp_data_out);
     133             : 
     134             :   // Make LP and HP signals.
     135           0 :   for (i = 0; i < half_length; i++) {
     136           0 :     tmp_out = *hp_data_out;
     137           0 :     *hp_data_out++ -= *lp_data_out;
     138           0 :     *lp_data_out++ += tmp_out;
     139             :   }
     140           0 : }
     141             : 
     142             : // Calculates the energy of |data_in| in dB, and also updates an overall
     143             : // |total_energy| if necessary.
     144             : //
     145             : // - data_in      [i]   : Input audio data for energy calculation.
     146             : // - data_length  [i]   : Length of input data.
     147             : // - offset       [i]   : Offset value added to |log_energy|.
     148             : // - total_energy [i/o] : An external energy updated with the energy of
     149             : //                        |data_in|.
     150             : //                        NOTE: |total_energy| is only updated if
     151             : //                        |total_energy| <= |kMinEnergy|.
     152             : // - log_energy   [o]   : 10 * log10("energy of |data_in|") given in Q4.
     153           0 : static void LogOfEnergy(const int16_t* data_in, size_t data_length,
     154             :                         int16_t offset, int16_t* total_energy,
     155             :                         int16_t* log_energy) {
     156             :   // |tot_rshifts| accumulates the number of right shifts performed on |energy|.
     157           0 :   int tot_rshifts = 0;
     158             :   // The |energy| will be normalized to 15 bits. We use unsigned integer because
     159             :   // we eventually will mask out the fractional part.
     160           0 :   uint32_t energy = 0;
     161             : 
     162           0 :   RTC_DCHECK(data_in);
     163           0 :   RTC_DCHECK_GT(data_length, 0);
     164             : 
     165           0 :   energy = (uint32_t) WebRtcSpl_Energy((int16_t*) data_in, data_length,
     166             :                                        &tot_rshifts);
     167             : 
     168           0 :   if (energy != 0) {
     169             :     // By construction, normalizing to 15 bits is equivalent with 17 leading
     170             :     // zeros of an unsigned 32 bit value.
     171           0 :     int normalizing_rshifts = 17 - WebRtcSpl_NormU32(energy);
     172             :     // In a 15 bit representation the leading bit is 2^14. log2(2^14) in Q10 is
     173             :     // (14 << 10), which is what we initialize |log2_energy| with. For a more
     174             :     // detailed derivations, see below.
     175           0 :     int16_t log2_energy = kLogEnergyIntPart;
     176             : 
     177           0 :     tot_rshifts += normalizing_rshifts;
     178             :     // Normalize |energy| to 15 bits.
     179             :     // |tot_rshifts| is now the total number of right shifts performed on
     180             :     // |energy| after normalization. This means that |energy| is in
     181             :     // Q(-tot_rshifts).
     182           0 :     if (normalizing_rshifts < 0) {
     183           0 :       energy <<= -normalizing_rshifts;
     184             :     } else {
     185           0 :       energy >>= normalizing_rshifts;
     186             :     }
     187             : 
     188             :     // Calculate the energy of |data_in| in dB, in Q4.
     189             :     //
     190             :     // 10 * log10("true energy") in Q4 = 2^4 * 10 * log10("true energy") =
     191             :     // 160 * log10(|energy| * 2^|tot_rshifts|) =
     192             :     // 160 * log10(2) * log2(|energy| * 2^|tot_rshifts|) =
     193             :     // 160 * log10(2) * (log2(|energy|) + log2(2^|tot_rshifts|)) =
     194             :     // (160 * log10(2)) * (log2(|energy|) + |tot_rshifts|) =
     195             :     // |kLogConst| * (|log2_energy| + |tot_rshifts|)
     196             :     //
     197             :     // We know by construction that |energy| is normalized to 15 bits. Hence,
     198             :     // |energy| = 2^14 + frac_Q15, where frac_Q15 is a fractional part in Q15.
     199             :     // Further, we'd like |log2_energy| in Q10
     200             :     // log2(|energy|) in Q10 = 2^10 * log2(2^14 + frac_Q15) =
     201             :     // 2^10 * log2(2^14 * (1 + frac_Q15 * 2^-14)) =
     202             :     // 2^10 * (14 + log2(1 + frac_Q15 * 2^-14)) ~=
     203             :     // (14 << 10) + 2^10 * (frac_Q15 * 2^-14) =
     204             :     // (14 << 10) + (frac_Q15 * 2^-4) = (14 << 10) + (frac_Q15 >> 4)
     205             :     //
     206             :     // Note that frac_Q15 = (|energy| & 0x00003FFF)
     207             : 
     208             :     // Calculate and add the fractional part to |log2_energy|.
     209           0 :     log2_energy += (int16_t) ((energy & 0x00003FFF) >> 4);
     210             : 
     211             :     // |kLogConst| is in Q9, |log2_energy| in Q10 and |tot_rshifts| in Q0.
     212             :     // Note that we in our derivation above have accounted for an output in Q4.
     213           0 :     *log_energy = (int16_t)(((kLogConst * log2_energy) >> 19) +
     214           0 :         ((tot_rshifts * kLogConst) >> 9));
     215             : 
     216           0 :     if (*log_energy < 0) {
     217           0 :       *log_energy = 0;
     218             :     }
     219             :   } else {
     220           0 :     *log_energy = offset;
     221           0 :     return;
     222             :   }
     223             : 
     224           0 :   *log_energy += offset;
     225             : 
     226             :   // Update the approximate |total_energy| with the energy of |data_in|, if
     227             :   // |total_energy| has not exceeded |kMinEnergy|. |total_energy| is used as an
     228             :   // energy indicator in WebRtcVad_GmmProbability() in vad_core.c.
     229           0 :   if (*total_energy <= kMinEnergy) {
     230           0 :     if (tot_rshifts >= 0) {
     231             :       // We know by construction that the |energy| > |kMinEnergy| in Q0, so add
     232             :       // an arbitrary value such that |total_energy| exceeds |kMinEnergy|.
     233           0 :       *total_energy += kMinEnergy + 1;
     234             :     } else {
     235             :       // By construction |energy| is represented by 15 bits, hence any number of
     236             :       // right shifted |energy| will fit in an int16_t. In addition, adding the
     237             :       // value to |total_energy| is wrap around safe as long as
     238             :       // |kMinEnergy| < 8192.
     239           0 :       *total_energy += (int16_t) (energy >> -tot_rshifts);  // Q0.
     240             :     }
     241             :   }
     242             : }
     243             : 
     244           0 : int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
     245             :                                     size_t data_length, int16_t* features) {
     246           0 :   int16_t total_energy = 0;
     247             :   // We expect |data_length| to be 80, 160 or 240 samples, which corresponds to
     248             :   // 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will
     249             :   // have at most 120 samples after the first split and at most 60 samples after
     250             :   // the second split.
     251             :   int16_t hp_120[120], lp_120[120];
     252             :   int16_t hp_60[60], lp_60[60];
     253           0 :   const size_t half_data_length = data_length >> 1;
     254           0 :   size_t length = half_data_length;  // |data_length| / 2, corresponds to
     255             :                                      // bandwidth = 2000 Hz after downsampling.
     256             : 
     257             :   // Initialize variables for the first SplitFilter().
     258           0 :   int frequency_band = 0;
     259           0 :   const int16_t* in_ptr = data_in;  // [0 - 4000] Hz.
     260           0 :   int16_t* hp_out_ptr = hp_120;  // [2000 - 4000] Hz.
     261           0 :   int16_t* lp_out_ptr = lp_120;  // [0 - 2000] Hz.
     262             : 
     263           0 :   RTC_DCHECK_LE(data_length, 240);
     264             :   RTC_DCHECK_LT(4, kNumChannels - 1);  // Checking maximum |frequency_band|.
     265             : 
     266             :   // Split at 2000 Hz and downsample.
     267           0 :   SplitFilter(in_ptr, data_length, &self->upper_state[frequency_band],
     268             :               &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
     269             : 
     270             :   // For the upper band (2000 Hz - 4000 Hz) split at 3000 Hz and downsample.
     271           0 :   frequency_band = 1;
     272           0 :   in_ptr = hp_120;  // [2000 - 4000] Hz.
     273           0 :   hp_out_ptr = hp_60;  // [3000 - 4000] Hz.
     274           0 :   lp_out_ptr = lp_60;  // [2000 - 3000] Hz.
     275           0 :   SplitFilter(in_ptr, length, &self->upper_state[frequency_band],
     276             :               &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
     277             : 
     278             :   // Energy in 3000 Hz - 4000 Hz.
     279           0 :   length >>= 1;  // |data_length| / 4 <=> bandwidth = 1000 Hz.
     280             : 
     281           0 :   LogOfEnergy(hp_60, length, kOffsetVector[5], &total_energy, &features[5]);
     282             : 
     283             :   // Energy in 2000 Hz - 3000 Hz.
     284           0 :   LogOfEnergy(lp_60, length, kOffsetVector[4], &total_energy, &features[4]);
     285             : 
     286             :   // For the lower band (0 Hz - 2000 Hz) split at 1000 Hz and downsample.
     287           0 :   frequency_band = 2;
     288           0 :   in_ptr = lp_120;  // [0 - 2000] Hz.
     289           0 :   hp_out_ptr = hp_60;  // [1000 - 2000] Hz.
     290           0 :   lp_out_ptr = lp_60;  // [0 - 1000] Hz.
     291           0 :   length = half_data_length;  // |data_length| / 2 <=> bandwidth = 2000 Hz.
     292           0 :   SplitFilter(in_ptr, length, &self->upper_state[frequency_band],
     293             :               &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
     294             : 
     295             :   // Energy in 1000 Hz - 2000 Hz.
     296           0 :   length >>= 1;  // |data_length| / 4 <=> bandwidth = 1000 Hz.
     297           0 :   LogOfEnergy(hp_60, length, kOffsetVector[3], &total_energy, &features[3]);
     298             : 
     299             :   // For the lower band (0 Hz - 1000 Hz) split at 500 Hz and downsample.
     300           0 :   frequency_band = 3;
     301           0 :   in_ptr = lp_60;  // [0 - 1000] Hz.
     302           0 :   hp_out_ptr = hp_120;  // [500 - 1000] Hz.
     303           0 :   lp_out_ptr = lp_120;  // [0 - 500] Hz.
     304           0 :   SplitFilter(in_ptr, length, &self->upper_state[frequency_band],
     305             :               &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
     306             : 
     307             :   // Energy in 500 Hz - 1000 Hz.
     308           0 :   length >>= 1;  // |data_length| / 8 <=> bandwidth = 500 Hz.
     309           0 :   LogOfEnergy(hp_120, length, kOffsetVector[2], &total_energy, &features[2]);
     310             : 
     311             :   // For the lower band (0 Hz - 500 Hz) split at 250 Hz and downsample.
     312           0 :   frequency_band = 4;
     313           0 :   in_ptr = lp_120;  // [0 - 500] Hz.
     314           0 :   hp_out_ptr = hp_60;  // [250 - 500] Hz.
     315           0 :   lp_out_ptr = lp_60;  // [0 - 250] Hz.
     316           0 :   SplitFilter(in_ptr, length, &self->upper_state[frequency_band],
     317             :               &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
     318             : 
     319             :   // Energy in 250 Hz - 500 Hz.
     320           0 :   length >>= 1;  // |data_length| / 16 <=> bandwidth = 250 Hz.
     321           0 :   LogOfEnergy(hp_60, length, kOffsetVector[1], &total_energy, &features[1]);
     322             : 
     323             :   // Remove 0 Hz - 80 Hz, by high pass filtering the lower band.
     324           0 :   HighPassFilter(lp_60, length, self->hp_filter_state, hp_120);
     325             : 
     326             :   // Energy in 80 Hz - 250 Hz.
     327           0 :   LogOfEnergy(hp_120, length, kOffsetVector[0], &total_energy, &features[0]);
     328             : 
     329           0 :   return total_energy;
     330             : }

Generated by: LCOV version 1.13