LCOV - code coverage report
Current view: top level - media/webrtc/trunk/webrtc/modules/audio_coding/neteq - time_stretch.cc (source / functions) Hit Total Coverage
Test: output.info Lines: 0 83 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 3 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
       3             :  *
       4             :  *  Use of this source code is governed by a BSD-style license
       5             :  *  that can be found in the LICENSE file in the root of the source
       6             :  *  tree. An additional intellectual property rights grant can be found
       7             :  *  in the file PATENTS.  All contributing project authors may
       8             :  *  be found in the AUTHORS file in the root of the source tree.
       9             :  */
      10             : 
      11             : #include "webrtc/modules/audio_coding/neteq/time_stretch.h"
      12             : 
      13             : #include <algorithm>  // min, max
      14             : #include <memory>
      15             : 
      16             : #include "webrtc/base/safe_conversions.h"
      17             : #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
      18             : #include "webrtc/modules/audio_coding/neteq/background_noise.h"
      19             : #include "webrtc/modules/audio_coding/neteq/cross_correlation.h"
      20             : #include "webrtc/modules/audio_coding/neteq/dsp_helper.h"
      21             : 
      22             : namespace webrtc {
      23             : 
      24           0 : TimeStretch::ReturnCodes TimeStretch::Process(const int16_t* input,
      25             :                                               size_t input_len,
      26             :                                               bool fast_mode,
      27             :                                               AudioMultiVector* output,
      28             :                                               size_t* length_change_samples) {
      29             :   // Pre-calculate common multiplication with |fs_mult_|.
      30             :   size_t fs_mult_120 =
      31           0 :       static_cast<size_t>(fs_mult_ * 120);  // Corresponds to 15 ms.
      32             : 
      33             :   const int16_t* signal;
      34           0 :   std::unique_ptr<int16_t[]> signal_array;
      35             :   size_t signal_len;
      36           0 :   if (num_channels_ == 1) {
      37           0 :     signal = input;
      38           0 :     signal_len = input_len;
      39             :   } else {
      40             :     // We want |signal| to be only the first channel of |input|, which is
      41             :     // interleaved. Thus, we take the first sample, skip forward |num_channels|
      42             :     // samples, and continue like that.
      43           0 :     signal_len = input_len / num_channels_;
      44           0 :     signal_array.reset(new int16_t[signal_len]);
      45           0 :     signal = signal_array.get();
      46           0 :     size_t j = master_channel_;
      47           0 :     for (size_t i = 0; i < signal_len; ++i) {
      48           0 :       signal_array[i] = input[j];
      49           0 :       j += num_channels_;
      50             :     }
      51             :   }
      52             : 
      53             :   // Find maximum absolute value of input signal.
      54           0 :   max_input_value_ = WebRtcSpl_MaxAbsValueW16(signal, signal_len);
      55             : 
      56             :   // Downsample to 4 kHz sample rate and calculate auto-correlation.
      57           0 :   DspHelper::DownsampleTo4kHz(signal, signal_len, kDownsampledLen,
      58           0 :                               sample_rate_hz_, true /* compensate delay*/,
      59           0 :                               downsampled_input_);
      60           0 :   AutoCorrelation();
      61             : 
      62             :   // Find the strongest correlation peak.
      63             :   static const size_t kNumPeaks = 1;
      64             :   size_t peak_index;
      65             :   int16_t peak_value;
      66           0 :   DspHelper::PeakDetection(auto_correlation_, kCorrelationLen, kNumPeaks,
      67           0 :                            fs_mult_, &peak_index, &peak_value);
      68             :   // Assert that |peak_index| stays within boundaries.
      69           0 :   assert(peak_index <= (2 * kCorrelationLen - 1) * fs_mult_);
      70             : 
      71             :   // Compensate peak_index for displaced starting position. The displacement
      72             :   // happens in AutoCorrelation(). Here, |kMinLag| is in the down-sampled 4 kHz
      73             :   // domain, while the |peak_index| is in the original sample rate; hence, the
      74             :   // multiplication by fs_mult_ * 2.
      75           0 :   peak_index += kMinLag * fs_mult_ * 2;
      76             :   // Assert that |peak_index| stays within boundaries.
      77           0 :   assert(peak_index >= static_cast<size_t>(20 * fs_mult_));
      78           0 :   assert(peak_index <= 20 * fs_mult_ + (2 * kCorrelationLen - 1) * fs_mult_);
      79             : 
      80             :   // Calculate scaling to ensure that |peak_index| samples can be square-summed
      81             :   // without overflowing.
      82           0 :   int scaling = 31 - WebRtcSpl_NormW32(max_input_value_ * max_input_value_) -
      83           0 :       WebRtcSpl_NormW32(static_cast<int32_t>(peak_index));
      84           0 :   scaling = std::max(0, scaling);
      85             : 
      86             :   // |vec1| starts at 15 ms minus one pitch period.
      87           0 :   const int16_t* vec1 = &signal[fs_mult_120 - peak_index];
      88             :   // |vec2| start at 15 ms.
      89           0 :   const int16_t* vec2 = &signal[fs_mult_120];
      90             :   // Calculate energies for |vec1| and |vec2|, assuming they both contain
      91             :   // |peak_index| samples.
      92             :   int32_t vec1_energy =
      93           0 :       WebRtcSpl_DotProductWithScale(vec1, vec1, peak_index, scaling);
      94             :   int32_t vec2_energy =
      95           0 :       WebRtcSpl_DotProductWithScale(vec2, vec2, peak_index, scaling);
      96             : 
      97             :   // Calculate cross-correlation between |vec1| and |vec2|.
      98             :   int32_t cross_corr =
      99           0 :       WebRtcSpl_DotProductWithScale(vec1, vec2, peak_index, scaling);
     100             : 
     101             :   // Check if the signal seems to be active speech or not (simple VAD).
     102           0 :   bool active_speech = SpeechDetection(vec1_energy, vec2_energy, peak_index,
     103           0 :                                        scaling);
     104             : 
     105             :   int16_t best_correlation;
     106           0 :   if (!active_speech) {
     107           0 :     SetParametersForPassiveSpeech(signal_len, &best_correlation, &peak_index);
     108             :   } else {
     109             :     // Calculate correlation:
     110             :     // cross_corr / sqrt(vec1_energy * vec2_energy).
     111             : 
     112             :     // Start with calculating scale values.
     113           0 :     int energy1_scale = std::max(0, 16 - WebRtcSpl_NormW32(vec1_energy));
     114           0 :     int energy2_scale = std::max(0, 16 - WebRtcSpl_NormW32(vec2_energy));
     115             : 
     116             :     // Make sure total scaling is even (to simplify scale factor after sqrt).
     117           0 :     if ((energy1_scale + energy2_scale) & 1) {
     118             :       // The sum is odd.
     119           0 :       energy1_scale += 1;
     120             :     }
     121             : 
     122             :     // Scale energies to int16_t.
     123             :     int16_t vec1_energy_int16 =
     124           0 :         static_cast<int16_t>(vec1_energy >> energy1_scale);
     125             :     int16_t vec2_energy_int16 =
     126           0 :         static_cast<int16_t>(vec2_energy >> energy2_scale);
     127             : 
     128             :     // Calculate square-root of energy product.
     129           0 :     int16_t sqrt_energy_prod = WebRtcSpl_SqrtFloor(vec1_energy_int16 *
     130           0 :                                                    vec2_energy_int16);
     131             : 
     132             :     // Calculate cross_corr / sqrt(en1*en2) in Q14.
     133           0 :     int temp_scale = 14 - (energy1_scale + energy2_scale) / 2;
     134           0 :     cross_corr = WEBRTC_SPL_SHIFT_W32(cross_corr, temp_scale);
     135           0 :     cross_corr = std::max(0, cross_corr);  // Don't use if negative.
     136           0 :     best_correlation = WebRtcSpl_DivW32W16(cross_corr, sqrt_energy_prod);
     137             :     // Make sure |best_correlation| is no larger than 1 in Q14.
     138           0 :     best_correlation = std::min(static_cast<int16_t>(16384), best_correlation);
     139             :   }
     140             : 
     141             : 
     142             :   // Check accelerate criteria and stretch the signal.
     143             :   ReturnCodes return_value =
     144           0 :       CheckCriteriaAndStretch(input, input_len, peak_index, best_correlation,
     145           0 :                               active_speech, fast_mode, output);
     146           0 :   switch (return_value) {
     147             :     case kSuccess:
     148           0 :       *length_change_samples = peak_index;
     149           0 :       break;
     150             :     case kSuccessLowEnergy:
     151           0 :       *length_change_samples = peak_index;
     152           0 :       break;
     153             :     case kNoStretch:
     154             :     case kError:
     155           0 :       *length_change_samples = 0;
     156           0 :       break;
     157             :   }
     158           0 :   return return_value;
     159             : }
     160             : 
     161           0 : void TimeStretch::AutoCorrelation() {
     162             :   // Calculate correlation from lag kMinLag to lag kMaxLag in 4 kHz domain.
     163             :   int32_t auto_corr[kCorrelationLen];
     164             :   CrossCorrelationWithAutoShift(
     165           0 :       &downsampled_input_[kMaxLag], &downsampled_input_[kMaxLag - kMinLag],
     166           0 :       kCorrelationLen, kMaxLag - kMinLag, -1, auto_corr);
     167             : 
     168             :   // Normalize correlation to 14 bits and write to |auto_correlation_|.
     169           0 :   int32_t max_corr = WebRtcSpl_MaxAbsValueW32(auto_corr, kCorrelationLen);
     170           0 :   int scaling = std::max(0, 17 - WebRtcSpl_NormW32(max_corr));
     171           0 :   WebRtcSpl_VectorBitShiftW32ToW16(auto_correlation_, kCorrelationLen,
     172           0 :                                    auto_corr, scaling);
     173           0 : }
     174             : 
     175           0 : bool TimeStretch::SpeechDetection(int32_t vec1_energy, int32_t vec2_energy,
     176             :                                   size_t peak_index, int scaling) const {
     177             :   // Check if the signal seems to be active speech or not (simple VAD).
     178             :   // If (vec1_energy + vec2_energy) / (2 * peak_index) <=
     179             :   // 8 * background_noise_energy, then we say that the signal contains no
     180             :   // active speech.
     181             :   // Rewrite the inequality as:
     182             :   // (vec1_energy + vec2_energy) / 16 <= peak_index * background_noise_energy.
     183             :   // The two sides of the inequality will be denoted |left_side| and
     184             :   // |right_side|.
     185           0 :   int32_t left_side = (vec1_energy + vec2_energy) / 16;
     186             :   int32_t right_side;
     187           0 :   if (background_noise_.initialized()) {
     188           0 :     right_side = background_noise_.Energy(master_channel_);
     189             :   } else {
     190             :     // If noise parameters have not been estimated, use a fixed threshold.
     191           0 :     right_side = 75000;
     192             :   }
     193           0 :   int right_scale = 16 - WebRtcSpl_NormW32(right_side);
     194           0 :   right_scale = std::max(0, right_scale);
     195           0 :   left_side = left_side >> right_scale;
     196           0 :   right_side =
     197           0 :       rtc::checked_cast<int32_t>(peak_index) * (right_side >> right_scale);
     198             : 
     199             :   // Scale |left_side| properly before comparing with |right_side|.
     200             :   // (|scaling| is the scale factor before energy calculation, thus the scale
     201             :   // factor for the energy is 2 * scaling.)
     202           0 :   if (WebRtcSpl_NormW32(left_side) < 2 * scaling) {
     203             :     // Cannot scale only |left_side|, must scale |right_side| too.
     204           0 :     int temp_scale = WebRtcSpl_NormW32(left_side);
     205           0 :     left_side = left_side << temp_scale;
     206           0 :     right_side = right_side >> (2 * scaling - temp_scale);
     207             :   } else {
     208           0 :     left_side = left_side << 2 * scaling;
     209             :   }
     210           0 :   return left_side > right_side;
     211             : }
     212             : 
     213             : }  // namespace webrtc

Generated by: LCOV version 1.13