Line data Source code
1 : /*
2 : * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "webrtc/modules/audio_processing/transient/transient_detector.h"
12 :
13 : #include <float.h>
14 : #include <math.h>
15 : #include <string.h>
16 :
17 : #include <algorithm>
18 :
19 : #include "webrtc/base/checks.h"
20 : #include "webrtc/modules/audio_processing/transient/common.h"
21 : #include "webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h"
22 : #include "webrtc/modules/audio_processing/transient/moving_moments.h"
23 : #include "webrtc/modules/audio_processing/transient/wpd_tree.h"
24 :
25 : namespace webrtc {
26 :
27 : static const int kTransientLengthMs = 30;
28 : static const int kChunksAtStartupLeftToDelete =
29 : kTransientLengthMs / ts::kChunkSizeMs;
30 : static const float kDetectThreshold = 16.f;
31 :
32 0 : TransientDetector::TransientDetector(int sample_rate_hz)
33 0 : : samples_per_chunk_(sample_rate_hz * ts::kChunkSizeMs / 1000),
34 : last_first_moment_(),
35 : last_second_moment_(),
36 : chunks_at_startup_left_to_delete_(kChunksAtStartupLeftToDelete),
37 : reference_energy_(1.f),
38 0 : using_reference_(false) {
39 0 : RTC_DCHECK(sample_rate_hz == ts::kSampleRate8kHz ||
40 : sample_rate_hz == ts::kSampleRate16kHz ||
41 : sample_rate_hz == ts::kSampleRate32kHz ||
42 0 : sample_rate_hz == ts::kSampleRate48kHz);
43 0 : int samples_per_transient = sample_rate_hz * kTransientLengthMs / 1000;
44 : // Adjustment to avoid data loss while downsampling, making
45 : // |samples_per_chunk_| and |samples_per_transient| always divisible by
46 : // |kLeaves|.
47 0 : samples_per_chunk_ -= samples_per_chunk_ % kLeaves;
48 0 : samples_per_transient -= samples_per_transient % kLeaves;
49 :
50 0 : tree_leaves_data_length_ = samples_per_chunk_ / kLeaves;
51 0 : wpd_tree_.reset(new WPDTree(samples_per_chunk_,
52 : kDaubechies8HighPassCoefficients,
53 : kDaubechies8LowPassCoefficients,
54 : kDaubechies8CoefficientsLength,
55 0 : kLevels));
56 0 : for (size_t i = 0; i < kLeaves; ++i) {
57 0 : moving_moments_[i].reset(
58 0 : new MovingMoments(samples_per_transient / kLeaves));
59 : }
60 :
61 0 : first_moments_.reset(new float[tree_leaves_data_length_]);
62 0 : second_moments_.reset(new float[tree_leaves_data_length_]);
63 :
64 0 : for (int i = 0; i < kChunksAtStartupLeftToDelete; ++i) {
65 0 : previous_results_.push_back(0.f);
66 : }
67 0 : }
68 :
69 0 : TransientDetector::~TransientDetector() {}
70 :
71 0 : float TransientDetector::Detect(const float* data,
72 : size_t data_length,
73 : const float* reference_data,
74 : size_t reference_length) {
75 0 : RTC_DCHECK(data);
76 0 : RTC_DCHECK_EQ(samples_per_chunk_, data_length);
77 :
78 : // TODO(aluebs): Check if these errors can logically happen and if not assert
79 : // on them.
80 0 : if (wpd_tree_->Update(data, samples_per_chunk_) != 0) {
81 0 : return -1.f;
82 : }
83 :
84 0 : float result = 0.f;
85 :
86 0 : for (size_t i = 0; i < kLeaves; ++i) {
87 0 : WPDNode* leaf = wpd_tree_->NodeAt(kLevels, i);
88 :
89 0 : moving_moments_[i]->CalculateMoments(leaf->data(),
90 : tree_leaves_data_length_,
91 : first_moments_.get(),
92 0 : second_moments_.get());
93 :
94 : // Add value delayed (Use the last moments from the last call to Detect).
95 0 : float unbiased_data = leaf->data()[0] - last_first_moment_[i];
96 0 : result +=
97 0 : unbiased_data * unbiased_data / (last_second_moment_[i] + FLT_MIN);
98 :
99 : // Add new values.
100 0 : for (size_t j = 1; j < tree_leaves_data_length_; ++j) {
101 0 : unbiased_data = leaf->data()[j] - first_moments_[j - 1];
102 0 : result +=
103 0 : unbiased_data * unbiased_data / (second_moments_[j - 1] + FLT_MIN);
104 : }
105 :
106 0 : last_first_moment_[i] = first_moments_[tree_leaves_data_length_ - 1];
107 0 : last_second_moment_[i] = second_moments_[tree_leaves_data_length_ - 1];
108 : }
109 :
110 0 : result /= tree_leaves_data_length_;
111 :
112 0 : result *= ReferenceDetectionValue(reference_data, reference_length);
113 :
114 0 : if (chunks_at_startup_left_to_delete_ > 0) {
115 0 : chunks_at_startup_left_to_delete_--;
116 0 : result = 0.f;
117 : }
118 :
119 0 : if (result >= kDetectThreshold) {
120 0 : result = 1.f;
121 : } else {
122 : // Get proportional value.
123 : // Proportion achieved with a squared raised cosine function with domain
124 : // [0, kDetectThreshold) and image [0, 1), it's always increasing.
125 0 : const float horizontal_scaling = ts::kPi / kDetectThreshold;
126 0 : const float kHorizontalShift = ts::kPi;
127 0 : const float kVerticalScaling = 0.5f;
128 0 : const float kVerticalShift = 1.f;
129 :
130 0 : result = (cos(result * horizontal_scaling + kHorizontalShift)
131 0 : + kVerticalShift) * kVerticalScaling;
132 0 : result *= result;
133 : }
134 :
135 0 : previous_results_.pop_front();
136 0 : previous_results_.push_back(result);
137 :
138 : // In the current implementation we return the max of the current result and
139 : // the previous results, so the high results have a width equals to
140 : // |transient_length|.
141 0 : return *std::max_element(previous_results_.begin(), previous_results_.end());
142 : }
143 :
144 : // Looks for the highest slope and compares it with the previous ones.
145 : // An exponential transformation takes this to the [0, 1] range. This value is
146 : // multiplied by the detection result to avoid false positives.
147 0 : float TransientDetector::ReferenceDetectionValue(const float* data,
148 : size_t length) {
149 0 : if (data == NULL) {
150 0 : using_reference_ = false;
151 0 : return 1.f;
152 : }
153 : static const float kEnergyRatioThreshold = 0.2f;
154 : static const float kReferenceNonLinearity = 20.f;
155 : static const float kMemory = 0.99f;
156 0 : float reference_energy = 0.f;
157 0 : for (size_t i = 1; i < length; ++i) {
158 0 : reference_energy += data[i] * data[i];
159 : }
160 0 : if (reference_energy == 0.f) {
161 0 : using_reference_ = false;
162 0 : return 1.f;
163 : }
164 0 : RTC_DCHECK_NE(0, reference_energy_);
165 0 : float result = 1.f / (1.f + exp(kReferenceNonLinearity *
166 0 : (kEnergyRatioThreshold -
167 0 : reference_energy / reference_energy_)));
168 0 : reference_energy_ =
169 0 : kMemory * reference_energy_ + (1.f - kMemory) * reference_energy;
170 :
171 0 : using_reference_ = true;
172 :
173 0 : return result;
174 : }
175 :
176 : } // namespace webrtc
|