Line data Source code
1 : /*
2 : * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
#include "webrtc/modules/audio_processing/agc/loudness_histogram.h"

#include <cmath>
#include <cstring>
#include <limits>

#include "webrtc/base/checks.h"
#include "webrtc/modules/include/module_common_types.h"
18 :
19 : namespace webrtc {
20 :
namespace {

// Linear-domain centers of the histogram bins, in ascending order. Adjacent
// entries differ by a constant ratio, i.e. the bins are uniformly spaced in
// the log domain.
constexpr double kHistBinCenters[] = {
    7.59621091765857e-02, 9.02036021061016e-02, 1.07115112009343e-01,
    1.27197217770508e-01, 1.51044347572047e-01, 1.79362373905283e-01,
    2.12989507320644e-01, 2.52921107370304e-01, 3.00339145144454e-01,
    3.56647189489147e-01, 4.23511952494003e-01, 5.02912623991786e-01,
    5.97199455365749e-01, 7.09163326739184e-01, 8.42118356728544e-01,
    1.00000000000000e+00, 1.18748153630660e+00, 1.41011239906908e+00,
    1.67448243801153e+00, 1.98841697800836e+00, 2.36120844786349e+00,
    2.80389143520905e+00, 3.32956930911896e+00, 3.95380207843188e+00,
    4.69506696634852e+00, 5.57530533426190e+00, 6.62057214370769e+00,
    7.86180718043869e+00, 9.33575086877358e+00, 1.10860317842269e+01,
    1.31644580546776e+01, 1.56325508754123e+01, 1.85633655299256e+01,
    2.20436538184971e+01, 2.61764319021997e+01, 3.10840295702492e+01,
    3.69117111886792e+01, 4.38319755100383e+01, 5.20496616180135e+01,
    6.18080121423973e+01, 7.33958732149108e+01, 8.71562442838066e+01,
    1.03496430860848e+02, 1.22900100720889e+02, 1.45941600416277e+02,
    1.73302955873365e+02, 2.05794060286978e+02, 2.44376646872353e+02,
    2.90192756065437e+02, 3.44598539797631e+02, 4.09204403447902e+02,
    4.85922673669740e+02, 5.77024203055553e+02, 6.85205587130498e+02,
    8.13668983291589e+02, 9.66216894324125e+02, 1.14736472207740e+03,
    1.36247442287647e+03, 1.61791322085579e+03, 1.92124207711260e+03,
    2.28143949334655e+03, 2.70916727454970e+03, 3.21708611729384e+03,
    3.82023036499473e+03, 4.53645302286906e+03, 5.38695420497926e+03,
    6.39690865534207e+03, 7.59621091765857e+03, 9.02036021061016e+03,
    1.07115112009343e+04, 1.27197217770508e+04, 1.51044347572047e+04,
    1.79362373905283e+04, 2.12989507320644e+04, 2.52921107370304e+04,
    3.00339145144454e+04, 3.56647189489147e+04};

// Scale factor of the Q10 fixed-point representation used for probabilities.
constexpr double kProbQDomain = 1024.0;

// Natural log of the first bin center: a loudness of -15 dB (the smallest
// expected loudness), where loudness_db = 13.5 * log10(rms).
constexpr double kLogDomainMinBinCenter = -2.57752062648587;

// Inverse of the log-domain distance between neighbouring bin centers, which
// corresponds to a loudness step of 1 dB.
constexpr double kLogDomainStepSizeInverse = 5.81954605750359;

// A run of high-activity frames no longer than this many frames is treated as
// a transient.
constexpr int kTransientWidthThreshold = 7;

// Activity probabilities at or below this value are treated as zero.
constexpr double kLowProbabilityThreshold = 0.2;

// |kLowProbabilityThreshold| expressed in Q10 (truncated toward zero).
constexpr int kLowProbThresholdQ10 =
    static_cast<int>(kLowProbabilityThreshold * kProbQDomain);

}  // namespace
61 :
62 0 : LoudnessHistogram::LoudnessHistogram()
63 : : num_updates_(0),
64 : audio_content_q10_(0),
65 : bin_count_q10_(),
66 : activity_probability_(),
67 : hist_bin_index_(),
68 : buffer_index_(0),
69 : buffer_is_full_(false),
70 : len_circular_buffer_(0),
71 0 : len_high_activity_(0) {
72 : static_assert(
73 : kHistSize == sizeof(kHistBinCenters) / sizeof(kHistBinCenters[0]),
74 : "histogram bin centers incorrect size");
75 0 : }
76 :
77 0 : LoudnessHistogram::LoudnessHistogram(int window_size)
78 : : num_updates_(0),
79 : audio_content_q10_(0),
80 : bin_count_q10_(),
81 0 : activity_probability_(new int[window_size]),
82 0 : hist_bin_index_(new int[window_size]),
83 : buffer_index_(0),
84 : buffer_is_full_(false),
85 : len_circular_buffer_(window_size),
86 0 : len_high_activity_(0) {}
87 :
88 0 : LoudnessHistogram::~LoudnessHistogram() {}
89 :
90 0 : void LoudnessHistogram::Update(double rms, double activity_probaility) {
91 : // If circular histogram is activated then remove the oldest entry.
92 0 : if (len_circular_buffer_ > 0)
93 0 : RemoveOldestEntryAndUpdate();
94 :
95 : // Find the corresponding bin.
96 0 : int hist_index = GetBinIndex(rms);
97 : // To Q10 domain.
98 : int prob_q10 =
99 0 : static_cast<int16_t>(floor(activity_probaility * kProbQDomain));
100 0 : InsertNewestEntryAndUpdate(prob_q10, hist_index);
101 0 : }
102 :
103 : // Doing nothing if buffer is not full, yet.
104 0 : void LoudnessHistogram::RemoveOldestEntryAndUpdate() {
105 0 : RTC_DCHECK_GT(len_circular_buffer_, 0);
106 : // Do nothing if circular buffer is not full.
107 0 : if (!buffer_is_full_)
108 0 : return;
109 :
110 0 : int oldest_prob = activity_probability_[buffer_index_];
111 0 : int oldest_hist_index = hist_bin_index_[buffer_index_];
112 0 : UpdateHist(-oldest_prob, oldest_hist_index);
113 : }
114 :
115 0 : void LoudnessHistogram::RemoveTransient() {
116 : // Don't expect to be here if high-activity region is longer than
117 : // |kTransientWidthThreshold| or there has not been any transient.
118 0 : RTC_DCHECK_LE(len_high_activity_, kTransientWidthThreshold);
119 : int index =
120 0 : (buffer_index_ > 0) ? (buffer_index_ - 1) : len_circular_buffer_ - 1;
121 0 : while (len_high_activity_ > 0) {
122 0 : UpdateHist(-activity_probability_[index], hist_bin_index_[index]);
123 0 : activity_probability_[index] = 0;
124 0 : index = (index > 0) ? (index - 1) : (len_circular_buffer_ - 1);
125 0 : len_high_activity_--;
126 : }
127 0 : }
128 :
129 0 : void LoudnessHistogram::InsertNewestEntryAndUpdate(int activity_prob_q10,
130 : int hist_index) {
131 : // Update the circular buffer if it is enabled.
132 0 : if (len_circular_buffer_ > 0) {
133 : // Removing transient.
134 0 : if (activity_prob_q10 <= kLowProbThresholdQ10) {
135 : // Lower than threshold probability, set it to zero.
136 0 : activity_prob_q10 = 0;
137 : // Check if this has been a transient.
138 0 : if (len_high_activity_ <= kTransientWidthThreshold)
139 0 : RemoveTransient(); // Remove this transient.
140 0 : len_high_activity_ = 0;
141 0 : } else if (len_high_activity_ <= kTransientWidthThreshold) {
142 0 : len_high_activity_++;
143 : }
144 : // Updating the circular buffer.
145 0 : activity_probability_[buffer_index_] = activity_prob_q10;
146 0 : hist_bin_index_[buffer_index_] = hist_index;
147 : // Increment the buffer index and check for wrap-around.
148 0 : buffer_index_++;
149 0 : if (buffer_index_ >= len_circular_buffer_) {
150 0 : buffer_index_ = 0;
151 0 : buffer_is_full_ = true;
152 : }
153 : }
154 :
155 0 : num_updates_++;
156 0 : if (num_updates_ < 0)
157 0 : num_updates_--;
158 :
159 0 : UpdateHist(activity_prob_q10, hist_index);
160 0 : }
161 :
162 0 : void LoudnessHistogram::UpdateHist(int activity_prob_q10, int hist_index) {
163 0 : bin_count_q10_[hist_index] += activity_prob_q10;
164 0 : audio_content_q10_ += activity_prob_q10;
165 0 : }
166 :
167 0 : double LoudnessHistogram::AudioContent() const {
168 0 : return audio_content_q10_ / kProbQDomain;
169 : }
170 :
171 0 : LoudnessHistogram* LoudnessHistogram::Create() {
172 0 : return new LoudnessHistogram;
173 : }
174 :
175 0 : LoudnessHistogram* LoudnessHistogram::Create(int window_size) {
176 0 : if (window_size < 0)
177 0 : return NULL;
178 0 : return new LoudnessHistogram(window_size);
179 : }
180 :
181 0 : void LoudnessHistogram::Reset() {
182 : // Reset the histogram, audio-content and number of updates.
183 0 : memset(bin_count_q10_, 0, sizeof(bin_count_q10_));
184 0 : audio_content_q10_ = 0;
185 0 : num_updates_ = 0;
186 : // Empty the circular buffer.
187 0 : buffer_index_ = 0;
188 0 : buffer_is_full_ = false;
189 0 : len_high_activity_ = 0;
190 0 : }
191 :
192 0 : int LoudnessHistogram::GetBinIndex(double rms) {
193 : // First exclude overload cases.
194 0 : if (rms <= kHistBinCenters[0]) {
195 0 : return 0;
196 0 : } else if (rms >= kHistBinCenters[kHistSize - 1]) {
197 0 : return kHistSize - 1;
198 : } else {
199 : // The quantizer is uniform in log domain. Alternatively we could do binary
200 : // search in linear domain.
201 0 : double rms_log = log(rms);
202 :
203 : int index = static_cast<int>(
204 0 : floor((rms_log - kLogDomainMinBinCenter) * kLogDomainStepSizeInverse));
205 : // The final decision is in linear domain.
206 0 : double b = 0.5 * (kHistBinCenters[index] + kHistBinCenters[index + 1]);
207 0 : if (rms > b) {
208 0 : return index + 1;
209 : }
210 0 : return index;
211 : }
212 : }
213 :
214 0 : double LoudnessHistogram::CurrentRms() const {
215 : double p;
216 0 : double mean_val = 0;
217 0 : if (audio_content_q10_ > 0) {
218 0 : double p_total_inverse = 1. / static_cast<double>(audio_content_q10_);
219 0 : for (int n = 0; n < kHistSize; n++) {
220 0 : p = static_cast<double>(bin_count_q10_[n]) * p_total_inverse;
221 0 : mean_val += p * kHistBinCenters[n];
222 : }
223 : } else {
224 0 : mean_val = kHistBinCenters[0];
225 : }
226 0 : return mean_val;
227 : }
228 :
229 : } // namespace webrtc
|