Line data Source code
1 : /*
2 : * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "webrtc/modules/audio_processing/level_controller/level_controller.h"
12 :
13 : #include <math.h>
14 : #include <algorithm>
15 : #include <numeric>
16 :
17 : #include "webrtc/base/array_view.h"
18 : #include "webrtc/base/arraysize.h"
19 : #include "webrtc/base/checks.h"
20 : #include "webrtc/modules/audio_processing/audio_buffer.h"
21 : #include "webrtc/modules/audio_processing/level_controller/gain_applier.h"
22 : #include "webrtc/modules/audio_processing/level_controller/gain_selector.h"
23 : #include "webrtc/modules/audio_processing/level_controller/noise_level_estimator.h"
24 : #include "webrtc/modules/audio_processing/level_controller/peak_level_estimator.h"
25 : #include "webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h"
26 : #include "webrtc/modules/audio_processing/level_controller/signal_classifier.h"
27 : #include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
28 : #include "webrtc/system_wrappers/include/logging.h"
29 : #include "webrtc/system_wrappers/include/metrics.h"
30 :
31 : namespace webrtc {
32 : namespace {
33 :
34 0 : void UpdateAndRemoveDcLevel(float forgetting_factor,
35 : float* dc_level,
36 : rtc::ArrayView<float> x) {
37 0 : RTC_DCHECK(!x.empty());
38 : float mean =
39 0 : std::accumulate(x.begin(), x.end(), 0.0f) / static_cast<float>(x.size());
40 0 : *dc_level += forgetting_factor * (mean - *dc_level);
41 :
42 0 : for (float& v : x) {
43 0 : v -= *dc_level;
44 : }
45 0 : }
46 :
47 0 : float FrameEnergy(const AudioBuffer& audio) {
48 0 : float energy = 0.f;
49 0 : for (size_t k = 0; k < audio.num_channels(); ++k) {
50 : float channel_energy =
51 0 : std::accumulate(audio.channels_const_f()[k],
52 0 : audio.channels_const_f()[k] + audio.num_frames(), 0,
53 0 : [](float a, float b) -> float { return a + b * b; });
54 0 : energy = std::max(channel_energy, energy);
55 : }
56 0 : return energy;
57 : }
58 :
59 0 : float PeakLevel(const AudioBuffer& audio) {
60 0 : float peak_level = 0.f;
61 0 : for (size_t k = 0; k < audio.num_channels(); ++k) {
62 0 : auto channel_peak_level = std::max_element(
63 0 : audio.channels_const_f()[k],
64 0 : audio.channels_const_f()[k] + audio.num_frames(),
65 0 : [](float a, float b) { return std::abs(a) < std::abs(b); });
66 0 : peak_level = std::max(*channel_peak_level, peak_level);
67 : }
68 0 : return peak_level;
69 : }
70 :
// Number of Metrics::Update() calls between two consecutive metrics reports.
constexpr int kMetricsFrameInterval = 1000;
72 :
73 : } // namespace
74 :
75 : int LevelController::instance_count_ = 0;
76 :
77 0 : void LevelController::Metrics::Initialize(int sample_rate_hz) {
78 0 : RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
79 : sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
80 : sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
81 0 : sample_rate_hz == AudioProcessing::kSampleRate48kHz);
82 :
83 0 : Reset();
84 0 : frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100);
85 0 : }
86 :
87 0 : void LevelController::Metrics::Reset() {
88 0 : metrics_frame_counter_ = 0;
89 0 : gain_sum_ = 0.f;
90 0 : peak_level_sum_ = 0.f;
91 0 : noise_energy_sum_ = 0.f;
92 0 : max_gain_ = 0.f;
93 0 : max_peak_level_ = 0.f;
94 0 : max_noise_energy_ = 0.f;
95 0 : }
96 :
97 0 : void LevelController::Metrics::Update(float long_term_peak_level,
98 : float noise_energy,
99 : float gain,
100 : float frame_peak_level) {
101 0 : const float kdBFSOffset = 90.3090f;
102 0 : gain_sum_ += gain;
103 0 : peak_level_sum_ += long_term_peak_level;
104 0 : noise_energy_sum_ += noise_energy;
105 0 : max_gain_ = std::max(max_gain_, gain);
106 0 : max_peak_level_ = std::max(max_peak_level_, long_term_peak_level);
107 0 : max_noise_energy_ = std::max(max_noise_energy_, noise_energy);
108 :
109 0 : ++metrics_frame_counter_;
110 0 : if (metrics_frame_counter_ == kMetricsFrameInterval) {
111 0 : RTC_DCHECK_LT(0, frame_length_);
112 0 : RTC_DCHECK_LT(0, kMetricsFrameInterval);
113 :
114 : const int max_noise_power_dbfs = static_cast<int>(
115 0 : 10 * log10(max_noise_energy_ / frame_length_ + 1e-10f) - kdBFSOffset);
116 0 : RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxNoisePower",
117 : max_noise_power_dbfs, -90, 0, 50);
118 :
119 : const int average_noise_power_dbfs = static_cast<int>(
120 0 : 10 * log10(noise_energy_sum_ / (frame_length_ * kMetricsFrameInterval) +
121 0 : 1e-10f) -
122 0 : kdBFSOffset);
123 0 : RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AverageNoisePower",
124 : average_noise_power_dbfs, -90, 0, 50);
125 :
126 : const int max_peak_level_dbfs = static_cast<int>(
127 0 : 10 * log10(max_peak_level_ * max_peak_level_ + 1e-10f) - kdBFSOffset);
128 0 : RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxPeakLevel",
129 : max_peak_level_dbfs, -90, 0, 50);
130 :
131 : const int average_peak_level_dbfs = static_cast<int>(
132 0 : 10 * log10(peak_level_sum_ * peak_level_sum_ /
133 : (kMetricsFrameInterval * kMetricsFrameInterval) +
134 0 : 1e-10f) -
135 0 : kdBFSOffset);
136 0 : RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AveragePeakLevel",
137 : average_peak_level_dbfs, -90, 0, 50);
138 :
139 0 : RTC_DCHECK_LE(1.f, max_gain_);
140 0 : RTC_DCHECK_LE(1.f, gain_sum_ / kMetricsFrameInterval);
141 :
142 0 : const int max_gain_db = static_cast<int>(10 * log10(max_gain_ * max_gain_));
143 0 : RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxGain", max_gain_db, 0,
144 : 33, 30);
145 :
146 : const int average_gain_db = static_cast<int>(
147 0 : 10 * log10(gain_sum_ * gain_sum_ /
148 0 : (kMetricsFrameInterval * kMetricsFrameInterval)));
149 0 : RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AverageGain",
150 : average_gain_db, 0, 33, 30);
151 :
152 : const int long_term_peak_level_dbfs = static_cast<int>(
153 0 : 10 * log10(long_term_peak_level * long_term_peak_level + 1e-10f) -
154 0 : kdBFSOffset);
155 :
156 : const int frame_peak_level_dbfs = static_cast<int>(
157 0 : 10 * log10(frame_peak_level * frame_peak_level + 1e-10f) - kdBFSOffset);
158 :
159 0 : LOG(LS_INFO) << "Level Controller metrics: {"
160 0 : << "Max noise power: " << max_noise_power_dbfs << " dBFS, "
161 0 : << "Average noise power: " << average_noise_power_dbfs
162 : << " dBFS, "
163 0 : << "Max long term peak level: " << max_peak_level_dbfs
164 : << " dBFS, "
165 0 : << "Average long term peak level: " << average_peak_level_dbfs
166 : << " dBFS, "
167 0 : << "Max gain: " << max_gain_db << " dB, "
168 0 : << "Average gain: " << average_gain_db << " dB, "
169 0 : << "Long term peak level: " << long_term_peak_level_dbfs
170 : << " dBFS, "
171 0 : << "Last frame peak level: " << frame_peak_level_dbfs
172 : << " dBFS"
173 0 : << "}";
174 :
175 0 : Reset();
176 : }
177 0 : }
178 :
179 0 : LevelController::LevelController()
180 0 : : data_dumper_(new ApmDataDumper(instance_count_)),
181 : gain_applier_(data_dumper_.get()),
182 : signal_classifier_(data_dumper_.get()),
183 0 : peak_level_estimator_(kTargetLcPeakLeveldBFS) {
184 0 : Initialize(AudioProcessing::kSampleRate48kHz);
185 0 : ++instance_count_;
186 0 : }
187 :
188 0 : LevelController::~LevelController() {}
189 :
190 0 : void LevelController::Initialize(int sample_rate_hz) {
191 0 : RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
192 : sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
193 : sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
194 0 : sample_rate_hz == AudioProcessing::kSampleRate48kHz);
195 0 : data_dumper_->InitiateNewSetOfRecordings();
196 0 : gain_selector_.Initialize(sample_rate_hz);
197 0 : gain_applier_.Initialize(sample_rate_hz);
198 0 : signal_classifier_.Initialize(sample_rate_hz);
199 0 : noise_level_estimator_.Initialize(sample_rate_hz);
200 0 : peak_level_estimator_.Initialize(config_.initial_peak_level_dbfs);
201 0 : saturating_gain_estimator_.Initialize();
202 0 : metrics_.Initialize(sample_rate_hz);
203 :
204 0 : last_gain_ = 1.0f;
205 0 : sample_rate_hz_ = rtc::Optional<int>(sample_rate_hz);
206 0 : dc_forgetting_factor_ = 0.01f * sample_rate_hz / 48000.f;
207 0 : std::fill(dc_level_, dc_level_ + arraysize(dc_level_), 0.f);
208 0 : }
209 :
210 0 : void LevelController::Process(AudioBuffer* audio) {
211 0 : RTC_DCHECK_LT(0, audio->num_channels());
212 0 : RTC_DCHECK_GE(2, audio->num_channels());
213 0 : RTC_DCHECK_NE(0.f, dc_forgetting_factor_);
214 0 : RTC_DCHECK(sample_rate_hz_);
215 0 : data_dumper_->DumpWav("lc_input", audio->num_frames(),
216 0 : audio->channels_const_f()[0], *sample_rate_hz_, 1);
217 :
218 : // Remove DC level.
219 0 : for (size_t k = 0; k < audio->num_channels(); ++k) {
220 0 : UpdateAndRemoveDcLevel(
221 : dc_forgetting_factor_, &dc_level_[k],
222 0 : rtc::ArrayView<float>(audio->channels_f()[k], audio->num_frames()));
223 : }
224 :
225 : SignalClassifier::SignalType signal_type;
226 0 : signal_classifier_.Analyze(*audio, &signal_type);
227 0 : int tmp = static_cast<int>(signal_type);
228 0 : data_dumper_->DumpRaw("lc_signal_type", 1, &tmp);
229 :
230 : // Estimate the noise energy.
231 : float noise_energy =
232 0 : noise_level_estimator_.Analyze(signal_type, FrameEnergy(*audio));
233 :
234 : // Estimate the overall signal peak level.
235 0 : const float frame_peak_level = PeakLevel(*audio);
236 : const float long_term_peak_level =
237 0 : peak_level_estimator_.Analyze(signal_type, frame_peak_level);
238 :
239 0 : float saturating_gain = saturating_gain_estimator_.GetGain();
240 :
241 : // Compute the new gain to apply.
242 0 : last_gain_ =
243 0 : gain_selector_.GetNewGain(long_term_peak_level, noise_energy,
244 0 : saturating_gain, gain_jumpstart_, signal_type);
245 :
246 : // Unflag the jumpstart of the gain as it should only happen once.
247 0 : gain_jumpstart_ = false;
248 :
249 : // Apply the gain to the signal.
250 0 : int num_saturations = gain_applier_.Process(last_gain_, audio);
251 :
252 : // Estimate the gain that saturates the overall signal.
253 0 : saturating_gain_estimator_.Update(last_gain_, num_saturations);
254 :
255 : // Update the metrics.
256 0 : metrics_.Update(long_term_peak_level, noise_energy, last_gain_,
257 0 : frame_peak_level);
258 :
259 0 : data_dumper_->DumpRaw("lc_selected_gain", 1, &last_gain_);
260 0 : data_dumper_->DumpRaw("lc_noise_energy", 1, &noise_energy);
261 0 : data_dumper_->DumpRaw("lc_peak_level", 1, &long_term_peak_level);
262 0 : data_dumper_->DumpRaw("lc_saturating_gain", 1, &saturating_gain);
263 :
264 0 : data_dumper_->DumpWav("lc_output", audio->num_frames(),
265 0 : audio->channels_f()[0], *sample_rate_hz_, 1);
266 0 : }
267 :
268 0 : void LevelController::ApplyConfig(
269 : const AudioProcessing::Config::LevelController& config) {
270 0 : RTC_DCHECK(Validate(config));
271 0 : config_ = config;
272 0 : peak_level_estimator_.Initialize(config_.initial_peak_level_dbfs);
273 0 : gain_jumpstart_ = true;
274 0 : }
275 :
276 0 : std::string LevelController::ToString(
277 : const AudioProcessing::Config::LevelController& config) {
278 0 : std::stringstream ss;
279 : ss << "{"
280 0 : << "enabled: " << (config.enabled ? "true" : "false") << ", "
281 0 : << "initial_peak_level_dbfs: " << config.initial_peak_level_dbfs << "}";
282 0 : return ss.str();
283 : }
284 :
285 0 : bool LevelController::Validate(
286 : const AudioProcessing::Config::LevelController& config) {
287 0 : return (config.initial_peak_level_dbfs <
288 0 : std::numeric_limits<float>::epsilon() &&
289 0 : config.initial_peak_level_dbfs >
290 0 : -(100.f + std::numeric_limits<float>::epsilon()));
291 : }
292 :
293 : } // namespace webrtc
|