Line data Source code
1 : /*
2 : * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "webrtc/modules/audio_coding/neteq/normal.h"
12 :
13 : #include <string.h> // memset, memcpy
14 :
15 : #include <algorithm> // min
16 :
17 : #include "webrtc/base/checks.h"
18 : #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
19 : #include "webrtc/modules/audio_coding/codecs/audio_decoder.h"
20 : #include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
21 : #include "webrtc/modules/audio_coding/neteq/background_noise.h"
22 : #include "webrtc/modules/audio_coding/neteq/decoder_database.h"
23 : #include "webrtc/modules/audio_coding/neteq/expand.h"
24 :
25 : namespace webrtc {
26 :
27 0 : int Normal::Process(const int16_t* input,
28 : size_t length,
29 : Modes last_mode,
30 : int16_t* external_mute_factor_array,
31 : AudioMultiVector* output) {
32 0 : if (length == 0) {
33 : // Nothing to process.
34 0 : output->Clear();
35 0 : return static_cast<int>(length);
36 : }
37 :
38 0 : RTC_DCHECK(output->Empty());
39 : // Output should be empty at this point.
40 0 : if (length % output->Channels() != 0) {
41 : // The length does not match the number of channels.
42 0 : output->Clear();
43 0 : return 0;
44 : }
45 0 : output->PushBackInterleaved(input, length);
46 :
47 0 : const int fs_mult = fs_hz_ / 8000;
48 0 : RTC_DCHECK_GT(fs_mult, 0);
49 : // fs_shift = log2(fs_mult), rounded down.
50 : // Note that |fs_shift| is not "exact" for 48 kHz.
51 : // TODO(hlundin): Investigate this further.
52 0 : const int fs_shift = 30 - WebRtcSpl_NormW32(fs_mult);
53 :
54 : // Check if last RecOut call resulted in an Expand. If so, we have to take
55 : // care of some cross-fading and unmuting.
56 0 : if (last_mode == kModeExpand) {
57 : // Generate interpolation data using Expand.
58 : // First, set Expand parameters to appropriate values.
59 0 : expand_->SetParametersForNormalAfterExpand();
60 :
61 : // Call Expand.
62 0 : AudioMultiVector expanded(output->Channels());
63 0 : expand_->Process(&expanded);
64 0 : expand_->Reset();
65 :
66 0 : size_t length_per_channel = length / output->Channels();
67 0 : std::unique_ptr<int16_t[]> signal(new int16_t[length_per_channel]);
68 0 : for (size_t channel_ix = 0; channel_ix < output->Channels(); ++channel_ix) {
69 : // Adjust muting factor (main muting factor times expand muting factor).
70 0 : external_mute_factor_array[channel_ix] = static_cast<int16_t>(
71 0 : (external_mute_factor_array[channel_ix] *
72 0 : expand_->MuteFactor(channel_ix)) >> 14);
73 :
74 0 : (*output)[channel_ix].CopyTo(length_per_channel, 0, signal.get());
75 :
76 : // Find largest absolute value in new data.
77 : int16_t decoded_max =
78 0 : WebRtcSpl_MaxAbsValueW16(signal.get(), length_per_channel);
79 : // Adjust muting factor if needed (to BGN level).
80 : size_t energy_length =
81 0 : std::min(static_cast<size_t>(fs_mult * 64), length_per_channel);
82 0 : int scaling = 6 + fs_shift
83 0 : - WebRtcSpl_NormW32(decoded_max * decoded_max);
84 0 : scaling = std::max(scaling, 0); // |scaling| should always be >= 0.
85 0 : int32_t energy = WebRtcSpl_DotProductWithScale(signal.get(), signal.get(),
86 0 : energy_length, scaling);
87 : int32_t scaled_energy_length =
88 0 : static_cast<int32_t>(energy_length >> scaling);
89 0 : if (scaled_energy_length > 0) {
90 0 : energy = energy / scaled_energy_length;
91 : } else {
92 0 : energy = 0;
93 : }
94 :
95 : int mute_factor;
96 0 : if ((energy != 0) &&
97 0 : (energy > background_noise_.Energy(channel_ix))) {
98 : // Normalize new frame energy to 15 bits.
99 0 : scaling = WebRtcSpl_NormW32(energy) - 16;
100 : // We want background_noise_.energy() / energy in Q14.
101 0 : int32_t bgn_energy = WEBRTC_SPL_SHIFT_W32(
102 : background_noise_.Energy(channel_ix), scaling + 14);
103 : int16_t energy_scaled =
104 0 : static_cast<int16_t>(WEBRTC_SPL_SHIFT_W32(energy, scaling));
105 0 : int32_t ratio = WebRtcSpl_DivW32W16(bgn_energy, energy_scaled);
106 0 : mute_factor = WebRtcSpl_SqrtFloor(ratio << 14);
107 : } else {
108 0 : mute_factor = 16384; // 1.0 in Q14.
109 : }
110 0 : if (mute_factor > external_mute_factor_array[channel_ix]) {
111 0 : external_mute_factor_array[channel_ix] =
112 0 : static_cast<int16_t>(std::min(mute_factor, 16384));
113 : }
114 :
115 : // If muted increase by 0.64 for every 20 ms (NB/WB 0.0040/0.0020 in Q14).
116 0 : int increment = 64 / fs_mult;
117 0 : for (size_t i = 0; i < length_per_channel; i++) {
118 : // Scale with mute factor.
119 0 : RTC_DCHECK_LT(channel_ix, output->Channels());
120 0 : RTC_DCHECK_LT(i, output->Size());
121 0 : int32_t scaled_signal = (*output)[channel_ix][i] *
122 0 : external_mute_factor_array[channel_ix];
123 : // Shift 14 with proper rounding.
124 0 : (*output)[channel_ix][i] =
125 0 : static_cast<int16_t>((scaled_signal + 8192) >> 14);
126 : // Increase mute_factor towards 16384.
127 0 : external_mute_factor_array[channel_ix] = static_cast<int16_t>(std::min(
128 0 : external_mute_factor_array[channel_ix] + increment, 16384));
129 : }
130 :
131 : // Interpolate the expanded data into the new vector.
132 : // (NB/WB/SWB32/SWB48 8/16/32/48 samples.)
133 0 : RTC_DCHECK_LT(fs_shift, 3); // Will always be 0, 1, or, 2.
134 0 : increment = 4 >> fs_shift;
135 0 : int fraction = increment;
136 : // Don't interpolate over more samples than what is in output. When this
137 : // cap strikes, the interpolation will likely sound worse, but this is an
138 : // emergency operation in response to unexpected input.
139 : const size_t interp_len_samples =
140 0 : std::min(static_cast<size_t>(8 * fs_mult), output->Size());
141 0 : for (size_t i = 0; i < interp_len_samples; ++i) {
142 : // TODO(hlundin): Add 16 instead of 8 for correct rounding. Keeping 8
143 : // now for legacy bit-exactness.
144 0 : RTC_DCHECK_LT(channel_ix, output->Channels());
145 0 : RTC_DCHECK_LT(i, output->Size());
146 0 : (*output)[channel_ix][i] =
147 0 : static_cast<int16_t>((fraction * (*output)[channel_ix][i] +
148 0 : (32 - fraction) * expanded[channel_ix][i] + 8) >> 5);
149 0 : fraction += increment;
150 : }
151 : }
152 0 : } else if (last_mode == kModeRfc3389Cng) {
153 0 : RTC_DCHECK_EQ(output->Channels(), 1); // Not adapted for multi-channel yet.
154 : static const size_t kCngLength = 48;
155 0 : RTC_DCHECK_LE(8 * fs_mult, kCngLength);
156 : int16_t cng_output[kCngLength];
157 : // Reset mute factor and start up fresh.
158 0 : external_mute_factor_array[0] = 16384;
159 0 : ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
160 :
161 0 : if (cng_decoder) {
162 : // Generate long enough for 48kHz.
163 0 : if (!cng_decoder->Generate(cng_output, 0)) {
164 : // Error returned; set return vector to all zeros.
165 0 : memset(cng_output, 0, sizeof(cng_output));
166 : }
167 : } else {
168 : // If no CNG instance is defined, just copy from the decoded data.
169 : // (This will result in interpolating the decoded with itself.)
170 0 : (*output)[0].CopyTo(fs_mult * 8, 0, cng_output);
171 : }
172 : // Interpolate the CNG into the new vector.
173 : // (NB/WB/SWB32/SWB48 8/16/32/48 samples.)
174 0 : RTC_DCHECK_LT(fs_shift, 3); // Will always be 0, 1, or, 2.
175 0 : int16_t increment = 4 >> fs_shift;
176 0 : int16_t fraction = increment;
177 0 : for (size_t i = 0; i < static_cast<size_t>(8 * fs_mult); i++) {
178 : // TODO(hlundin): Add 16 instead of 8 for correct rounding. Keeping 8 now
179 : // for legacy bit-exactness.
180 0 : (*output)[0][i] = (fraction * (*output)[0][i] +
181 0 : (32 - fraction) * cng_output[i] + 8) >> 5;
182 0 : fraction += increment;
183 : }
184 0 : } else if (external_mute_factor_array[0] < 16384) {
185 : // Previous was neither of Expand, FadeToBGN or RFC3389_CNG, but we are
186 : // still ramping up from previous muting.
187 : // If muted increase by 0.64 for every 20 ms (NB/WB 0.0040/0.0020 in Q14).
188 0 : int increment = 64 / fs_mult;
189 0 : size_t length_per_channel = length / output->Channels();
190 0 : for (size_t i = 0; i < length_per_channel; i++) {
191 0 : for (size_t channel_ix = 0; channel_ix < output->Channels();
192 : ++channel_ix) {
193 : // Scale with mute factor.
194 0 : RTC_DCHECK_LT(channel_ix, output->Channels());
195 0 : RTC_DCHECK_LT(i, output->Size());
196 0 : int32_t scaled_signal = (*output)[channel_ix][i] *
197 0 : external_mute_factor_array[channel_ix];
198 : // Shift 14 with proper rounding.
199 0 : (*output)[channel_ix][i] =
200 0 : static_cast<int16_t>((scaled_signal + 8192) >> 14);
201 : // Increase mute_factor towards 16384.
202 0 : external_mute_factor_array[channel_ix] = static_cast<int16_t>(std::min(
203 0 : 16384, external_mute_factor_array[channel_ix] + increment));
204 : }
205 : }
206 : }
207 :
208 0 : return static_cast<int>(length);
209 : }
210 :
211 : } // namespace webrtc
|