Line data Source code
1 : /*
2 : * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.h"
12 :
13 : #include <algorithm>
14 : #include <memory>
15 : #include <limits>
16 : #include <utility>
17 :
18 : namespace webrtc {
19 :
namespace {

// Upper bound, in milliseconds, on the packet duration that EncodeImpl()
// accepts when VAD/CNG is in use.
constexpr int kMaxFrameSizeMs = 60;

}  // namespace
25 :
26 : AudioEncoderCng::Config::Config() = default;
27 : AudioEncoderCng::Config::Config(Config&&) = default;
28 : AudioEncoderCng::Config::~Config() = default;
29 :
30 0 : bool AudioEncoderCng::Config::IsOk() const {
31 0 : if (num_channels != 1)
32 0 : return false;
33 0 : if (!speech_encoder)
34 0 : return false;
35 0 : if (num_channels != speech_encoder->NumChannels())
36 0 : return false;
37 0 : if (sid_frame_interval_ms <
38 0 : static_cast<int>(speech_encoder->Max10MsFramesInAPacket() * 10))
39 0 : return false;
40 0 : if (num_cng_coefficients > WEBRTC_CNG_MAX_LPC_ORDER ||
41 0 : num_cng_coefficients <= 0)
42 0 : return false;
43 0 : return true;
44 : }
45 :
46 0 : AudioEncoderCng::AudioEncoderCng(Config&& config)
47 : : speech_encoder_(
48 0 : ([&] { RTC_CHECK(config.IsOk()) << "Invalid configuration."; }(),
49 0 : std::move(config.speech_encoder))),
50 0 : cng_payload_type_(config.payload_type),
51 0 : num_cng_coefficients_(config.num_cng_coefficients),
52 0 : sid_frame_interval_ms_(config.sid_frame_interval_ms),
53 : last_frame_active_(true),
54 0 : vad_(config.vad ? std::unique_ptr<Vad>(config.vad)
55 : : CreateVad(config.vad_mode)),
56 0 : cng_encoder_(new ComfortNoiseEncoder(SampleRateHz(),
57 0 : sid_frame_interval_ms_,
58 0 : num_cng_coefficients_)) {
59 0 : }
60 :
61 : AudioEncoderCng::~AudioEncoderCng() = default;
62 :
63 0 : int AudioEncoderCng::SampleRateHz() const {
64 0 : return speech_encoder_->SampleRateHz();
65 : }
66 :
67 0 : size_t AudioEncoderCng::NumChannels() const {
68 0 : return 1;
69 : }
70 :
71 0 : int AudioEncoderCng::RtpTimestampRateHz() const {
72 0 : return speech_encoder_->RtpTimestampRateHz();
73 : }
74 :
75 0 : size_t AudioEncoderCng::Num10MsFramesInNextPacket() const {
76 0 : return speech_encoder_->Num10MsFramesInNextPacket();
77 : }
78 :
79 0 : size_t AudioEncoderCng::Max10MsFramesInAPacket() const {
80 0 : return speech_encoder_->Max10MsFramesInAPacket();
81 : }
82 :
83 0 : int AudioEncoderCng::GetTargetBitrate() const {
84 0 : return speech_encoder_->GetTargetBitrate();
85 : }
86 :
87 0 : AudioEncoder::EncodedInfo AudioEncoderCng::EncodeImpl(
88 : uint32_t rtp_timestamp,
89 : rtc::ArrayView<const int16_t> audio,
90 : rtc::Buffer* encoded) {
91 0 : const size_t samples_per_10ms_frame = SamplesPer10msFrame();
92 0 : RTC_CHECK_EQ(speech_buffer_.size(),
93 0 : rtp_timestamps_.size() * samples_per_10ms_frame);
94 0 : rtp_timestamps_.push_back(rtp_timestamp);
95 0 : RTC_DCHECK_EQ(samples_per_10ms_frame, audio.size());
96 0 : speech_buffer_.insert(speech_buffer_.end(), audio.cbegin(), audio.cend());
97 0 : const size_t frames_to_encode = speech_encoder_->Num10MsFramesInNextPacket();
98 0 : if (rtp_timestamps_.size() < frames_to_encode) {
99 0 : return EncodedInfo();
100 : }
101 0 : RTC_CHECK_LE(frames_to_encode * 10, kMaxFrameSizeMs)
102 0 : << "Frame size cannot be larger than " << kMaxFrameSizeMs
103 0 : << " ms when using VAD/CNG.";
104 :
105 : // Group several 10 ms blocks per VAD call. Call VAD once or twice using the
106 : // following split sizes:
107 : // 10 ms = 10 + 0 ms; 20 ms = 20 + 0 ms; 30 ms = 30 + 0 ms;
108 : // 40 ms = 20 + 20 ms; 50 ms = 30 + 20 ms; 60 ms = 30 + 30 ms.
109 : size_t blocks_in_first_vad_call =
110 0 : (frames_to_encode > 3 ? 3 : frames_to_encode);
111 0 : if (frames_to_encode == 4)
112 0 : blocks_in_first_vad_call = 2;
113 0 : RTC_CHECK_GE(frames_to_encode, blocks_in_first_vad_call);
114 : const size_t blocks_in_second_vad_call =
115 0 : frames_to_encode - blocks_in_first_vad_call;
116 :
117 : // Check if all of the buffer is passive speech. Start with checking the first
118 : // block.
119 0 : Vad::Activity activity = vad_->VoiceActivity(
120 0 : &speech_buffer_[0], samples_per_10ms_frame * blocks_in_first_vad_call,
121 0 : SampleRateHz());
122 0 : if (activity == Vad::kPassive && blocks_in_second_vad_call > 0) {
123 : // Only check the second block if the first was passive.
124 0 : activity = vad_->VoiceActivity(
125 0 : &speech_buffer_[samples_per_10ms_frame * blocks_in_first_vad_call],
126 0 : samples_per_10ms_frame * blocks_in_second_vad_call, SampleRateHz());
127 : }
128 :
129 0 : EncodedInfo info;
130 0 : switch (activity) {
131 : case Vad::kPassive: {
132 0 : info = EncodePassive(frames_to_encode, encoded);
133 0 : last_frame_active_ = false;
134 0 : break;
135 : }
136 : case Vad::kActive: {
137 0 : info = EncodeActive(frames_to_encode, encoded);
138 0 : last_frame_active_ = true;
139 0 : break;
140 : }
141 : case Vad::kError: {
142 0 : FATAL(); // Fails only if fed invalid data.
143 : break;
144 : }
145 : }
146 :
147 : speech_buffer_.erase(
148 0 : speech_buffer_.begin(),
149 0 : speech_buffer_.begin() + frames_to_encode * samples_per_10ms_frame);
150 0 : rtp_timestamps_.erase(rtp_timestamps_.begin(),
151 0 : rtp_timestamps_.begin() + frames_to_encode);
152 0 : return info;
153 : }
154 :
155 0 : void AudioEncoderCng::Reset() {
156 0 : speech_encoder_->Reset();
157 0 : speech_buffer_.clear();
158 0 : rtp_timestamps_.clear();
159 0 : last_frame_active_ = true;
160 0 : vad_->Reset();
161 0 : cng_encoder_.reset(
162 0 : new ComfortNoiseEncoder(SampleRateHz(), sid_frame_interval_ms_,
163 0 : num_cng_coefficients_));
164 0 : }
165 :
166 0 : bool AudioEncoderCng::SetFec(bool enable) {
167 0 : return speech_encoder_->SetFec(enable);
168 : }
169 :
170 0 : bool AudioEncoderCng::SetDtx(bool enable) {
171 0 : return speech_encoder_->SetDtx(enable);
172 : }
173 :
174 0 : bool AudioEncoderCng::SetApplication(Application application) {
175 0 : return speech_encoder_->SetApplication(application);
176 : }
177 :
178 0 : void AudioEncoderCng::SetMaxPlaybackRate(int frequency_hz) {
179 0 : speech_encoder_->SetMaxPlaybackRate(frequency_hz);
180 0 : }
181 :
182 : rtc::ArrayView<std::unique_ptr<AudioEncoder>>
183 0 : AudioEncoderCng::ReclaimContainedEncoders() {
184 0 : return rtc::ArrayView<std::unique_ptr<AudioEncoder>>(&speech_encoder_, 1);
185 : }
186 :
187 0 : void AudioEncoderCng::OnReceivedUplinkPacketLossFraction(
188 : float uplink_packet_loss_fraction) {
189 0 : speech_encoder_->OnReceivedUplinkPacketLossFraction(
190 0 : uplink_packet_loss_fraction);
191 0 : }
192 :
193 0 : void AudioEncoderCng::OnReceivedUplinkBandwidth(
194 : int target_audio_bitrate_bps,
195 : rtc::Optional<int64_t> probing_interval_ms) {
196 0 : speech_encoder_->OnReceivedUplinkBandwidth(target_audio_bitrate_bps,
197 0 : probing_interval_ms);
198 0 : }
199 :
200 0 : AudioEncoder::EncodedInfo AudioEncoderCng::EncodePassive(
201 : size_t frames_to_encode,
202 : rtc::Buffer* encoded) {
203 0 : bool force_sid = last_frame_active_;
204 0 : bool output_produced = false;
205 0 : const size_t samples_per_10ms_frame = SamplesPer10msFrame();
206 0 : AudioEncoder::EncodedInfo info;
207 :
208 0 : for (size_t i = 0; i < frames_to_encode; ++i) {
209 : // It's important not to pass &info.encoded_bytes directly to
210 : // WebRtcCng_Encode(), since later loop iterations may return zero in
211 : // that value, in which case we don't want to overwrite any value from
212 : // an earlier iteration.
213 : size_t encoded_bytes_tmp =
214 0 : cng_encoder_->Encode(
215 : rtc::ArrayView<const int16_t>(
216 0 : &speech_buffer_[i * samples_per_10ms_frame],
217 : samples_per_10ms_frame),
218 0 : force_sid, encoded);
219 :
220 0 : if (encoded_bytes_tmp > 0) {
221 0 : RTC_CHECK(!output_produced);
222 0 : info.encoded_bytes = encoded_bytes_tmp;
223 0 : output_produced = true;
224 0 : force_sid = false;
225 : }
226 : }
227 :
228 0 : info.encoded_timestamp = rtp_timestamps_.front();
229 0 : info.payload_type = cng_payload_type_;
230 0 : info.send_even_if_empty = true;
231 0 : info.speech = false;
232 0 : return info;
233 : }
234 :
235 0 : AudioEncoder::EncodedInfo AudioEncoderCng::EncodeActive(
236 : size_t frames_to_encode,
237 : rtc::Buffer* encoded) {
238 0 : const size_t samples_per_10ms_frame = SamplesPer10msFrame();
239 0 : AudioEncoder::EncodedInfo info;
240 0 : for (size_t i = 0; i < frames_to_encode; ++i) {
241 : info =
242 0 : speech_encoder_->Encode(rtp_timestamps_.front(),
243 : rtc::ArrayView<const int16_t>(
244 0 : &speech_buffer_[i * samples_per_10ms_frame],
245 : samples_per_10ms_frame),
246 0 : encoded);
247 0 : if (i + 1 == frames_to_encode) {
248 0 : RTC_CHECK_GT(info.encoded_bytes, 0) << "Encoder didn't deliver data.";
249 : } else {
250 0 : RTC_CHECK_EQ(info.encoded_bytes, 0)
251 0 : << "Encoder delivered data too early.";
252 : }
253 : }
254 0 : return info;
255 : }
256 :
257 0 : size_t AudioEncoderCng::SamplesPer10msFrame() const {
258 0 : return rtc::CheckedDivExact(10 * SampleRateHz(), 1000);
259 : }
260 :
261 : } // namespace webrtc
|