Line data Source code
1 : /*
2 : * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "webrtc/modules/audio_coding/include/audio_coding_module.h"
12 :
13 : #include "webrtc/base/checks.h"
14 : #include "webrtc/base/safe_conversions.h"
15 : #include "webrtc/modules/audio_coding/acm2/acm_receiver.h"
16 : #include "webrtc/modules/audio_coding/acm2/acm_resampler.h"
17 : #include "webrtc/modules/audio_coding/acm2/codec_manager.h"
18 : #include "webrtc/modules/audio_coding/acm2/rent_a_codec.h"
19 : #include "webrtc/modules/audio_coding/codecs/builtin_audio_decoder_factory.h"
20 : #include "webrtc/system_wrappers/include/metrics.h"
21 : #include "webrtc/system_wrappers/include/trace.h"
22 :
23 : namespace webrtc {
24 :
25 : namespace {
26 :
// Bundles everything needed to (re)build the send-side encoder stack: an
// optional externally-supplied speech encoder, the codec manager holding the
// current codec specification, and the rent-a-codec that instantiates
// encoders from such specifications.
struct EncoderFactory {
  // Not owned; nullptr unless RegisterExternalSendCodec() installed one.
  AudioEncoder* external_speech_encoder = nullptr;
  acm2::CodecManager codec_manager;
  acm2::RentACodec rent_a_codec;
};
32 :
33 0 : class AudioCodingModuleImpl final : public AudioCodingModule {
34 : public:
35 : explicit AudioCodingModuleImpl(const AudioCodingModule::Config& config);
36 : ~AudioCodingModuleImpl() override;
37 :
38 : /////////////////////////////////////////
39 : // Sender
40 : //
41 :
42 : // Can be called multiple times for Codec, CNG, RED.
43 : int RegisterSendCodec(const CodecInst& send_codec) override;
44 :
45 : void RegisterExternalSendCodec(
46 : AudioEncoder* external_speech_encoder) override;
47 :
48 : void ModifyEncoder(rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)>
49 : modifier) override;
50 :
51 : void QueryEncoder(
52 : rtc::FunctionView<void(const AudioEncoder*)> query) override;
53 :
54 : // Get current send codec.
55 : rtc::Optional<CodecInst> SendCodec() const override;
56 :
57 : // Get current send frequency.
58 : int SendFrequency() const override;
59 :
60 : // Sets the bitrate to the specified value in bits/sec. In case the codec does
61 : // not support the requested value it will choose an appropriate value
62 : // instead.
63 : void SetBitRate(int bitrate_bps) override;
64 :
65 : // Register a transport callback which will be
66 : // called to deliver the encoded buffers.
67 : int RegisterTransportCallback(AudioPacketizationCallback* transport) override;
68 :
69 : // Add 10 ms of raw (PCM) audio data to the encoder.
70 : int Add10MsData(const AudioFrame& audio_frame) override;
71 :
72 : /////////////////////////////////////////
73 : // (RED) Redundant Coding
74 : //
75 :
76 : // Configure RED status i.e. on/off.
77 : int SetREDStatus(bool enable_red) override;
78 :
79 : // Get RED status.
80 : bool REDStatus() const override;
81 :
82 : /////////////////////////////////////////
83 : // (FEC) Forward Error Correction (codec internal)
84 : //
85 :
86 : // Configure FEC status i.e. on/off.
87 : int SetCodecFEC(bool enabled_codec_fec) override;
88 :
89 : // Get FEC status.
90 : bool CodecFEC() const override;
91 :
92 : // Set target packet loss rate
93 : int SetPacketLossRate(int loss_rate) override;
94 :
95 : /////////////////////////////////////////
96 : // (VAD) Voice Activity Detection
97 : // and
98 : // (CNG) Comfort Noise Generation
99 : //
100 :
101 : int SetVAD(bool enable_dtx = true,
102 : bool enable_vad = false,
103 : ACMVADMode mode = VADNormal) override;
104 :
105 : int VAD(bool* dtx_enabled,
106 : bool* vad_enabled,
107 : ACMVADMode* mode) const override;
108 :
109 : int RegisterVADCallback(ACMVADCallback* vad_callback) override;
110 :
111 : /////////////////////////////////////////
112 : // Receiver
113 : //
114 :
115 : // Initialize receiver, resets codec database etc.
116 : int InitializeReceiver() override;
117 :
118 : // Get current receive frequency.
119 : int ReceiveFrequency() const override;
120 :
121 : // Get current playout frequency.
122 : int PlayoutFrequency() const override;
123 :
124 : bool RegisterReceiveCodec(int rtp_payload_type,
125 : const SdpAudioFormat& audio_format) override;
126 :
127 : int RegisterReceiveCodec(const CodecInst& receive_codec) override;
128 : int RegisterReceiveCodec(
129 : const CodecInst& receive_codec,
130 : rtc::FunctionView<std::unique_ptr<AudioDecoder>()> isac_factory) override;
131 :
132 : int RegisterExternalReceiveCodec(int rtp_payload_type,
133 : AudioDecoder* external_decoder,
134 : int sample_rate_hz,
135 : int num_channels,
136 : const std::string& name) override;
137 :
138 : // Get current received codec.
139 : int ReceiveCodec(CodecInst* current_codec) const override;
140 :
141 : rtc::Optional<SdpAudioFormat> ReceiveFormat() const override;
142 :
143 : // Incoming packet from network parsed and ready for decode.
144 : int IncomingPacket(const uint8_t* incoming_payload,
145 : const size_t payload_length,
146 : const WebRtcRTPHeader& rtp_info) override;
147 :
148 : // Incoming payloads, without rtp-info, the rtp-info will be created in ACM.
149 : // One usage for this API is when pre-encoded files are pushed in ACM.
150 : int IncomingPayload(const uint8_t* incoming_payload,
151 : const size_t payload_length,
152 : uint8_t payload_type,
153 : uint32_t timestamp) override;
154 :
155 : // Minimum playout delay.
156 : int SetMinimumPlayoutDelay(int time_ms) override;
157 :
158 : // Maximum playout delay.
159 : int SetMaximumPlayoutDelay(int time_ms) override;
160 :
161 : // Smallest latency NetEq will maintain.
162 : int LeastRequiredDelayMs() const override;
163 :
164 : RTC_DEPRECATED int32_t PlayoutTimestamp(uint32_t* timestamp) override;
165 :
166 : rtc::Optional<uint32_t> PlayoutTimestamp() override;
167 :
168 : int FilteredCurrentDelayMs() const override;
169 :
170 : // Get 10 milliseconds of raw audio data to play out, and
171 : // automatic resample to the requested frequency if > 0.
172 : int PlayoutData10Ms(int desired_freq_hz,
173 : AudioFrame* audio_frame,
174 : bool* muted) override;
175 : int PlayoutData10Ms(int desired_freq_hz, AudioFrame* audio_frame) override;
176 :
177 : /////////////////////////////////////////
178 : // Statistics
179 : //
180 :
181 : int GetNetworkStatistics(NetworkStatistics* statistics) override;
182 :
183 : int SetOpusApplication(OpusApplicationMode application) override;
184 :
185 : // If current send codec is Opus, informs it about the maximum playback rate
186 : // the receiver will render.
187 : int SetOpusMaxPlaybackRate(int frequency_hz) override;
188 :
189 : int EnableOpusDtx() override;
190 :
191 : int DisableOpusDtx() override;
192 :
193 : int UnregisterReceiveCodec(uint8_t payload_type) override;
194 :
195 : int EnableNack(size_t max_nack_list_size) override;
196 :
197 : void DisableNack() override;
198 :
199 : std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const override;
200 :
201 : void GetDecodingCallStatistics(AudioDecodingCallStats* stats) const override;
202 :
203 : private:
204 : struct InputData {
205 : uint32_t input_timestamp;
206 : const int16_t* audio;
207 : size_t length_per_channel;
208 : size_t audio_channel;
209 : // If a re-mix is required (up or down), this buffer will store a re-mixed
210 : // version of the input.
211 : int16_t buffer[WEBRTC_10MS_PCM_AUDIO];
212 : };
213 :
214 : // This member class writes values to the named UMA histogram, but only if
215 : // the value has changed since the last time (and always for the first call).
216 0 : class ChangeLogger {
217 : public:
218 0 : explicit ChangeLogger(const std::string& histogram_name)
219 0 : : histogram_name_(histogram_name) {}
220 : // Logs the new value if it is different from the last logged value, or if
221 : // this is the first call.
222 : void MaybeLog(int value);
223 :
224 : private:
225 : int last_value_ = 0;
226 : int first_time_ = true;
227 : const std::string histogram_name_;
228 : };
229 :
230 : int RegisterReceiveCodecUnlocked(
231 : const CodecInst& codec,
232 : rtc::FunctionView<std::unique_ptr<AudioDecoder>()> isac_factory)
233 : EXCLUSIVE_LOCKS_REQUIRED(acm_crit_sect_);
234 :
235 : int Add10MsDataInternal(const AudioFrame& audio_frame, InputData* input_data)
236 : EXCLUSIVE_LOCKS_REQUIRED(acm_crit_sect_);
237 : int Encode(const InputData& input_data)
238 : EXCLUSIVE_LOCKS_REQUIRED(acm_crit_sect_);
239 :
240 : int InitializeReceiverSafe() EXCLUSIVE_LOCKS_REQUIRED(acm_crit_sect_);
241 :
242 : bool HaveValidEncoder(const char* caller_name) const
243 : EXCLUSIVE_LOCKS_REQUIRED(acm_crit_sect_);
244 :
245 : // Preprocessing of input audio, including resampling and down-mixing if
246 : // required, before pushing audio into encoder's buffer.
247 : //
248 : // in_frame: input audio-frame
249 : // ptr_out: pointer to output audio_frame. If no preprocessing is required
250 : // |ptr_out| will be pointing to |in_frame|, otherwise pointing to
251 : // |preprocess_frame_|.
252 : //
253 : // Return value:
254 : // -1: if encountering an error.
255 : // 0: otherwise.
256 : int PreprocessToAddData(const AudioFrame& in_frame,
257 : const AudioFrame** ptr_out)
258 : EXCLUSIVE_LOCKS_REQUIRED(acm_crit_sect_);
259 :
260 : // Change required states after starting to receive the codec corresponding
261 : // to |index|.
262 : int UpdateUponReceivingCodec(int index);
263 :
264 : rtc::CriticalSection acm_crit_sect_;
265 : rtc::Buffer encode_buffer_ GUARDED_BY(acm_crit_sect_);
266 : int id_; // TODO(henrik.lundin) Make const.
267 : uint32_t expected_codec_ts_ GUARDED_BY(acm_crit_sect_);
268 : uint32_t expected_in_ts_ GUARDED_BY(acm_crit_sect_);
269 : acm2::ACMResampler resampler_ GUARDED_BY(acm_crit_sect_);
270 : acm2::AcmReceiver receiver_; // AcmReceiver has it's own internal lock.
271 : ChangeLogger bitrate_logger_ GUARDED_BY(acm_crit_sect_);
272 :
273 : std::unique_ptr<EncoderFactory> encoder_factory_ GUARDED_BY(acm_crit_sect_);
274 :
275 : // Current encoder stack, either obtained from
276 : // encoder_factory_->rent_a_codec.RentEncoderStack or provided by a call to
277 : // RegisterEncoder.
278 : std::unique_ptr<AudioEncoder> encoder_stack_ GUARDED_BY(acm_crit_sect_);
279 :
280 : std::unique_ptr<AudioDecoder> isac_decoder_16k_ GUARDED_BY(acm_crit_sect_);
281 : std::unique_ptr<AudioDecoder> isac_decoder_32k_ GUARDED_BY(acm_crit_sect_);
282 :
283 : // This is to keep track of CN instances where we can send DTMFs.
284 : uint8_t previous_pltype_ GUARDED_BY(acm_crit_sect_);
285 :
286 : // Used when payloads are pushed into ACM without any RTP info
287 : // One example is when pre-encoded bit-stream is pushed from
288 : // a file.
289 : // IMPORTANT: this variable is only used in IncomingPayload(), therefore,
290 : // no lock acquired when interacting with this variable. If it is going to
291 : // be used in other methods, locks need to be taken.
292 : std::unique_ptr<WebRtcRTPHeader> aux_rtp_header_;
293 :
294 : bool receiver_initialized_ GUARDED_BY(acm_crit_sect_);
295 :
296 : AudioFrame preprocess_frame_ GUARDED_BY(acm_crit_sect_);
297 : bool first_10ms_data_ GUARDED_BY(acm_crit_sect_);
298 :
299 : bool first_frame_ GUARDED_BY(acm_crit_sect_);
300 : uint32_t last_timestamp_ GUARDED_BY(acm_crit_sect_);
301 : uint32_t last_rtp_timestamp_ GUARDED_BY(acm_crit_sect_);
302 :
303 : rtc::CriticalSection callback_crit_sect_;
304 : AudioPacketizationCallback* packetization_callback_
305 : GUARDED_BY(callback_crit_sect_);
306 : ACMVADCallback* vad_callback_ GUARDED_BY(callback_crit_sect_);
307 :
308 : int codec_histogram_bins_log_[static_cast<size_t>(
309 : AudioEncoder::CodecType::kMaxLoggedAudioCodecTypes)];
310 : int number_of_consecutive_empty_packets_;
311 : };
312 :
313 : // Adds a codec usage sample to the histogram.
314 0 : void UpdateCodecTypeHistogram(size_t codec_type) {
315 0 : RTC_HISTOGRAM_ENUMERATION(
316 : "WebRTC.Audio.Encoder.CodecType", static_cast<int>(codec_type),
317 : static_cast<int>(
318 : webrtc::AudioEncoder::CodecType::kMaxLoggedAudioCodecTypes));
319 0 : }
320 :
321 : // TODO(turajs): the same functionality is used in NetEq. If both classes
322 : // need them, make it a static function in ACMCodecDB.
323 0 : bool IsCodecRED(const CodecInst& codec) {
324 0 : return (STR_CASE_CMP(codec.plname, "RED") == 0);
325 : }
326 :
327 0 : bool IsCodecCN(const CodecInst& codec) {
328 0 : return (STR_CASE_CMP(codec.plname, "CN") == 0);
329 : }
330 :
331 : // Stereo-to-mono can be used as in-place.
332 0 : int DownMix(const AudioFrame& frame,
333 : size_t length_out_buff,
334 : int16_t* out_buff) {
335 0 : if (length_out_buff < frame.samples_per_channel_) {
336 0 : return -1;
337 : }
338 0 : for (size_t n = 0; n < frame.samples_per_channel_; ++n)
339 0 : out_buff[n] = (frame.data_[2 * n] + frame.data_[2 * n + 1]) >> 1;
340 0 : return 0;
341 : }
342 :
343 : // Mono-to-stereo can be used as in-place.
344 0 : int UpMix(const AudioFrame& frame, size_t length_out_buff, int16_t* out_buff) {
345 0 : if (length_out_buff < frame.samples_per_channel_) {
346 0 : return -1;
347 : }
348 0 : for (size_t n = frame.samples_per_channel_; n != 0; --n) {
349 0 : size_t i = n - 1;
350 0 : int16_t sample = frame.data_[i];
351 0 : out_buff[2 * i + 1] = sample;
352 0 : out_buff[2 * i] = sample;
353 : }
354 0 : return 0;
355 : }
356 :
357 0 : void ConvertEncodedInfoToFragmentationHeader(
358 : const AudioEncoder::EncodedInfo& info,
359 : RTPFragmentationHeader* frag) {
360 0 : if (info.redundant.empty()) {
361 0 : frag->fragmentationVectorSize = 0;
362 0 : return;
363 : }
364 :
365 0 : frag->VerifyAndAllocateFragmentationHeader(
366 0 : static_cast<uint16_t>(info.redundant.size()));
367 0 : frag->fragmentationVectorSize = static_cast<uint16_t>(info.redundant.size());
368 0 : size_t offset = 0;
369 0 : for (size_t i = 0; i < info.redundant.size(); ++i) {
370 0 : frag->fragmentationOffset[i] = offset;
371 0 : offset += info.redundant[i].encoded_bytes;
372 0 : frag->fragmentationLength[i] = info.redundant[i].encoded_bytes;
373 0 : frag->fragmentationTimeDiff[i] = rtc::checked_cast<uint16_t>(
374 0 : info.encoded_timestamp - info.redundant[i].encoded_timestamp);
375 0 : frag->fragmentationPlType[i] = info.redundant[i].payload_type;
376 : }
377 : }
378 :
379 : // Wraps a raw AudioEncoder pointer. The idea is that you can put one of these
380 : // in a unique_ptr, to protect the contained raw pointer from being deleted
381 : // when the unique_ptr expires. (This is of course a bad idea in general, but
382 : // backwards compatibility.)
383 0 : class RawAudioEncoderWrapper final : public AudioEncoder {
384 : public:
385 0 : RawAudioEncoderWrapper(AudioEncoder* enc) : enc_(enc) {}
386 0 : int SampleRateHz() const override { return enc_->SampleRateHz(); }
387 0 : size_t NumChannels() const override { return enc_->NumChannels(); }
388 0 : int RtpTimestampRateHz() const override { return enc_->RtpTimestampRateHz(); }
389 0 : size_t Num10MsFramesInNextPacket() const override {
390 0 : return enc_->Num10MsFramesInNextPacket();
391 : }
392 0 : size_t Max10MsFramesInAPacket() const override {
393 0 : return enc_->Max10MsFramesInAPacket();
394 : }
395 0 : int GetTargetBitrate() const override { return enc_->GetTargetBitrate(); }
396 0 : EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
397 : rtc::ArrayView<const int16_t> audio,
398 : rtc::Buffer* encoded) override {
399 0 : return enc_->Encode(rtp_timestamp, audio, encoded);
400 : }
401 0 : void Reset() override { return enc_->Reset(); }
402 0 : bool SetFec(bool enable) override { return enc_->SetFec(enable); }
403 0 : bool SetDtx(bool enable) override { return enc_->SetDtx(enable); }
404 0 : bool SetApplication(Application application) override {
405 0 : return enc_->SetApplication(application);
406 : }
407 0 : void SetMaxPlaybackRate(int frequency_hz) override {
408 0 : return enc_->SetMaxPlaybackRate(frequency_hz);
409 : }
410 :
411 : private:
412 : AudioEncoder* enc_;
413 : };
414 :
415 : // Return false on error.
416 0 : bool CreateSpeechEncoderIfNecessary(EncoderFactory* ef) {
417 0 : auto* sp = ef->codec_manager.GetStackParams();
418 0 : if (sp->speech_encoder) {
419 : // Do nothing; we already have a speech encoder.
420 0 : } else if (ef->codec_manager.GetCodecInst()) {
421 0 : RTC_DCHECK(!ef->external_speech_encoder);
422 : // We have no speech encoder, but we have a specification for making one.
423 : std::unique_ptr<AudioEncoder> enc =
424 0 : ef->rent_a_codec.RentEncoder(*ef->codec_manager.GetCodecInst());
425 0 : if (!enc)
426 0 : return false; // Encoder spec was bad.
427 0 : sp->speech_encoder = std::move(enc);
428 0 : } else if (ef->external_speech_encoder) {
429 0 : RTC_DCHECK(!ef->codec_manager.GetCodecInst());
430 : // We have an external speech encoder.
431 0 : sp->speech_encoder = std::unique_ptr<AudioEncoder>(
432 0 : new RawAudioEncoderWrapper(ef->external_speech_encoder));
433 : }
434 0 : return true;
435 : }
436 :
437 0 : void AudioCodingModuleImpl::ChangeLogger::MaybeLog(int value) {
438 0 : if (value != last_value_ || first_time_) {
439 0 : first_time_ = false;
440 0 : last_value_ = value;
441 0 : RTC_HISTOGRAM_COUNTS_SPARSE_100(histogram_name_, value);
442 : }
443 0 : }
444 :
// Constructs the module and immediately initializes the receiver side.
// Initialization failure is only traced; the object is still constructed.
AudioCodingModuleImpl::AudioCodingModuleImpl(
    const AudioCodingModule::Config& config)
    : id_(config.id),
      // 0xD87F3F9F appears to be an arbitrary sentinel start value for the
      // timestamp trackers — TODO confirm it has no further significance.
      expected_codec_ts_(0xD87F3F9F),
      expected_in_ts_(0xD87F3F9F),
      receiver_(config),
      bitrate_logger_("WebRTC.Audio.TargetBitrateInKbps"),
      encoder_factory_(new EncoderFactory),
      encoder_stack_(nullptr),
      // 255 is not a valid RTP payload type, i.e. "no previous payload yet".
      previous_pltype_(255),
      receiver_initialized_(false),
      first_10ms_data_(false),
      first_frame_(true),
      packetization_callback_(NULL),
      vad_callback_(NULL),
      codec_histogram_bins_log_(),  // Zero-initialize all histogram bins.
      number_of_consecutive_empty_packets_(0) {
  if (InitializeReceiverSafe() < 0) {
    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                 "Cannot initialize receiver");
  }
  WEBRTC_TRACE(webrtc::kTraceMemory, webrtc::kTraceAudioCoding, id_, "Created");
}
468 :
// All members release their resources via their own destructors (RAII), so
// the default destructor suffices.
AudioCodingModuleImpl::~AudioCodingModuleImpl() = default;
470 :
471 0 : int32_t AudioCodingModuleImpl::Encode(const InputData& input_data) {
472 0 : AudioEncoder::EncodedInfo encoded_info;
473 : uint8_t previous_pltype;
474 :
475 : // Check if there is an encoder before.
476 0 : if (!HaveValidEncoder("Process"))
477 0 : return -1;
478 :
479 0 : if(!first_frame_) {
480 0 : RTC_DCHECK(IsNewerTimestamp(input_data.input_timestamp, last_timestamp_))
481 0 : << "Time should not move backwards";
482 : }
483 :
484 : // Scale the timestamp to the codec's RTP timestamp rate.
485 : uint32_t rtp_timestamp =
486 0 : first_frame_ ? input_data.input_timestamp
487 0 : : last_rtp_timestamp_ +
488 0 : rtc::CheckedDivExact(
489 0 : input_data.input_timestamp - last_timestamp_,
490 0 : static_cast<uint32_t>(rtc::CheckedDivExact(
491 0 : encoder_stack_->SampleRateHz(),
492 0 : encoder_stack_->RtpTimestampRateHz())));
493 0 : last_timestamp_ = input_data.input_timestamp;
494 0 : last_rtp_timestamp_ = rtp_timestamp;
495 0 : first_frame_ = false;
496 :
497 : // Clear the buffer before reuse - encoded data will get appended.
498 0 : encode_buffer_.Clear();
499 0 : encoded_info = encoder_stack_->Encode(
500 : rtp_timestamp, rtc::ArrayView<const int16_t>(
501 0 : input_data.audio, input_data.audio_channel *
502 0 : input_data.length_per_channel),
503 0 : &encode_buffer_);
504 :
505 0 : bitrate_logger_.MaybeLog(encoder_stack_->GetTargetBitrate() / 1000);
506 0 : if (encode_buffer_.size() == 0 && !encoded_info.send_even_if_empty) {
507 : // Not enough data.
508 0 : return 0;
509 : }
510 0 : previous_pltype = previous_pltype_; // Read it while we have the critsect.
511 :
512 : // Log codec type to histogram once every 500 packets.
513 0 : if (encoded_info.encoded_bytes == 0) {
514 0 : ++number_of_consecutive_empty_packets_;
515 : } else {
516 0 : size_t codec_type = static_cast<size_t>(encoded_info.encoder_type);
517 0 : codec_histogram_bins_log_[codec_type] +=
518 0 : number_of_consecutive_empty_packets_ + 1;
519 0 : number_of_consecutive_empty_packets_ = 0;
520 0 : if (codec_histogram_bins_log_[codec_type] >= 500) {
521 0 : codec_histogram_bins_log_[codec_type] -= 500;
522 0 : UpdateCodecTypeHistogram(codec_type);
523 : }
524 : }
525 :
526 0 : RTPFragmentationHeader my_fragmentation;
527 0 : ConvertEncodedInfoToFragmentationHeader(encoded_info, &my_fragmentation);
528 : FrameType frame_type;
529 0 : if (encode_buffer_.size() == 0 && encoded_info.send_even_if_empty) {
530 0 : frame_type = kEmptyFrame;
531 0 : encoded_info.payload_type = previous_pltype;
532 : } else {
533 0 : RTC_DCHECK_GT(encode_buffer_.size(), 0);
534 0 : frame_type = encoded_info.speech ? kAudioFrameSpeech : kAudioFrameCN;
535 : }
536 :
537 : {
538 0 : rtc::CritScope lock(&callback_crit_sect_);
539 0 : if (packetization_callback_) {
540 0 : packetization_callback_->SendData(
541 0 : frame_type, encoded_info.payload_type, encoded_info.encoded_timestamp,
542 0 : encode_buffer_.data(), encode_buffer_.size(),
543 0 : my_fragmentation.fragmentationVectorSize > 0 ? &my_fragmentation
544 0 : : nullptr);
545 : }
546 :
547 0 : if (vad_callback_) {
548 : // Callback with VAD decision.
549 0 : vad_callback_->InFrameType(frame_type);
550 : }
551 : }
552 0 : previous_pltype_ = encoded_info.payload_type;
553 0 : return static_cast<int32_t>(encode_buffer_.size());
554 : }
555 :
556 : /////////////////////////////////////////
557 : // Sender
558 : //
559 :
560 : // Can be called multiple times for Codec, CNG, RED.
561 0 : int AudioCodingModuleImpl::RegisterSendCodec(const CodecInst& send_codec) {
562 0 : rtc::CritScope lock(&acm_crit_sect_);
563 0 : if (!encoder_factory_->codec_manager.RegisterEncoder(send_codec)) {
564 0 : return -1;
565 : }
566 0 : if (encoder_factory_->codec_manager.GetCodecInst()) {
567 0 : encoder_factory_->external_speech_encoder = nullptr;
568 : }
569 0 : if (!CreateSpeechEncoderIfNecessary(encoder_factory_.get())) {
570 0 : return -1;
571 : }
572 0 : auto* sp = encoder_factory_->codec_manager.GetStackParams();
573 0 : if (sp->speech_encoder)
574 0 : encoder_stack_ = encoder_factory_->rent_a_codec.RentEncoderStack(sp);
575 0 : return 0;
576 : }
577 :
578 0 : void AudioCodingModuleImpl::RegisterExternalSendCodec(
579 : AudioEncoder* external_speech_encoder) {
580 0 : rtc::CritScope lock(&acm_crit_sect_);
581 0 : encoder_factory_->codec_manager.UnsetCodecInst();
582 0 : encoder_factory_->external_speech_encoder = external_speech_encoder;
583 0 : RTC_CHECK(CreateSpeechEncoderIfNecessary(encoder_factory_.get()));
584 0 : auto* sp = encoder_factory_->codec_manager.GetStackParams();
585 0 : RTC_CHECK(sp->speech_encoder);
586 0 : encoder_stack_ = encoder_factory_->rent_a_codec.RentEncoderStack(sp);
587 0 : }
588 :
// Hands |modifier| direct, locked access to the encoder stack. Once called,
// the factory-based registration APIs are permanently disabled.
void AudioCodingModuleImpl::ModifyEncoder(
    rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)> modifier) {
  rtc::CritScope lock(&acm_crit_sect_);

  // Wipe the encoder factory, so that everything that relies on it will fail.
  // We don't want the complexity of supporting swapping back and forth.
  if (encoder_factory_) {
    encoder_factory_.reset();
    RTC_CHECK(!encoder_stack_);  // Ensure we hadn't started using the factory.
  }

  modifier(&encoder_stack_);
}
602 :
603 0 : void AudioCodingModuleImpl::QueryEncoder(
604 : rtc::FunctionView<void(const AudioEncoder*)> query) {
605 0 : rtc::CritScope lock(&acm_crit_sect_);
606 0 : query(encoder_stack_.get());
607 0 : }
608 :
609 : // Get current send codec.
610 0 : rtc::Optional<CodecInst> AudioCodingModuleImpl::SendCodec() const {
611 0 : rtc::CritScope lock(&acm_crit_sect_);
612 0 : if (encoder_factory_) {
613 0 : auto* ci = encoder_factory_->codec_manager.GetCodecInst();
614 0 : if (ci) {
615 0 : return rtc::Optional<CodecInst>(*ci);
616 : }
617 0 : CreateSpeechEncoderIfNecessary(encoder_factory_.get());
618 : const std::unique_ptr<AudioEncoder>& enc =
619 0 : encoder_factory_->codec_manager.GetStackParams()->speech_encoder;
620 0 : if (enc) {
621 : return rtc::Optional<CodecInst>(
622 0 : acm2::CodecManager::ForgeCodecInst(enc.get()));
623 : }
624 0 : return rtc::Optional<CodecInst>();
625 : } else {
626 : return encoder_stack_
627 : ? rtc::Optional<CodecInst>(
628 0 : acm2::CodecManager::ForgeCodecInst(encoder_stack_.get()))
629 0 : : rtc::Optional<CodecInst>();
630 : }
631 : }
632 :
633 : // Get current send frequency.
634 0 : int AudioCodingModuleImpl::SendFrequency() const {
635 : WEBRTC_TRACE(webrtc::kTraceStream, webrtc::kTraceAudioCoding, id_,
636 : "SendFrequency()");
637 0 : rtc::CritScope lock(&acm_crit_sect_);
638 :
639 0 : if (!encoder_stack_) {
640 : WEBRTC_TRACE(webrtc::kTraceStream, webrtc::kTraceAudioCoding, id_,
641 : "SendFrequency Failed, no codec is registered");
642 0 : return -1;
643 : }
644 :
645 0 : return encoder_stack_->SampleRateHz();
646 : }
647 :
648 0 : void AudioCodingModuleImpl::SetBitRate(int bitrate_bps) {
649 0 : rtc::CritScope lock(&acm_crit_sect_);
650 0 : if (encoder_stack_) {
651 0 : encoder_stack_->OnReceivedUplinkBandwidth(bitrate_bps,
652 0 : rtc::Optional<int64_t>());
653 : }
654 0 : }
655 :
656 : // Register a transport callback which will be called to deliver
657 : // the encoded buffers.
658 0 : int AudioCodingModuleImpl::RegisterTransportCallback(
659 : AudioPacketizationCallback* transport) {
660 0 : rtc::CritScope lock(&callback_crit_sect_);
661 0 : packetization_callback_ = transport;
662 0 : return 0;
663 : }
664 :
665 : // Add 10MS of raw (PCM) audio data to the encoder.
666 0 : int AudioCodingModuleImpl::Add10MsData(const AudioFrame& audio_frame) {
667 : InputData input_data;
668 0 : rtc::CritScope lock(&acm_crit_sect_);
669 0 : int r = Add10MsDataInternal(audio_frame, &input_data);
670 0 : return r < 0 ? r : Encode(input_data);
671 : }
672 :
// Validates |audio_frame| (non-empty, <= 48 kHz, exactly 10 ms of samples,
// mono or stereo), preprocesses it (resample and/or re-mix to match the
// encoder), and fills |input_data| for Encode(). Returns 0 on success,
// -1 on any validation or preprocessing failure.
int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame,
                                               InputData* input_data) {
  if (audio_frame.samples_per_channel_ == 0) {
    assert(false);
    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                 "Cannot Add 10 ms audio, payload length is zero");
    return -1;
  }

  if (audio_frame.sample_rate_hz_ > 48000) {
    assert(false);
    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                 "Cannot Add 10 ms audio, input frequency not valid");
    return -1;
  }

  // If the length and frequency matches. We currently just support raw PCM.
  // (sample_rate_hz_ / 100 is the expected number of samples in 10 ms.)
  if (static_cast<size_t>(audio_frame.sample_rate_hz_ / 100) !=
      audio_frame.samples_per_channel_) {
    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                 "Cannot Add 10 ms audio, input frequency and length doesn't"
                 " match");
    return -1;
  }

  if (audio_frame.num_channels_ != 1 && audio_frame.num_channels_ != 2) {
    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                 "Cannot Add 10 ms audio, invalid number of channels.");
    return -1;
  }

  // Do we have a codec registered?
  if (!HaveValidEncoder("Add10MsData")) {
    return -1;
  }

  const AudioFrame* ptr_frame;
  // Perform a resampling, also down-mix if it is required and can be
  // performed before resampling (a down mix prior to resampling will take
  // place if both primary and secondary encoders are mono and input is in
  // stereo).
  if (PreprocessToAddData(audio_frame, &ptr_frame) < 0) {
    return -1;
  }

  // Check whether we need an up-mix or down-mix?
  const size_t current_num_channels = encoder_stack_->NumChannels();
  const bool same_num_channels =
      ptr_frame->num_channels_ == current_num_channels;

  // Re-mix into |input_data->buffer| when channel counts differ.
  if (!same_num_channels) {
    if (ptr_frame->num_channels_ == 1) {
      if (UpMix(*ptr_frame, WEBRTC_10MS_PCM_AUDIO, input_data->buffer) < 0)
        return -1;
    } else {
      if (DownMix(*ptr_frame, WEBRTC_10MS_PCM_AUDIO, input_data->buffer) < 0)
        return -1;
    }
  }

  // When adding data to encoders this pointer is pointing to an audio buffer
  // with correct number of channels.
  const int16_t* ptr_audio = ptr_frame->data_;

  // For pushing data to primary, point the |ptr_audio| to correct buffer.
  if (!same_num_channels)
    ptr_audio = input_data->buffer;

  input_data->input_timestamp = ptr_frame->timestamp_;
  input_data->audio = ptr_audio;
  input_data->length_per_channel = ptr_frame->samples_per_channel_;
  input_data->audio_channel = current_num_channels;

  return 0;
}
748 :
// Perform a resampling and down-mix if required. We down-mix only if
// encoder is mono and input is stereo. In case of dual-streaming, both
// encoders has to be mono for down-mix to take place.
// |*ptr_out| will point to the pre-processed audio-frame. If no pre-processing
// is required, |*ptr_out| points to |in_frame|.
// Also maintains the |expected_in_ts_| / |expected_codec_ts_| timestamp
// trackers so that codec timestamps stay continuous across rate changes and
// input gaps. Returns -1 on resampling/down-mix failure, 0 otherwise.
int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
                                               const AudioFrame** ptr_out) {
  const bool resample =
      in_frame.sample_rate_hz_ != encoder_stack_->SampleRateHz();

  // This variable is true if primary codec and secondary codec (if exists)
  // are both mono and input is stereo.
  // TODO(henrik.lundin): This condition should probably be
  //   in_frame.num_channels_ > encoder_stack_->NumChannels()
  const bool down_mix =
      in_frame.num_channels_ == 2 && encoder_stack_->NumChannels() == 1;

  if (!first_10ms_data_) {
    // First chunk ever: seed both timestamp trackers from the input.
    expected_in_ts_ = in_frame.timestamp_;
    expected_codec_ts_ = in_frame.timestamp_;
    first_10ms_data_ = true;
  } else if (in_frame.timestamp_ != expected_in_ts_) {
    // Input timestamp jumped; advance the codec timestamp by the same gap,
    // scaled by the ratio between codec and input sample rates.
    LOG(LS_WARNING) << "Unexpected input timestamp: " << in_frame.timestamp_
                    << ", expected: " << expected_in_ts_;
    expected_codec_ts_ +=
        (in_frame.timestamp_ - expected_in_ts_) *
        static_cast<uint32_t>(
            static_cast<double>(encoder_stack_->SampleRateHz()) /
            static_cast<double>(in_frame.sample_rate_hz_));
    expected_in_ts_ = in_frame.timestamp_;
  }


  if (!down_mix && !resample) {
    // No pre-processing is required.
    if (expected_in_ts_ == expected_codec_ts_) {
      // If we've never resampled, we can use the input frame as-is
      *ptr_out = &in_frame;
    } else {
      // Otherwise we'll need to alter the timestamp. Since in_frame is const,
      // we'll have to make a copy of it.
      preprocess_frame_.CopyFrom(in_frame);
      preprocess_frame_.timestamp_ = expected_codec_ts_;
      *ptr_out = &preprocess_frame_;
    }

    expected_in_ts_ += static_cast<uint32_t>(in_frame.samples_per_channel_);
    expected_codec_ts_ += static_cast<uint32_t>(in_frame.samples_per_channel_);
    return 0;
  }

  *ptr_out = &preprocess_frame_;
  preprocess_frame_.num_channels_ = in_frame.num_channels_;
  int16_t audio[WEBRTC_10MS_PCM_AUDIO];
  const int16_t* src_ptr_audio = in_frame.data_;
  int16_t* dest_ptr_audio = preprocess_frame_.data_;
  if (down_mix) {
    // If a resampling is required the output of a down-mix is written into a
    // local buffer, otherwise, it will be written to the output frame.
    if (resample)
      dest_ptr_audio = audio;
    if (DownMix(in_frame, WEBRTC_10MS_PCM_AUDIO, dest_ptr_audio) < 0)
      return -1;
    preprocess_frame_.num_channels_ = 1;
    // Set the input of the resampler is the down-mixed signal.
    src_ptr_audio = audio;
  }

  preprocess_frame_.timestamp_ = expected_codec_ts_;
  preprocess_frame_.samples_per_channel_ = in_frame.samples_per_channel_;
  preprocess_frame_.sample_rate_hz_ = in_frame.sample_rate_hz_;
  // If it is required, we have to do a resampling.
  if (resample) {
    // The result of the resampler is written to output frame.
    dest_ptr_audio = preprocess_frame_.data_;

    int samples_per_channel = resampler_.Resample10Msec(
        src_ptr_audio, in_frame.sample_rate_hz_, encoder_stack_->SampleRateHz(),
        preprocess_frame_.num_channels_, AudioFrame::kMaxDataSizeSamples,
        dest_ptr_audio);

    if (samples_per_channel < 0) {
      WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                   "Cannot add 10 ms audio, resampling failed");
      return -1;
    }
    preprocess_frame_.samples_per_channel_ =
        static_cast<size_t>(samples_per_channel);
    preprocess_frame_.sample_rate_hz_ = encoder_stack_->SampleRateHz();
  }

  // Advance trackers: input by input samples, codec by (possibly resampled)
  // output samples.
  expected_codec_ts_ +=
      static_cast<uint32_t>(preprocess_frame_.samples_per_channel_);
  expected_in_ts_ += static_cast<uint32_t>(in_frame.samples_per_channel_);

  return 0;
}
846 :
847 : /////////////////////////////////////////
848 : // (RED) Redundant Coding
849 : //
850 :
851 0 : bool AudioCodingModuleImpl::REDStatus() const {
852 0 : rtc::CritScope lock(&acm_crit_sect_);
853 0 : return encoder_factory_->codec_manager.GetStackParams()->use_red;
854 : }
855 :
856 : // Configure RED status i.e on/off.
int AudioCodingModuleImpl::SetREDStatus(bool enable_red) {
#ifdef WEBRTC_CODEC_RED
  // RED support compiled in: update the codec manager, and if a speech
  // encoder is currently active, re-rent the encoder stack so the new RED
  // setting takes effect immediately.
  rtc::CritScope lock(&acm_crit_sect_);
  CreateSpeechEncoderIfNecessary(encoder_factory_.get());
  if (!encoder_factory_->codec_manager.SetCopyRed(enable_red)) {
    return -1;
  }
  auto* sp = encoder_factory_->codec_manager.GetStackParams();
  if (sp->speech_encoder)
    encoder_stack_ = encoder_factory_->rent_a_codec.RentEncoderStack(sp);
  return 0;
#else
  // RED support not compiled in: log a warning and fail.
  WEBRTC_TRACE(webrtc::kTraceWarning, webrtc::kTraceAudioCoding, id_,
               " WEBRTC_CODEC_RED is undefined");
  return -1;
#endif
}
874 :
875 : /////////////////////////////////////////
876 : // (FEC) Forward Error Correction (codec internal)
877 : //
878 :
879 0 : bool AudioCodingModuleImpl::CodecFEC() const {
880 0 : rtc::CritScope lock(&acm_crit_sect_);
881 0 : return encoder_factory_->codec_manager.GetStackParams()->use_codec_fec;
882 : }
883 :
884 0 : int AudioCodingModuleImpl::SetCodecFEC(bool enable_codec_fec) {
885 0 : rtc::CritScope lock(&acm_crit_sect_);
886 0 : CreateSpeechEncoderIfNecessary(encoder_factory_.get());
887 0 : if (!encoder_factory_->codec_manager.SetCodecFEC(enable_codec_fec)) {
888 0 : return -1;
889 : }
890 0 : auto* sp = encoder_factory_->codec_manager.GetStackParams();
891 0 : if (sp->speech_encoder)
892 0 : encoder_stack_ = encoder_factory_->rent_a_codec.RentEncoderStack(sp);
893 0 : if (enable_codec_fec) {
894 0 : return sp->use_codec_fec ? 0 : -1;
895 : } else {
896 0 : RTC_DCHECK(!sp->use_codec_fec);
897 0 : return 0;
898 : }
899 : }
900 :
901 0 : int AudioCodingModuleImpl::SetPacketLossRate(int loss_rate) {
902 0 : rtc::CritScope lock(&acm_crit_sect_);
903 0 : if (HaveValidEncoder("SetPacketLossRate")) {
904 0 : encoder_stack_->OnReceivedUplinkPacketLossFraction(loss_rate / 100.0);
905 : }
906 0 : return 0;
907 : }
908 :
909 : /////////////////////////////////////////
910 : // (VAD) Voice Activity Detection
911 : //
912 0 : int AudioCodingModuleImpl::SetVAD(bool enable_dtx,
913 : bool enable_vad,
914 : ACMVADMode mode) {
915 : // Note: |enable_vad| is not used; VAD is enabled based on the DTX setting.
916 0 : RTC_DCHECK_EQ(enable_dtx, enable_vad);
917 0 : rtc::CritScope lock(&acm_crit_sect_);
918 0 : CreateSpeechEncoderIfNecessary(encoder_factory_.get());
919 0 : if (!encoder_factory_->codec_manager.SetVAD(enable_dtx, mode)) {
920 0 : return -1;
921 : }
922 0 : auto* sp = encoder_factory_->codec_manager.GetStackParams();
923 0 : if (sp->speech_encoder)
924 0 : encoder_stack_ = encoder_factory_->rent_a_codec.RentEncoderStack(sp);
925 0 : return 0;
926 : }
927 :
928 : // Get VAD/DTX settings.
929 0 : int AudioCodingModuleImpl::VAD(bool* dtx_enabled, bool* vad_enabled,
930 : ACMVADMode* mode) const {
931 0 : rtc::CritScope lock(&acm_crit_sect_);
932 0 : const auto* sp = encoder_factory_->codec_manager.GetStackParams();
933 0 : *dtx_enabled = *vad_enabled = sp->use_cng;
934 0 : *mode = sp->vad_mode;
935 0 : return 0;
936 : }
937 :
938 : /////////////////////////////////////////
939 : // Receiver
940 : //
941 :
942 0 : int AudioCodingModuleImpl::InitializeReceiver() {
943 0 : rtc::CritScope lock(&acm_crit_sect_);
944 0 : return InitializeReceiverSafe();
945 : }
946 :
947 : // Initialize receiver, resets codec database etc.
// Resets the receiver to a clean state and (re-)registers the RED and CN
// codecs. Caller must hold acm_crit_sect_ (see InitializeReceiver()).
// Returns 0 on success, -1 if a codec registration fails.
int AudioCodingModuleImpl::InitializeReceiverSafe() {
  // If the receiver is already initialized then we want to destroy any
  // existing decoders. After a call to this function, we should have a clean
  // start-up.
  if (receiver_initialized_)
    receiver_.RemoveAllCodecs();
  receiver_.ResetInitialDelay();
  receiver_.SetMinimumDelay(0);
  receiver_.SetMaximumDelay(0);
  receiver_.FlushBuffers();

  // Register RED and CN.
  // Walk the full codec database and add only the RED/CN entries; these are
  // signaling codecs the receiver needs regardless of the negotiated codec.
  auto db = acm2::RentACodec::Database();
  for (size_t i = 0; i < db.size(); i++) {
    if (IsCodecRED(db[i]) || IsCodecCN(db[i])) {
      // nullptr decoder: these are built-in codecs, not external decoders.
      if (receiver_.AddCodec(static_cast<int>(i),
                             static_cast<uint8_t>(db[i].pltype), 1,
                             db[i].plfreq, nullptr, db[i].plname) < 0) {
        WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                     "Cannot register master codec.");
        return -1;
      }
    }
  }
  receiver_initialized_ = true;
  return 0;
}
975 :
976 : // Get current receive frequency.
977 0 : int AudioCodingModuleImpl::ReceiveFrequency() const {
978 0 : const auto last_packet_sample_rate = receiver_.last_packet_sample_rate_hz();
979 0 : return last_packet_sample_rate ? *last_packet_sample_rate
980 0 : : receiver_.last_output_sample_rate_hz();
981 : }
982 :
983 : // Get current playout frequency.
984 0 : int AudioCodingModuleImpl::PlayoutFrequency() const {
985 : WEBRTC_TRACE(webrtc::kTraceStream, webrtc::kTraceAudioCoding, id_,
986 : "PlayoutFrequency()");
987 0 : return receiver_.last_output_sample_rate_hz();
988 : }
989 :
990 0 : bool AudioCodingModuleImpl::RegisterReceiveCodec(
991 : int rtp_payload_type,
992 : const SdpAudioFormat& audio_format) {
993 0 : rtc::CritScope lock(&acm_crit_sect_);
994 0 : RTC_DCHECK(receiver_initialized_);
995 :
996 0 : if (!acm2::RentACodec::IsPayloadTypeValid(rtp_payload_type)) {
997 0 : LOG_F(LS_ERROR) << "Invalid payload-type " << rtp_payload_type
998 0 : << " for decoder.";
999 0 : return false;
1000 : }
1001 :
1002 0 : return receiver_.AddCodec(rtp_payload_type, audio_format);
1003 : }
1004 :
1005 0 : int AudioCodingModuleImpl::RegisterReceiveCodec(const CodecInst& codec) {
1006 0 : rtc::CritScope lock(&acm_crit_sect_);
1007 0 : auto* ef = encoder_factory_.get();
1008 0 : return RegisterReceiveCodecUnlocked(
1009 0 : codec, [&] { return ef->rent_a_codec.RentIsacDecoder(codec.plfreq); });
1010 : }
1011 :
1012 0 : int AudioCodingModuleImpl::RegisterReceiveCodec(
1013 : const CodecInst& codec,
1014 : rtc::FunctionView<std::unique_ptr<AudioDecoder>()> isac_factory) {
1015 0 : rtc::CritScope lock(&acm_crit_sect_);
1016 0 : return RegisterReceiveCodecUnlocked(codec, isac_factory);
1017 : }
1018 :
// Registers |codec| as a receive codec. Caller must hold acm_crit_sect_.
// For iSAC, a decoder instance is created lazily via |isac_factory| and
// cached per sample rate (16k/32k) so repeated registrations reuse it.
// Returns 0 on success, -1 on invalid parameters.
int AudioCodingModuleImpl::RegisterReceiveCodecUnlocked(
    const CodecInst& codec,
    rtc::FunctionView<std::unique_ptr<AudioDecoder>()> isac_factory) {
  RTC_DCHECK(receiver_initialized_);
  // Only mono and stereo are supported.
  if (codec.channels > 2) {
    LOG_F(LS_ERROR) << "Unsupported number of channels: " << codec.channels;
    return -1;
  }

  // Map (name, frequency, channels) to a codec ID; unknown combinations are
  // rejected here.
  auto codec_id = acm2::RentACodec::CodecIdByParams(codec.plname, codec.plfreq,
                                                    codec.channels);
  if (!codec_id) {
    LOG_F(LS_ERROR) << "Wrong codec params to be registered as receive codec";
    return -1;
  }
  auto codec_index = acm2::RentACodec::CodecIndexFromId(*codec_id);
  RTC_CHECK(codec_index) << "Invalid codec ID: " << static_cast<int>(*codec_id);

  // Check if the payload-type is valid.
  if (!acm2::RentACodec::IsPayloadTypeValid(codec.pltype)) {
    LOG_F(LS_ERROR) << "Invalid payload type " << codec.pltype << " for "
                    << codec.plname;
    return -1;
  }

  // iSAC is special-cased: the decoder object is created through the factory
  // on first use and cached in isac_decoder_16k_/isac_decoder_32k_ depending
  // on the sample rate.
  AudioDecoder* isac_decoder = nullptr;
  if (STR_CASE_CMP(codec.plname, "isac") == 0) {
    std::unique_ptr<AudioDecoder>& saved_isac_decoder =
        codec.plfreq == 16000 ? isac_decoder_16k_ : isac_decoder_32k_;
    if (!saved_isac_decoder) {
      saved_isac_decoder = isac_factory();
    }
    isac_decoder = saved_isac_decoder.get();
  }
  // |isac_decoder| stays nullptr for non-iSAC codecs (built-in decoding).
  return receiver_.AddCodec(*codec_index, codec.pltype, codec.channels,
                            codec.plfreq, isac_decoder, codec.plname);
}
1056 :
1057 0 : int AudioCodingModuleImpl::RegisterExternalReceiveCodec(
1058 : int rtp_payload_type,
1059 : AudioDecoder* external_decoder,
1060 : int sample_rate_hz,
1061 : int num_channels,
1062 : const std::string& name) {
1063 0 : rtc::CritScope lock(&acm_crit_sect_);
1064 0 : RTC_DCHECK(receiver_initialized_);
1065 0 : if (num_channels > 2 || num_channels < 0) {
1066 0 : LOG_F(LS_ERROR) << "Unsupported number of channels: " << num_channels;
1067 0 : return -1;
1068 : }
1069 :
1070 : // Check if the payload-type is valid.
1071 0 : if (!acm2::RentACodec::IsPayloadTypeValid(rtp_payload_type)) {
1072 0 : LOG_F(LS_ERROR) << "Invalid payload-type " << rtp_payload_type
1073 0 : << " for external decoder.";
1074 0 : return -1;
1075 : }
1076 :
1077 0 : return receiver_.AddCodec(-1 /* external */, rtp_payload_type, num_channels,
1078 0 : sample_rate_hz, external_decoder, name);
1079 : }
1080 :
1081 : // Get current received codec.
1082 0 : int AudioCodingModuleImpl::ReceiveCodec(CodecInst* current_codec) const {
1083 0 : rtc::CritScope lock(&acm_crit_sect_);
1084 0 : return receiver_.LastAudioCodec(current_codec);
1085 : }
1086 :
1087 0 : rtc::Optional<SdpAudioFormat> AudioCodingModuleImpl::ReceiveFormat() const {
1088 0 : rtc::CritScope lock(&acm_crit_sect_);
1089 0 : return receiver_.LastAudioFormat();
1090 : }
1091 :
1092 : // Incoming packet from network parsed and ready for decode.
1093 0 : int AudioCodingModuleImpl::IncomingPacket(const uint8_t* incoming_payload,
1094 : const size_t payload_length,
1095 : const WebRtcRTPHeader& rtp_header) {
1096 0 : return receiver_.InsertPacket(
1097 : rtp_header,
1098 0 : rtc::ArrayView<const uint8_t>(incoming_payload, payload_length));
1099 : }
1100 :
1101 : // Minimum playout delay (Used for lip-sync).
1102 0 : int AudioCodingModuleImpl::SetMinimumPlayoutDelay(int time_ms) {
1103 0 : if ((time_ms < 0) || (time_ms > 10000)) {
1104 : WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
1105 : "Delay must be in the range of 0-1000 milliseconds.");
1106 0 : return -1;
1107 : }
1108 0 : return receiver_.SetMinimumDelay(time_ms);
1109 : }
1110 :
1111 0 : int AudioCodingModuleImpl::SetMaximumPlayoutDelay(int time_ms) {
1112 0 : if ((time_ms < 0) || (time_ms > 10000)) {
1113 : WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
1114 : "Delay must be in the range of 0-1000 milliseconds.");
1115 0 : return -1;
1116 : }
1117 0 : return receiver_.SetMaximumDelay(time_ms);
1118 : }
1119 :
1120 : // Get 10 milliseconds of raw audio data to play out.
1121 : // Automatic resample to the requested frequency.
1122 0 : int AudioCodingModuleImpl::PlayoutData10Ms(int desired_freq_hz,
1123 : AudioFrame* audio_frame,
1124 : bool* muted) {
1125 : // GetAudio always returns 10 ms, at the requested sample rate.
1126 0 : if (receiver_.GetAudio(desired_freq_hz, audio_frame, muted) != 0) {
1127 : WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
1128 : "PlayoutData failed, RecOut Failed");
1129 0 : return -1;
1130 : }
1131 0 : audio_frame->id_ = id_;
1132 0 : return 0;
1133 : }
1134 :
1135 0 : int AudioCodingModuleImpl::PlayoutData10Ms(int desired_freq_hz,
1136 : AudioFrame* audio_frame) {
1137 : bool muted;
1138 0 : int ret = PlayoutData10Ms(desired_freq_hz, audio_frame, &muted);
1139 0 : RTC_DCHECK(!muted);
1140 0 : return ret;
1141 : }
1142 :
1143 : /////////////////////////////////////////
1144 : // Statistics
1145 : //
1146 :
1147 : // TODO(turajs) change the return value to void. Also change the corresponding
1148 : // NetEq function.
1149 0 : int AudioCodingModuleImpl::GetNetworkStatistics(NetworkStatistics* statistics) {
1150 0 : receiver_.GetNetworkStatistics(statistics);
1151 0 : return 0;
1152 : }
1153 :
1154 0 : int AudioCodingModuleImpl::RegisterVADCallback(ACMVADCallback* vad_callback) {
1155 : WEBRTC_TRACE(webrtc::kTraceDebug, webrtc::kTraceAudioCoding, id_,
1156 : "RegisterVADCallback()");
1157 0 : rtc::CritScope lock(&callback_crit_sect_);
1158 0 : vad_callback_ = vad_callback;
1159 0 : return 0;
1160 : }
1161 :
1162 : // TODO(kwiberg): Remove this method, and have callers call IncomingPacket
1163 : // instead. The translation logic and state belong with them, not with
1164 : // AudioCodingModuleImpl.
int AudioCodingModuleImpl::IncomingPayload(const uint8_t* incoming_payload,
                                           size_t payload_length,
                                           uint8_t payload_type,
                                           uint32_t timestamp) {
  // We are not acquiring any lock when interacting with |aux_rtp_header_| no
  // other method uses this member variable.
  if (!aux_rtp_header_) {
    // This is the first time that we are using |dummy_rtp_header_|
    // so we have to create it.
    aux_rtp_header_.reset(new WebRtcRTPHeader);
    aux_rtp_header_->header.payloadType = payload_type;
    // Don't matter in this case.
    aux_rtp_header_->header.ssrc = 0;
    aux_rtp_header_->header.markerBit = false;
    // Start with random numbers.
    aux_rtp_header_->header.sequenceNumber = 0x1234;  // Arbitrary.
    aux_rtp_header_->type.Audio.channel = 1;
  }

  // Each call re-stamps the synthetic header with the caller's timestamp and
  // feeds the payload through the normal packet path.
  aux_rtp_header_->header.timestamp = timestamp;
  // NOTE(review): the return value of IncomingPacket is discarded here, so
  // insertion failures are not propagated to the caller.
  IncomingPacket(incoming_payload, payload_length, *aux_rtp_header_);
  // Get ready for the next payload.
  aux_rtp_header_->header.sequenceNumber++;
  return 0;
}
1190 :
1191 0 : int AudioCodingModuleImpl::SetOpusApplication(OpusApplicationMode application) {
1192 0 : rtc::CritScope lock(&acm_crit_sect_);
1193 0 : if (!HaveValidEncoder("SetOpusApplication")) {
1194 0 : return -1;
1195 : }
1196 : AudioEncoder::Application app;
1197 0 : switch (application) {
1198 : case kVoip:
1199 0 : app = AudioEncoder::Application::kSpeech;
1200 0 : break;
1201 : case kAudio:
1202 0 : app = AudioEncoder::Application::kAudio;
1203 0 : break;
1204 : default:
1205 0 : FATAL();
1206 : return 0;
1207 : }
1208 0 : return encoder_stack_->SetApplication(app) ? 0 : -1;
1209 : }
1210 :
1211 : // Informs Opus encoder of the maximum playback rate the receiver will render.
1212 0 : int AudioCodingModuleImpl::SetOpusMaxPlaybackRate(int frequency_hz) {
1213 0 : rtc::CritScope lock(&acm_crit_sect_);
1214 0 : if (!HaveValidEncoder("SetOpusMaxPlaybackRate")) {
1215 0 : return -1;
1216 : }
1217 0 : encoder_stack_->SetMaxPlaybackRate(frequency_hz);
1218 0 : return 0;
1219 : }
1220 :
1221 0 : int AudioCodingModuleImpl::EnableOpusDtx() {
1222 0 : rtc::CritScope lock(&acm_crit_sect_);
1223 0 : if (!HaveValidEncoder("EnableOpusDtx")) {
1224 0 : return -1;
1225 : }
1226 0 : return encoder_stack_->SetDtx(true) ? 0 : -1;
1227 : }
1228 :
1229 0 : int AudioCodingModuleImpl::DisableOpusDtx() {
1230 0 : rtc::CritScope lock(&acm_crit_sect_);
1231 0 : if (!HaveValidEncoder("DisableOpusDtx")) {
1232 0 : return -1;
1233 : }
1234 0 : return encoder_stack_->SetDtx(false) ? 0 : -1;
1235 : }
1236 :
1237 0 : int32_t AudioCodingModuleImpl::PlayoutTimestamp(uint32_t* timestamp) {
1238 0 : rtc::Optional<uint32_t> ts = PlayoutTimestamp();
1239 0 : if (!ts)
1240 0 : return -1;
1241 0 : *timestamp = *ts;
1242 0 : return 0;
1243 : }
1244 :
1245 0 : rtc::Optional<uint32_t> AudioCodingModuleImpl::PlayoutTimestamp() {
1246 0 : return receiver_.GetPlayoutTimestamp();
1247 : }
1248 :
1249 0 : int AudioCodingModuleImpl::FilteredCurrentDelayMs() const {
1250 0 : return receiver_.FilteredCurrentDelayMs();
1251 : }
1252 :
1253 0 : bool AudioCodingModuleImpl::HaveValidEncoder(const char* caller_name) const {
1254 0 : if (!encoder_stack_) {
1255 : WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
1256 : "%s failed: No send codec is registered.", caller_name);
1257 0 : return false;
1258 : }
1259 0 : return true;
1260 : }
1261 :
1262 0 : int AudioCodingModuleImpl::UnregisterReceiveCodec(uint8_t payload_type) {
1263 0 : return receiver_.RemoveCodec(payload_type);
1264 : }
1265 :
1266 0 : int AudioCodingModuleImpl::EnableNack(size_t max_nack_list_size) {
1267 0 : return receiver_.EnableNack(max_nack_list_size);
1268 : }
1269 :
1270 0 : void AudioCodingModuleImpl::DisableNack() {
1271 0 : receiver_.DisableNack();
1272 0 : }
1273 :
1274 0 : std::vector<uint16_t> AudioCodingModuleImpl::GetNackList(
1275 : int64_t round_trip_time_ms) const {
1276 0 : return receiver_.GetNackList(round_trip_time_ms);
1277 : }
1278 :
1279 0 : int AudioCodingModuleImpl::LeastRequiredDelayMs() const {
1280 0 : return receiver_.LeastRequiredDelayMs();
1281 : }
1282 :
1283 0 : void AudioCodingModuleImpl::GetDecodingCallStatistics(
1284 : AudioDecodingCallStats* call_stats) const {
1285 0 : receiver_.GetDecodingCallStatistics(call_stats);
1286 0 : }
1287 :
1288 : } // namespace
1289 :
1290 0 : AudioCodingModule::Config::Config()
1291 0 : : id(0), neteq_config(), clock(Clock::GetRealTimeClock()) {
1292 : // Post-decode VAD is disabled by default in NetEq, however, Audio
1293 : // Conference Mixer relies on VAD decisions and fails without them.
1294 0 : neteq_config.enable_post_decode_vad = true;
1295 0 : }
1296 :
1297 : AudioCodingModule::Config::Config(const Config&) = default;
1298 : AudioCodingModule::Config::~Config() = default;
1299 :
1300 : // Create module
1301 0 : AudioCodingModule* AudioCodingModule::Create(int id) {
1302 0 : Config config;
1303 0 : config.id = id;
1304 0 : config.clock = Clock::GetRealTimeClock();
1305 0 : config.decoder_factory = CreateBuiltinAudioDecoderFactory();
1306 0 : return Create(config);
1307 : }
1308 :
1309 0 : AudioCodingModule* AudioCodingModule::Create(int id, Clock* clock) {
1310 0 : Config config;
1311 0 : config.id = id;
1312 0 : config.clock = clock;
1313 0 : config.decoder_factory = CreateBuiltinAudioDecoderFactory();
1314 0 : return Create(config);
1315 : }
1316 :
1317 0 : AudioCodingModule* AudioCodingModule::Create(const Config& config) {
1318 0 : if (!config.decoder_factory) {
1319 : // TODO(ossu): Backwards compatibility. Will be removed after a deprecation
1320 : // cycle.
1321 0 : Config config_copy = config;
1322 0 : config_copy.decoder_factory = CreateBuiltinAudioDecoderFactory();
1323 0 : return new AudioCodingModuleImpl(config_copy);
1324 : }
1325 0 : return new AudioCodingModuleImpl(config);
1326 : }
1327 :
1328 0 : int AudioCodingModule::NumberOfCodecs() {
1329 0 : return static_cast<int>(acm2::RentACodec::NumberOfCodecs());
1330 : }
1331 :
1332 0 : int AudioCodingModule::Codec(int list_id, CodecInst* codec) {
1333 0 : auto codec_id = acm2::RentACodec::CodecIdFromIndex(list_id);
1334 0 : if (!codec_id)
1335 0 : return -1;
1336 0 : auto ci = acm2::RentACodec::CodecInstById(*codec_id);
1337 0 : if (!ci)
1338 0 : return -1;
1339 0 : *codec = *ci;
1340 0 : return 0;
1341 : }
1342 :
1343 0 : int AudioCodingModule::Codec(const char* payload_name,
1344 : CodecInst* codec,
1345 : int sampling_freq_hz,
1346 : size_t channels) {
1347 : rtc::Optional<CodecInst> ci = acm2::RentACodec::CodecInstByParams(
1348 0 : payload_name, sampling_freq_hz, channels);
1349 0 : if (ci) {
1350 0 : *codec = *ci;
1351 0 : return 0;
1352 : } else {
1353 : // We couldn't find a matching codec, so set the parameters to unacceptable
1354 : // values and return.
1355 0 : codec->plname[0] = '\0';
1356 0 : codec->pltype = -1;
1357 0 : codec->pacsize = 0;
1358 0 : codec->rate = 0;
1359 0 : codec->plfreq = 0;
1360 0 : return -1;
1361 : }
1362 : }
1363 :
1364 0 : int AudioCodingModule::Codec(const char* payload_name,
1365 : int sampling_freq_hz,
1366 : size_t channels) {
1367 : rtc::Optional<acm2::RentACodec::CodecId> ci =
1368 : acm2::RentACodec::CodecIdByParams(payload_name, sampling_freq_hz,
1369 0 : channels);
1370 0 : if (!ci)
1371 0 : return -1;
1372 0 : rtc::Optional<int> i = acm2::RentACodec::CodecIndexFromId(*ci);
1373 0 : return i ? *i : -1;
1374 : }
1375 :
1376 : // Checks the validity of the parameters of the given codec
1377 0 : bool AudioCodingModule::IsCodecValid(const CodecInst& codec) {
1378 0 : bool valid = acm2::RentACodec::IsCodecValid(codec);
1379 0 : if (!valid)
1380 : WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, -1,
1381 : "Invalid codec setting");
1382 0 : return valid;
1383 : }
1384 :
1385 : } // namespace webrtc
|