Line data Source code
1 : /*
2 : * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "webrtc/modules/rtp_rtcp/source/rtp_receiver_audio.h"
12 :
13 : #include <assert.h> // assert
14 : #include <math.h> // pow()
15 : #include <string.h> // memcpy()
16 :
17 : #include "webrtc/common_types.h"
18 : #include "webrtc/base/logging.h"
19 : #include "webrtc/base/trace_event.h"
20 :
21 : namespace webrtc {
22 0 : RTPReceiverStrategy* RTPReceiverStrategy::CreateAudioStrategy(
23 : RtpData* data_callback) {
24 0 : return new RTPReceiverAudio(data_callback);
25 : }
26 :
27 0 : RTPReceiverAudio::RTPReceiverAudio(RtpData* data_callback)
28 : : RTPReceiverStrategy(data_callback),
29 : TelephoneEventHandler(),
30 : telephone_event_forward_to_decoder_(false),
31 : telephone_event_payload_type_(-1),
32 : cng_nb_payload_type_(-1),
33 : cng_wb_payload_type_(-1),
34 : cng_swb_payload_type_(-1),
35 : cng_fb_payload_type_(-1),
36 : num_energy_(0),
37 0 : current_remote_energy_() {
38 0 : last_payload_.Audio.channels = 1;
39 0 : memset(current_remote_energy_, 0, sizeof(current_remote_energy_));
40 0 : }
41 :
42 : // Outband TelephoneEvent(DTMF) detection
43 0 : void RTPReceiverAudio::SetTelephoneEventForwardToDecoder(
44 : bool forward_to_decoder) {
45 0 : rtc::CritScope lock(&crit_sect_);
46 0 : telephone_event_forward_to_decoder_ = forward_to_decoder;
47 0 : }
48 :
49 : // Is forwarding of outband telephone events turned on/off?
50 0 : bool RTPReceiverAudio::TelephoneEventForwardToDecoder() const {
51 0 : rtc::CritScope lock(&crit_sect_);
52 0 : return telephone_event_forward_to_decoder_;
53 : }
54 :
55 0 : bool RTPReceiverAudio::TelephoneEventPayloadType(
56 : int8_t payload_type) const {
57 0 : rtc::CritScope lock(&crit_sect_);
58 0 : return telephone_event_payload_type_ == payload_type;
59 : }
60 :
61 0 : bool RTPReceiverAudio::CNGPayloadType(int8_t payload_type) {
62 0 : rtc::CritScope lock(&crit_sect_);
63 0 : return payload_type == cng_nb_payload_type_ ||
64 0 : payload_type == cng_wb_payload_type_ ||
65 0 : payload_type == cng_swb_payload_type_ ||
66 0 : payload_type == cng_fb_payload_type_;
67 : }
68 :
69 0 : bool RTPReceiverAudio::ShouldReportCsrcChanges(uint8_t payload_type) const {
70 : // Don't do this for DTMF packets, otherwise it's fine.
71 0 : return !TelephoneEventPayloadType(payload_type);
72 : }
73 :
74 : // - Sample based or frame based codecs based on RFC 3551
75 : // -
76 : // - NOTE! There is one error in the RFC, stating G.722 uses 8 bits/samples.
77 : // - The correct rate is 4 bits/sample.
78 : // -
79 : // - name of sampling default
80 : // - encoding sample/frame bits/sample rate ms/frame ms/packet
81 : // -
82 : // - Sample based audio codecs
83 : // - DVI4 sample 4 var. 20
84 : // - G722 sample 4 16,000 20
85 : // - G726-40 sample 5 8,000 20
86 : // - G726-32 sample 4 8,000 20
87 : // - G726-24 sample 3 8,000 20
88 : // - G726-16 sample 2 8,000 20
89 : // - L8 sample 8 var. 20
90 : // - L16 sample 16 var. 20
91 : // - PCMA sample 8 var. 20
92 : // - PCMU sample 8 var. 20
93 : // -
94 : // - Frame based audio codecs
95 : // - G723 frame N/A 8,000 30 30
96 : // - G728 frame N/A 8,000 2.5 20
97 : // - G729 frame N/A 8,000 10 20
98 : // - G729D frame N/A 8,000 10 20
99 : // - G729E frame N/A 8,000 10 20
100 : // - GSM frame N/A 8,000 20 20
101 : // - GSM-EFR frame N/A 8,000 20 20
102 : // - LPC frame N/A 8,000 20 20
103 : // - MPA frame N/A var. var.
104 : // -
105 : // - G7221 frame N/A
106 0 : int32_t RTPReceiverAudio::OnNewPayloadTypeCreated(
107 : const CodecInst& audio_codec) {
108 0 : rtc::CritScope lock(&crit_sect_);
109 :
110 0 : if (RtpUtility::StringCompare(audio_codec.plname, "telephone-event", 15)) {
111 0 : telephone_event_payload_type_ = audio_codec.pltype;
112 : }
113 0 : if (RtpUtility::StringCompare(audio_codec.plname, "cn", 2)) {
114 : // We support comfort noise at four different frequencies.
115 0 : if (audio_codec.plfreq == 8000) {
116 0 : cng_nb_payload_type_ = audio_codec.pltype;
117 0 : } else if (audio_codec.plfreq == 16000) {
118 0 : cng_wb_payload_type_ = audio_codec.pltype;
119 0 : } else if (audio_codec.plfreq == 32000) {
120 0 : cng_swb_payload_type_ = audio_codec.pltype;
121 0 : } else if (audio_codec.plfreq == 48000) {
122 0 : cng_fb_payload_type_ = audio_codec.pltype;
123 : } else {
124 0 : assert(false);
125 : return -1;
126 : }
127 : }
128 0 : return 0;
129 : }
130 :
131 0 : int32_t RTPReceiverAudio::ParseRtpPacket(WebRtcRTPHeader* rtp_header,
132 : const PayloadUnion& specific_payload,
133 : bool is_red,
134 : const uint8_t* payload,
135 : size_t payload_length,
136 : int64_t timestamp_ms,
137 : bool is_first_packet) {
138 0 : TRACE_EVENT2(TRACE_DISABLED_BY_DEFAULT("webrtc_rtp"), "Audio::ParseRtp",
139 : "seqnum", rtp_header->header.sequenceNumber, "timestamp",
140 : rtp_header->header.timestamp);
141 0 : rtp_header->type.Audio.numEnergy = rtp_header->header.numCSRCs;
142 0 : num_energy_ = rtp_header->type.Audio.numEnergy;
143 0 : if (rtp_header->type.Audio.numEnergy > 0 &&
144 0 : rtp_header->type.Audio.numEnergy <= kRtpCsrcSize) {
145 0 : memcpy(current_remote_energy_,
146 : rtp_header->type.Audio.arrOfEnergy,
147 0 : rtp_header->type.Audio.numEnergy);
148 : }
149 :
150 0 : if (first_packet_received_()) {
151 0 : LOG(LS_INFO) << "Received first audio RTP packet";
152 : }
153 :
154 0 : return ParseAudioCodecSpecific(rtp_header,
155 : payload,
156 : payload_length,
157 : specific_payload.Audio,
158 0 : is_red);
159 : }
160 :
161 0 : RTPAliveType RTPReceiverAudio::ProcessDeadOrAlive(
162 : uint16_t last_payload_length) const {
163 :
164 : // Our CNG is 9 bytes; if it's a likely CNG the receiver needs to check
165 : // kRtpNoRtp against NetEq speech_type kOutputPLCtoCNG.
166 0 : if (last_payload_length < 10) { // our CNG is 9 bytes
167 0 : return kRtpNoRtp;
168 : } else {
169 0 : return kRtpDead;
170 : }
171 : }
172 :
173 0 : void RTPReceiverAudio::CheckPayloadChanged(int8_t payload_type,
174 : PayloadUnion* /* specific_payload */,
175 : bool* should_discard_changes) {
176 0 : *should_discard_changes =
177 0 : TelephoneEventPayloadType(payload_type) || CNGPayloadType(payload_type);
178 0 : }
179 :
180 0 : int RTPReceiverAudio::Energy(uint8_t array_of_energy[kRtpCsrcSize]) const {
181 0 : rtc::CritScope cs(&crit_sect_);
182 :
183 0 : assert(num_energy_ <= kRtpCsrcSize);
184 :
185 0 : if (num_energy_ > 0) {
186 0 : memcpy(array_of_energy, current_remote_energy_,
187 0 : sizeof(uint8_t) * num_energy_);
188 : }
189 0 : return num_energy_;
190 : }
191 :
192 0 : int32_t RTPReceiverAudio::InvokeOnInitializeDecoder(
193 : RtpFeedback* callback,
194 : int8_t payload_type,
195 : const char payload_name[RTP_PAYLOAD_NAME_SIZE],
196 : const PayloadUnion& specific_payload) const {
197 0 : if (-1 ==
198 0 : callback->OnInitializeDecoder(
199 0 : payload_type, payload_name, specific_payload.Audio.frequency,
200 0 : specific_payload.Audio.channels, specific_payload.Audio.rate)) {
201 0 : LOG(LS_ERROR) << "Failed to create decoder for payload type: "
202 0 : << payload_name << "/" << static_cast<int>(payload_type);
203 0 : return -1;
204 : }
205 0 : return 0;
206 : }
207 :
208 : // We are not allowed to have any critsects when calling data_callback.
209 0 : int32_t RTPReceiverAudio::ParseAudioCodecSpecific(
210 : WebRtcRTPHeader* rtp_header,
211 : const uint8_t* payload_data,
212 : size_t payload_length,
213 : const AudioPayload& audio_specific,
214 : bool is_red) {
215 :
216 0 : if (payload_length == 0) {
217 0 : return 0;
218 : }
219 :
220 : bool telephone_event_packet =
221 0 : TelephoneEventPayloadType(rtp_header->header.payloadType);
222 0 : if (telephone_event_packet) {
223 0 : rtc::CritScope lock(&crit_sect_);
224 :
225 : // RFC 4733 2.3
226 : // 0 1 2 3
227 : // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
228 : // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
229 : // | event |E|R| volume | duration |
230 : // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
231 : //
232 0 : if (payload_length % 4 != 0) {
233 0 : return -1;
234 : }
235 0 : size_t number_of_events = payload_length / 4;
236 :
237 : // sanity
238 0 : if (number_of_events >= MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS) {
239 0 : number_of_events = MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS;
240 : }
241 0 : for (size_t n = 0; n < number_of_events; ++n) {
242 0 : bool end = (payload_data[(4 * n) + 1] & 0x80) ? true : false;
243 :
244 : std::set<uint8_t>::iterator event =
245 0 : telephone_event_reported_.find(payload_data[4 * n]);
246 :
247 0 : if (event != telephone_event_reported_.end()) {
248 : // we have already seen this event
249 0 : if (end) {
250 0 : telephone_event_reported_.erase(payload_data[4 * n]);
251 : }
252 : } else {
253 0 : if (end) {
254 : // don't add if it's a end of a tone
255 : } else {
256 0 : telephone_event_reported_.insert(payload_data[4 * n]);
257 : }
258 : }
259 : }
260 :
261 : // RFC 4733 2.5.1.3 & 2.5.2.3 Long-Duration Events
262 : // should not be a problem since we don't care about the duration
263 :
264 : // RFC 4733 See 2.5.1.5. & 2.5.2.4. Multiple Events in a Packet
265 : }
266 :
267 : {
268 0 : rtc::CritScope lock(&crit_sect_);
269 :
270 : // Check if this is a CNG packet, receiver might want to know
271 0 : if (CNGPayloadType(rtp_header->header.payloadType)) {
272 0 : rtp_header->type.Audio.isCNG = true;
273 0 : rtp_header->frameType = kAudioFrameCN;
274 : } else {
275 0 : rtp_header->frameType = kAudioFrameSpeech;
276 0 : rtp_header->type.Audio.isCNG = false;
277 : }
278 :
279 : // check if it's a DTMF event, hence something we can playout
280 0 : if (telephone_event_packet) {
281 0 : if (!telephone_event_forward_to_decoder_) {
282 : // don't forward event to decoder
283 0 : return 0;
284 : }
285 : std::set<uint8_t>::iterator first =
286 0 : telephone_event_reported_.begin();
287 0 : if (first != telephone_event_reported_.end() && *first > 15) {
288 : // don't forward non DTMF events
289 0 : return 0;
290 : }
291 : }
292 : }
293 : // TODO(holmer): Break this out to have RED parsing handled generically.
294 0 : if (is_red && !(payload_data[0] & 0x80)) {
295 : // we recive only one frame packed in a RED packet remove the RED wrapper
296 0 : rtp_header->header.payloadType = payload_data[0];
297 :
298 : // only one frame in the RED strip the one byte to help NetEq
299 0 : return data_callback_->OnReceivedPayloadData(
300 0 : payload_data + 1, payload_length - 1, rtp_header);
301 : }
302 :
303 0 : rtp_header->type.Audio.channel = audio_specific.channels;
304 0 : return data_callback_->OnReceivedPayloadData(
305 0 : payload_data, payload_length, rtp_header);
306 : }
307 : } // namespace webrtc
|