Line data Source code
1 : /*
2 : * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #ifndef WEBRTC_MODULES_INCLUDE_MODULE_COMMON_TYPES_H_
12 : #define WEBRTC_MODULES_INCLUDE_MODULE_COMMON_TYPES_H_
13 :
14 : #include <assert.h>
15 : #include <string.h> // memcpy
16 :
17 : #include <algorithm>
18 : #include <limits>
19 :
20 : #include "webrtc/api/video/video_rotation.h"
21 : #include "webrtc/base/constructormagic.h"
22 : #include "webrtc/base/deprecation.h"
23 : #include "webrtc/base/safe_conversions.h"
24 : #include "webrtc/common_types.h"
25 : #include "webrtc/modules/video_coding/codecs/vp8/include/vp8_globals.h"
26 : #include "webrtc/modules/video_coding/codecs/vp9/include/vp9_globals.h"
27 : #include "webrtc/modules/video_coding/codecs/h264/include/h264_globals.h"
28 : #include "webrtc/typedefs.h"
29 :
30 : namespace webrtc {
31 :
// Audio-specific metadata attached to a parsed RTP packet (energy levels,
// comfort-noise flag, channel count).
struct RTPAudioHeader {
  uint8_t numEnergy;                  // number of valid entries in arrOfEnergy
  uint8_t arrOfEnergy[kRtpCsrcSize];  // one energy byte (0-9) per channel
  bool isCNG;                         // is this CNG (comfort noise)
  size_t channel;                     // number of channels 2 = stereo
};
38 :
// Codec-specific video header data. Exactly one member is meaningful at a
// time; the active member is selected by RTPVideoHeader::codec.
union RTPVideoTypeHeader {
  RTPVideoHeaderVP8 VP8;
  RTPVideoHeaderVP9 VP9;
  RTPVideoHeaderH264 H264;
};
44 :
// Video codec carried in an RTP stream; selects the active member of
// RTPVideoTypeHeader (kRtpVideoNone/kRtpVideoGeneric use no codec header).
enum RtpVideoCodecTypes {
  kRtpVideoNone,
  kRtpVideoGeneric,
  kRtpVideoVp8,
  kRtpVideoVp9,
  kRtpVideoH264
};
52 : // Since RTPVideoHeader is used as a member of a union, it can't have a
53 : // non-trivial default constructor.
struct RTPVideoHeader {
  uint16_t width;  // Frame size in pixels.
  uint16_t height;
  VideoRotation rotation;  // Rotation to apply before rendering.

  // Min/max playout delay requested by the sender.
  PlayoutDelay playout_delay;

  // Anonymous union: both names alias the same flag; isFirstPacket is the
  // deprecated spelling kept for source compatibility.
  union {
    bool is_first_packet_in_frame;
    RTC_DEPRECATED bool isFirstPacket;  // first packet in frame
  };
  uint8_t simulcastIdx;  // Index of the simulcast encoder creating
                         // this frame, 0 if not using simulcast.
  RtpVideoCodecTypes codec;        // Selects the active codecHeader member.
  RTPVideoTypeHeader codecHeader;
};
// Media-specific part of WebRtcRTPHeader. Exactly one member is meaningful;
// the union itself does not record which — the owner must know the media type.
union RTPTypeHeader {
  RTPAudioHeader Audio;
  RTPVideoHeader Video;
};
74 :
// Fully parsed RTP header plus WebRTC-specific per-packet metadata.
struct WebRtcRTPHeader {
  RTPHeader header;     // The standard RTP header fields.
  FrameType frameType;  // Audio/video frame classification of the payload.
  RTPTypeHeader type;   // Audio- or video-specific header data.
  // NTP time of the capture time in local timebase in milliseconds.
  int64_t ntp_time_ms;
};
82 :
83 : class RTPFragmentationHeader {
84 : public:
85 0 : RTPFragmentationHeader()
86 0 : : fragmentationVectorSize(0),
87 : fragmentationOffset(NULL),
88 : fragmentationLength(NULL),
89 : fragmentationTimeDiff(NULL),
90 0 : fragmentationPlType(NULL) {};
91 :
92 0 : ~RTPFragmentationHeader() {
93 0 : delete[] fragmentationOffset;
94 0 : delete[] fragmentationLength;
95 0 : delete[] fragmentationTimeDiff;
96 0 : delete[] fragmentationPlType;
97 0 : }
98 :
99 0 : void CopyFrom(const RTPFragmentationHeader& src) {
100 0 : if (this == &src) {
101 0 : return;
102 : }
103 :
104 0 : if (src.fragmentationVectorSize != fragmentationVectorSize) {
105 : // new size of vectors
106 :
107 : // delete old
108 0 : delete[] fragmentationOffset;
109 0 : fragmentationOffset = NULL;
110 0 : delete[] fragmentationLength;
111 0 : fragmentationLength = NULL;
112 0 : delete[] fragmentationTimeDiff;
113 0 : fragmentationTimeDiff = NULL;
114 0 : delete[] fragmentationPlType;
115 0 : fragmentationPlType = NULL;
116 :
117 0 : if (src.fragmentationVectorSize > 0) {
118 : // allocate new
119 0 : if (src.fragmentationOffset) {
120 0 : fragmentationOffset = new size_t[src.fragmentationVectorSize];
121 : }
122 0 : if (src.fragmentationLength) {
123 0 : fragmentationLength = new size_t[src.fragmentationVectorSize];
124 : }
125 0 : if (src.fragmentationTimeDiff) {
126 0 : fragmentationTimeDiff = new uint16_t[src.fragmentationVectorSize];
127 : }
128 0 : if (src.fragmentationPlType) {
129 0 : fragmentationPlType = new uint8_t[src.fragmentationVectorSize];
130 : }
131 : }
132 : // set new size
133 0 : fragmentationVectorSize = src.fragmentationVectorSize;
134 : }
135 :
136 0 : if (src.fragmentationVectorSize > 0) {
137 : // copy values
138 0 : if (src.fragmentationOffset) {
139 0 : memcpy(fragmentationOffset, src.fragmentationOffset,
140 0 : src.fragmentationVectorSize * sizeof(size_t));
141 : }
142 0 : if (src.fragmentationLength) {
143 0 : memcpy(fragmentationLength, src.fragmentationLength,
144 0 : src.fragmentationVectorSize * sizeof(size_t));
145 : }
146 0 : if (src.fragmentationTimeDiff) {
147 0 : memcpy(fragmentationTimeDiff, src.fragmentationTimeDiff,
148 0 : src.fragmentationVectorSize * sizeof(uint16_t));
149 : }
150 0 : if (src.fragmentationPlType) {
151 0 : memcpy(fragmentationPlType, src.fragmentationPlType,
152 0 : src.fragmentationVectorSize * sizeof(uint8_t));
153 : }
154 : }
155 : }
156 :
157 0 : void VerifyAndAllocateFragmentationHeader(const size_t size) {
158 0 : assert(size <= std::numeric_limits<uint16_t>::max());
159 0 : const uint16_t size16 = static_cast<uint16_t>(size);
160 0 : if (fragmentationVectorSize < size16) {
161 0 : uint16_t oldVectorSize = fragmentationVectorSize;
162 : {
163 : // offset
164 0 : size_t* oldOffsets = fragmentationOffset;
165 0 : fragmentationOffset = new size_t[size16];
166 0 : memset(fragmentationOffset + oldVectorSize, 0,
167 0 : sizeof(size_t) * (size16 - oldVectorSize));
168 : // copy old values
169 0 : memcpy(fragmentationOffset, oldOffsets,
170 0 : sizeof(size_t) * oldVectorSize);
171 0 : delete[] oldOffsets;
172 : }
173 : // length
174 : {
175 0 : size_t* oldLengths = fragmentationLength;
176 0 : fragmentationLength = new size_t[size16];
177 0 : memset(fragmentationLength + oldVectorSize, 0,
178 0 : sizeof(size_t) * (size16 - oldVectorSize));
179 0 : memcpy(fragmentationLength, oldLengths,
180 0 : sizeof(size_t) * oldVectorSize);
181 0 : delete[] oldLengths;
182 : }
183 : // time diff
184 : {
185 0 : uint16_t* oldTimeDiffs = fragmentationTimeDiff;
186 0 : fragmentationTimeDiff = new uint16_t[size16];
187 0 : memset(fragmentationTimeDiff + oldVectorSize, 0,
188 0 : sizeof(uint16_t) * (size16 - oldVectorSize));
189 0 : memcpy(fragmentationTimeDiff, oldTimeDiffs,
190 0 : sizeof(uint16_t) * oldVectorSize);
191 0 : delete[] oldTimeDiffs;
192 : }
193 : // payload type
194 : {
195 0 : uint8_t* oldTimePlTypes = fragmentationPlType;
196 0 : fragmentationPlType = new uint8_t[size16];
197 0 : memset(fragmentationPlType + oldVectorSize, 0,
198 0 : sizeof(uint8_t) * (size16 - oldVectorSize));
199 0 : memcpy(fragmentationPlType, oldTimePlTypes,
200 0 : sizeof(uint8_t) * oldVectorSize);
201 0 : delete[] oldTimePlTypes;
202 : }
203 0 : fragmentationVectorSize = size16;
204 : }
205 0 : }
206 :
207 : uint16_t fragmentationVectorSize; // Number of fragmentations
208 : size_t* fragmentationOffset; // Offset of pointer to data for each
209 : // fragmentation
210 : size_t* fragmentationLength; // Data size for each fragmentation
211 : uint16_t* fragmentationTimeDiff; // Timestamp difference relative "now" for
212 : // each fragmentation
213 : uint8_t* fragmentationPlType; // Payload type of each fragmentation
214 :
215 : private:
216 : RTC_DISALLOW_COPY_AND_ASSIGN(RTPFragmentationHeader);
217 : };
218 :
// VoIP metrics as defined by the RTCP XR VoIP Metrics Report Block,
// RFC 3611 section 4.7. Field names mirror the spec; see the RFC for exact
// units and encodings.
struct RTCPVoIPMetric {
  // RFC 3611 4.7
  uint8_t lossRate;        // Fraction of packets lost.
  uint8_t discardRate;     // Fraction of packets discarded (e.g. late arrival).
  uint8_t burstDensity;    // Fraction of loss/discard within bursts.
  uint8_t gapDensity;      // Fraction of loss/discard within gaps.
  uint16_t burstDuration;  // Mean burst duration (ms, per the RFC).
  uint16_t gapDuration;    // Mean gap duration (ms, per the RFC).
  uint16_t roundTripDelay;
  uint16_t endSystemDelay;
  uint8_t signalLevel;
  uint8_t noiseLevel;
  uint8_t RERL;            // Residual echo return loss.
  uint8_t Gmin;            // Gap threshold.
  uint8_t Rfactor;         // Call quality R factor.
  uint8_t extRfactor;      // External R factor.
  uint8_t MOSLQ;           // Estimated MOS, listening quality.
  uint8_t MOSCQ;           // Estimated MOS, conversational quality.
  uint8_t RXconfig;        // Receiver configuration byte.
  uint16_t JBnominal;      // Jitter buffer nominal delay.
  uint16_t JBmax;          // Jitter buffer maximum delay.
  uint16_t JBabsMax;       // Jitter buffer absolute maximum delay.
};
242 :
243 : // Types for the FEC packet masks. The type |kFecMaskRandom| is based on a
244 : // random loss model. The type |kFecMaskBursty| is based on a bursty/consecutive
245 : // loss model. The packet masks are defined in
246 : // modules/rtp_rtcp/fec_private_tables_random(bursty).h
enum FecMaskType {
  kFecMaskRandom,  // Mask optimized for a random loss model.
  kFecMaskBursty,  // Mask optimized for bursty/consecutive losses.
};
251 :
252 : // Struct containing forward error correction settings.
// Struct containing forward error correction settings.
struct FecProtectionParams {
  int fec_rate;                // Requested FEC protection rate.
  int max_fec_frames;          // Max number of media frames FEC may span.
  FecMaskType fec_mask_type;   // Which packet-mask table to use (see above).
};
258 :
259 : // Interface used by the CallStats class to distribute call statistics.
260 : // Callbacks will be triggered as soon as the class has been registered to a
261 : // CallStats object using RegisterStatsObserver.
class CallStatsObserver {
 public:
  // Invoked with the latest average and maximum round-trip-time estimates,
  // both in milliseconds.
  virtual void OnRttUpdate(int64_t avg_rtt_ms, int64_t max_rtt_ms) = 0;

  virtual ~CallStatsObserver() {}
};
268 :
269 : /* This class holds up to 60 ms of super-wideband (32 kHz) stereo audio. It
270 : * allows for adding and subtracting frames while keeping track of the resulting
271 : * states.
272 : *
273 : * Notes
274 : * - The total number of samples in |data_| is
275 : * samples_per_channel_ * num_channels_
276 : *
277 : * - Stereo data is interleaved starting with the left channel.
278 : *
279 : */
280 : class AudioFrame {
281 : public:
282 : // Stereo, 32 kHz, 60 ms (2 * 32 * 60)
283 : enum : size_t {
284 : kMaxDataSizeSamples = 3840
285 : };
286 :
287 : enum VADActivity {
288 : kVadActive = 0,
289 : kVadPassive = 1,
290 : kVadUnknown = 2
291 : };
292 : enum SpeechType {
293 : kNormalSpeech = 0,
294 : kPLC = 1,
295 : kCNG = 2,
296 : kPLCCNG = 3,
297 : kUndefined = 4
298 : };
299 :
300 : AudioFrame();
301 :
302 : // Resets all members to their default state (except does not modify the
303 : // contents of |data_|).
304 : void Reset();
305 :
306 : void UpdateFrame(int id, uint32_t timestamp, const int16_t* data,
307 : size_t samples_per_channel, int sample_rate_hz,
308 : SpeechType speech_type, VADActivity vad_activity,
309 : size_t num_channels = 1);
310 :
311 : void CopyFrom(const AudioFrame& src);
312 :
313 : // These methods are deprecated. Use the functions in
314 : // webrtc/audio/utility instead. These methods will exists for a
315 : // short period of time until webrtc clients have updated. See
316 : // webrtc:6548 for details.
317 : RTC_DEPRECATED void Mute();
318 : RTC_DEPRECATED AudioFrame& operator>>=(const int rhs);
319 : RTC_DEPRECATED AudioFrame& operator+=(const AudioFrame& rhs);
320 :
321 : int id_;
322 : // RTP timestamp of the first sample in the AudioFrame.
323 : uint32_t timestamp_ = 0;
324 : // Time since the first frame in milliseconds.
325 : // -1 represents an uninitialized value.
326 : int64_t elapsed_time_ms_ = -1;
327 : // NTP time of the estimated capture time in local timebase in milliseconds.
328 : // -1 represents an uninitialized value.
329 : int64_t ntp_time_ms_ = -1;
330 : int16_t data_[kMaxDataSizeSamples];
331 : size_t samples_per_channel_ = 0;
332 : int sample_rate_hz_ = 0;
333 : size_t num_channels_ = 0;
334 : SpeechType speech_type_ = kUndefined;
335 : VADActivity vad_activity_ = kVadUnknown;
336 :
337 : private:
338 : RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame);
339 : };
340 :
341 : // TODO(henrik.lundin) Can we remove the call to data_()?
342 : // See https://bugs.chromium.org/p/webrtc/issues/detail?id=5647.
// Value-initializes |data_| (all samples zeroed) via the empty
// mem-initializer; the remaining members are set by their in-class
// initializers where present.
inline AudioFrame::AudioFrame()
    : data_() {
}
346 :
347 0 : inline void AudioFrame::Reset() {
348 0 : id_ = -1;
349 : // TODO(wu): Zero is a valid value for |timestamp_|. We should initialize
350 : // to an invalid value, or add a new member to indicate invalidity.
351 0 : timestamp_ = 0;
352 0 : elapsed_time_ms_ = -1;
353 0 : ntp_time_ms_ = -1;
354 0 : samples_per_channel_ = 0;
355 0 : sample_rate_hz_ = 0;
356 0 : num_channels_ = 0;
357 0 : speech_type_ = kUndefined;
358 0 : vad_activity_ = kVadUnknown;
359 0 : }
360 :
361 0 : inline void AudioFrame::UpdateFrame(int id,
362 : uint32_t timestamp,
363 : const int16_t* data,
364 : size_t samples_per_channel,
365 : int sample_rate_hz,
366 : SpeechType speech_type,
367 : VADActivity vad_activity,
368 : size_t num_channels) {
369 0 : id_ = id;
370 0 : timestamp_ = timestamp;
371 0 : samples_per_channel_ = samples_per_channel;
372 0 : sample_rate_hz_ = sample_rate_hz;
373 0 : speech_type_ = speech_type;
374 0 : vad_activity_ = vad_activity;
375 0 : num_channels_ = num_channels;
376 :
377 0 : const size_t length = samples_per_channel * num_channels;
378 0 : assert(length <= kMaxDataSizeSamples);
379 0 : if (data != NULL) {
380 0 : memcpy(data_, data, sizeof(int16_t) * length);
381 : } else {
382 0 : memset(data_, 0, sizeof(int16_t) * length);
383 : }
384 0 : }
385 :
386 0 : inline void AudioFrame::CopyFrom(const AudioFrame& src) {
387 0 : if (this == &src) return;
388 :
389 0 : id_ = src.id_;
390 0 : timestamp_ = src.timestamp_;
391 0 : elapsed_time_ms_ = src.elapsed_time_ms_;
392 0 : ntp_time_ms_ = src.ntp_time_ms_;
393 0 : samples_per_channel_ = src.samples_per_channel_;
394 0 : sample_rate_hz_ = src.sample_rate_hz_;
395 0 : speech_type_ = src.speech_type_;
396 0 : vad_activity_ = src.vad_activity_;
397 0 : num_channels_ = src.num_channels_;
398 :
399 0 : const size_t length = samples_per_channel_ * num_channels_;
400 0 : assert(length <= kMaxDataSizeSamples);
401 0 : memcpy(data_, src.data_, sizeof(int16_t) * length);
402 : }
403 :
404 : inline void AudioFrame::Mute() {
405 : memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t));
406 : }
407 :
408 : inline AudioFrame& AudioFrame::operator>>=(const int rhs) {
409 : assert((num_channels_ > 0) && (num_channels_ < 3));
410 : if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
411 :
412 : for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
413 : data_[i] = static_cast<int16_t>(data_[i] >> rhs);
414 : }
415 : return *this;
416 : }
417 :
418 : inline AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) {
419 : // Sanity check
420 : assert((num_channels_ > 0) && (num_channels_ < 3));
421 : if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
422 : if (num_channels_ != rhs.num_channels_) return *this;
423 :
424 : bool noPrevData = false;
425 : if (samples_per_channel_ != rhs.samples_per_channel_) {
426 : if (samples_per_channel_ == 0) {
427 : // special case we have no data to start with
428 : samples_per_channel_ = rhs.samples_per_channel_;
429 : noPrevData = true;
430 : } else {
431 : return *this;
432 : }
433 : }
434 :
435 : if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) {
436 : vad_activity_ = kVadActive;
437 : } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) {
438 : vad_activity_ = kVadUnknown;
439 : }
440 :
441 : if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined;
442 :
443 : if (noPrevData) {
444 : memcpy(data_, rhs.data_,
445 : sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_);
446 : } else {
447 : // IMPROVEMENT this can be done very fast in assembly
448 : for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
449 : int32_t wrap_guard =
450 : static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]);
451 : data_[i] = rtc::saturated_cast<int16_t>(wrap_guard);
452 : }
453 : }
454 : return *this;
455 : }
456 :
// True iff |sequence_number| is strictly newer than |prev_sequence_number|
// under modulo-2^16 (wrap-around) ordering. For the ambiguous case where the
// two values are exactly 0x8000 apart, the numerically larger one is defined
// to be newer, so IsNewer(a, b) and IsNewer(b, a) never both hold for a != b.
inline bool IsNewerSequenceNumber(uint16_t sequence_number,
                                  uint16_t prev_sequence_number) {
  const uint16_t forward_delta =
      static_cast<uint16_t>(sequence_number - prev_sequence_number);
  if (forward_delta == 0x8000) {
    return sequence_number > prev_sequence_number;
  }
  return forward_delta != 0 && forward_delta < 0x8000;
}
468 :
// True iff |timestamp| is strictly newer than |prev_timestamp| under
// modulo-2^32 (wrap-around) ordering. For the ambiguous case where the two
// values are exactly 0x80000000 apart, the numerically larger one is defined
// to be newer, so IsNewer(t1, t2) and IsNewer(t2, t1) never both hold for
// t1 != t2.
inline bool IsNewerTimestamp(uint32_t timestamp, uint32_t prev_timestamp) {
  const uint32_t forward_delta = timestamp - prev_timestamp;
  if (forward_delta == 0x80000000) {
    return timestamp > prev_timestamp;
  }
  return forward_delta != 0 && forward_delta < 0x80000000;
}
480 :
// True iff |timestamp| equals |prev_timestamp| or is newer under
// modulo-2^32 ordering (ties at exactly 0x80000000 apart count as older).
inline bool IsNewerOrSameTimestamp(uint32_t timestamp, uint32_t prev_timestamp) {
  if (timestamp == prev_timestamp) {
    return true;
  }
  return static_cast<uint32_t>(timestamp - prev_timestamp) < 0x80000000;
}
485 :
486 0 : inline uint16_t LatestSequenceNumber(uint16_t sequence_number1,
487 : uint16_t sequence_number2) {
488 0 : return IsNewerSequenceNumber(sequence_number1, sequence_number2)
489 0 : ? sequence_number1
490 0 : : sequence_number2;
491 : }
492 :
493 0 : inline uint32_t LatestTimestamp(uint32_t timestamp1, uint32_t timestamp2) {
494 0 : return IsNewerTimestamp(timestamp1, timestamp2) ? timestamp1 : timestamp2;
495 : }
496 :
497 : // Utility class to unwrap a sequence number to a larger type, for easier
498 : // handling large ranges. Note that sequence numbers will never be unwrapped
499 : // to a negative value.
500 : class SequenceNumberUnwrapper {
501 : public:
502 0 : SequenceNumberUnwrapper() : last_seq_(-1) {}
503 :
504 : // Get the unwrapped sequence, but don't update the internal state.
505 0 : int64_t UnwrapWithoutUpdate(uint16_t sequence_number) {
506 0 : if (last_seq_ == -1)
507 0 : return sequence_number;
508 :
509 0 : uint16_t cropped_last = static_cast<uint16_t>(last_seq_);
510 0 : int64_t delta = sequence_number - cropped_last;
511 0 : if (IsNewerSequenceNumber(sequence_number, cropped_last)) {
512 0 : if (delta < 0)
513 0 : delta += (1 << 16); // Wrap forwards.
514 0 : } else if (delta > 0 && (last_seq_ + delta - (1 << 16)) >= 0) {
515 : // If sequence_number is older but delta is positive, this is a backwards
516 : // wrap-around. However, don't wrap backwards past 0 (unwrapped).
517 0 : delta -= (1 << 16);
518 : }
519 :
520 0 : return last_seq_ + delta;
521 : }
522 :
523 : // Only update the internal state to the specified last (unwrapped) sequence.
524 0 : void UpdateLast(int64_t last_sequence) { last_seq_ = last_sequence; }
525 :
526 : // Unwrap the sequence number and update the internal state.
527 0 : int64_t Unwrap(uint16_t sequence_number) {
528 0 : int64_t unwrapped = UnwrapWithoutUpdate(sequence_number);
529 0 : UpdateLast(unwrapped);
530 0 : return unwrapped;
531 : }
532 :
533 : private:
534 : int64_t last_seq_;
535 : };
536 :
537 : } // namespace webrtc
538 :
539 : #endif // WEBRTC_MODULES_INCLUDE_MODULE_COMMON_TYPES_H_
|