Line data Source code
1 : /*
2 : * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "webrtc/modules/video_coding/decoding_state.h"
12 :
13 : #include "webrtc/base/logging.h"
14 : #include "webrtc/common_video/h264/h264_common.h"
15 : #include "webrtc/modules/include/module_common_types.h"
16 : #include "webrtc/modules/video_coding/frame_buffer.h"
17 : #include "webrtc/modules/video_coding/jitter_buffer_common.h"
18 : #include "webrtc/modules/video_coding/packet.h"
19 :
20 : namespace webrtc {
21 :
22 0 : VCMDecodingState::VCMDecodingState()
23 : : sequence_num_(0),
24 : time_stamp_(0),
25 : picture_id_(kNoPictureId),
26 : temporal_id_(kNoTemporalIdx),
27 : tl0_pic_id_(kNoTl0PicIdx),
28 : full_sync_(true),
29 0 : in_initial_state_(true) {
30 0 : memset(frame_decoded_, 0, sizeof(frame_decoded_));
31 0 : }
32 :
33 0 : VCMDecodingState::~VCMDecodingState() {}
34 :
35 0 : void VCMDecodingState::Reset() {
36 : // TODO(mikhal): Verify - not always would want to reset the sync
37 0 : sequence_num_ = 0;
38 0 : time_stamp_ = 0;
39 0 : picture_id_ = kNoPictureId;
40 0 : temporal_id_ = kNoTemporalIdx;
41 0 : tl0_pic_id_ = kNoTl0PicIdx;
42 0 : full_sync_ = true;
43 0 : in_initial_state_ = true;
44 0 : memset(frame_decoded_, 0, sizeof(frame_decoded_));
45 0 : received_sps_.clear();
46 0 : received_pps_.clear();
47 0 : }
48 :
49 0 : uint32_t VCMDecodingState::time_stamp() const {
50 0 : return time_stamp_;
51 : }
52 :
53 0 : uint16_t VCMDecodingState::sequence_num() const {
54 0 : return sequence_num_;
55 : }
56 :
57 0 : bool VCMDecodingState::IsOldFrame(const VCMFrameBuffer* frame) const {
58 0 : assert(frame != NULL);
59 0 : if (in_initial_state_)
60 0 : return false;
61 0 : return !IsNewerOrSameTimestamp(frame->TimeStamp(), time_stamp_);
62 : }
63 :
64 0 : bool VCMDecodingState::IsOldPacket(const VCMPacket* packet) const {
65 0 : assert(packet != NULL);
66 0 : if (in_initial_state_)
67 0 : return false;
68 0 : return !IsNewerOrSameTimestamp(packet->timestamp, time_stamp_);
69 : }
70 :
71 0 : void VCMDecodingState::SetState(const VCMFrameBuffer* frame) {
72 0 : assert(frame != NULL && frame->GetHighSeqNum() >= 0);
73 0 : if (!UsingFlexibleMode(frame))
74 0 : UpdateSyncState(frame);
75 0 : sequence_num_ = static_cast<uint16_t>(frame->GetHighSeqNum());
76 0 : time_stamp_ = frame->TimeStamp();
77 0 : picture_id_ = frame->PictureId();
78 0 : temporal_id_ = frame->TemporalId();
79 0 : tl0_pic_id_ = frame->Tl0PicId();
80 :
81 0 : for (const NaluInfo& nalu : frame->GetNaluInfos()) {
82 0 : if (nalu.type == H264::NaluType::kPps) {
83 0 : if (nalu.pps_id < 0) {
84 0 : LOG(LS_WARNING) << "Received pps without pps id.";
85 0 : } else if (nalu.sps_id < 0) {
86 0 : LOG(LS_WARNING) << "Received pps without sps id.";
87 : } else {
88 0 : received_pps_[nalu.pps_id] = nalu.sps_id;
89 : }
90 0 : } else if (nalu.type == H264::NaluType::kSps) {
91 0 : if (nalu.sps_id < 0) {
92 0 : LOG(LS_WARNING) << "Received sps without sps id.";
93 : } else {
94 0 : received_sps_.insert(nalu.sps_id);
95 : }
96 : }
97 : }
98 :
99 0 : if (UsingFlexibleMode(frame)) {
100 0 : uint16_t frame_index = picture_id_ % kFrameDecodedLength;
101 0 : if (in_initial_state_) {
102 0 : frame_decoded_cleared_to_ = frame_index;
103 0 : } else if (frame->FrameType() == kVideoFrameKey) {
104 0 : memset(frame_decoded_, 0, sizeof(frame_decoded_));
105 0 : frame_decoded_cleared_to_ = frame_index;
106 : } else {
107 0 : if (AheadOfFramesDecodedClearedTo(frame_index)) {
108 0 : while (frame_decoded_cleared_to_ != frame_index) {
109 0 : frame_decoded_cleared_to_ =
110 0 : (frame_decoded_cleared_to_ + 1) % kFrameDecodedLength;
111 0 : frame_decoded_[frame_decoded_cleared_to_] = false;
112 : }
113 : }
114 : }
115 0 : frame_decoded_[frame_index] = true;
116 : }
117 :
118 0 : in_initial_state_ = false;
119 0 : }
120 :
121 0 : void VCMDecodingState::CopyFrom(const VCMDecodingState& state) {
122 0 : sequence_num_ = state.sequence_num_;
123 0 : time_stamp_ = state.time_stamp_;
124 0 : picture_id_ = state.picture_id_;
125 0 : temporal_id_ = state.temporal_id_;
126 0 : tl0_pic_id_ = state.tl0_pic_id_;
127 0 : full_sync_ = state.full_sync_;
128 0 : in_initial_state_ = state.in_initial_state_;
129 0 : frame_decoded_cleared_to_ = state.frame_decoded_cleared_to_;
130 0 : memcpy(frame_decoded_, state.frame_decoded_, sizeof(frame_decoded_));
131 0 : received_sps_ = state.received_sps_;
132 0 : received_pps_ = state.received_pps_;
133 0 : }
134 :
135 0 : bool VCMDecodingState::UpdateEmptyFrame(const VCMFrameBuffer* frame) {
136 0 : bool empty_packet = frame->GetHighSeqNum() == frame->GetLowSeqNum();
137 0 : if (in_initial_state_ && empty_packet) {
138 : // Drop empty packets as long as we are in the initial state.
139 0 : return true;
140 : }
141 0 : if ((empty_packet && ContinuousSeqNum(frame->GetHighSeqNum())) ||
142 0 : ContinuousFrame(frame)) {
143 : // Continuous empty packets or continuous frames can be dropped if we
144 : // advance the sequence number.
145 0 : sequence_num_ = frame->GetHighSeqNum();
146 0 : time_stamp_ = frame->TimeStamp();
147 0 : return true;
148 : }
149 0 : return false;
150 : }
151 :
152 0 : void VCMDecodingState::UpdateOldPacket(const VCMPacket* packet) {
153 0 : assert(packet != NULL);
154 0 : if (packet->timestamp == time_stamp_) {
155 : // Late packet belonging to the last decoded frame - make sure we update the
156 : // last decoded sequence number.
157 0 : sequence_num_ = LatestSequenceNumber(packet->seqNum, sequence_num_);
158 : }
159 0 : }
160 :
161 0 : void VCMDecodingState::SetSeqNum(uint16_t new_seq_num) {
162 0 : sequence_num_ = new_seq_num;
163 0 : }
164 :
165 0 : bool VCMDecodingState::in_initial_state() const {
166 0 : return in_initial_state_;
167 : }
168 :
169 0 : bool VCMDecodingState::full_sync() const {
170 0 : return full_sync_;
171 : }
172 :
173 0 : void VCMDecodingState::UpdateSyncState(const VCMFrameBuffer* frame) {
174 0 : if (in_initial_state_)
175 0 : return;
176 0 : if (frame->TemporalId() == kNoTemporalIdx ||
177 0 : frame->Tl0PicId() == kNoTl0PicIdx) {
178 0 : full_sync_ = true;
179 0 : } else if (frame->FrameType() == kVideoFrameKey || frame->LayerSync()) {
180 0 : full_sync_ = true;
181 0 : } else if (full_sync_) {
182 : // Verify that we are still in sync.
183 : // Sync will be broken if continuity is true for layers but not for the
184 : // other methods (PictureId and SeqNum).
185 0 : if (UsingPictureId(frame)) {
186 : // First check for a valid tl0PicId.
187 0 : if (frame->Tl0PicId() - tl0_pic_id_ > 1) {
188 0 : full_sync_ = false;
189 : } else {
190 0 : full_sync_ = ContinuousPictureId(frame->PictureId());
191 : }
192 : } else {
193 0 : full_sync_ =
194 0 : ContinuousSeqNum(static_cast<uint16_t>(frame->GetLowSeqNum()));
195 : }
196 : }
197 : }
198 :
199 0 : bool VCMDecodingState::ContinuousFrame(const VCMFrameBuffer* frame) const {
200 : // Check continuity based on the following hierarchy:
201 : // - Temporal layers (stop here if out of sync).
202 : // - Picture Id when available.
203 : // - Sequence numbers.
204 : // Return true when in initial state.
205 : // Note that when a method is not applicable it will return false.
206 0 : assert(frame != NULL);
207 : // A key frame is always considered continuous as it doesn't refer to any
208 : // frames and therefore won't introduce any errors even if prior frames are
209 : // missing.
210 0 : if (frame->FrameType() == kVideoFrameKey &&
211 0 : HaveSpsAndPps(frame->GetNaluInfos())) {
212 0 : return true;
213 : }
214 : // When in the initial state we always require a key frame to start decoding.
215 0 : if (in_initial_state_)
216 0 : return false;
217 0 : if (ContinuousLayer(frame->TemporalId(), frame->Tl0PicId()))
218 0 : return true;
219 : // tl0picId is either not used, or should remain unchanged.
220 0 : if (frame->Tl0PicId() != tl0_pic_id_)
221 0 : return false;
222 : // Base layers are not continuous or temporal layers are inactive.
223 : // In the presence of temporal layers, check for Picture ID/sequence number
224 : // continuity if sync can be restored by this frame.
225 0 : if (!full_sync_ && !frame->LayerSync())
226 0 : return false;
227 0 : if (UsingPictureId(frame)) {
228 0 : if (UsingFlexibleMode(frame)) {
229 0 : return ContinuousFrameRefs(frame);
230 : } else {
231 0 : return ContinuousPictureId(frame->PictureId());
232 : }
233 : } else {
234 0 : return ContinuousSeqNum(static_cast<uint16_t>(frame->GetLowSeqNum())) &&
235 0 : HaveSpsAndPps(frame->GetNaluInfos());
236 : }
237 : }
238 :
239 0 : bool VCMDecodingState::ContinuousPictureId(int picture_id) const {
240 0 : int next_picture_id = picture_id_ + 1;
241 0 : if (picture_id < picture_id_) {
242 : // Wrap
243 0 : if (picture_id_ >= 0x80) {
244 : // 15 bits used for picture id
245 0 : return ((next_picture_id & 0x7FFF) == picture_id);
246 : } else {
247 : // 7 bits used for picture id
248 0 : return ((next_picture_id & 0x7F) == picture_id);
249 : }
250 : }
251 : // No wrap
252 0 : return (next_picture_id == picture_id);
253 : }
254 :
255 0 : bool VCMDecodingState::ContinuousSeqNum(uint16_t seq_num) const {
256 0 : return seq_num == static_cast<uint16_t>(sequence_num_ + 1);
257 : }
258 :
259 0 : bool VCMDecodingState::ContinuousLayer(int temporal_id, int tl0_pic_id) const {
260 : // First, check if applicable.
261 0 : if (temporal_id == kNoTemporalIdx || tl0_pic_id == kNoTl0PicIdx)
262 0 : return false;
263 : // If this is the first frame to use temporal layers, make sure we start
264 : // from base.
265 0 : else if (tl0_pic_id_ == kNoTl0PicIdx && temporal_id_ == kNoTemporalIdx &&
266 : temporal_id == 0)
267 0 : return true;
268 :
269 : // Current implementation: Look for base layer continuity.
270 0 : if (temporal_id != 0)
271 0 : return false;
272 0 : return (static_cast<uint8_t>(tl0_pic_id_ + 1) == tl0_pic_id);
273 : }
274 :
275 0 : bool VCMDecodingState::ContinuousFrameRefs(const VCMFrameBuffer* frame) const {
276 0 : uint8_t num_refs = frame->CodecSpecific()->codecSpecific.VP9.num_ref_pics;
277 0 : for (uint8_t r = 0; r < num_refs; ++r) {
278 0 : uint16_t frame_ref = frame->PictureId() -
279 0 : frame->CodecSpecific()->codecSpecific.VP9.p_diff[r];
280 0 : uint16_t frame_index = frame_ref % kFrameDecodedLength;
281 0 : if (AheadOfFramesDecodedClearedTo(frame_index) ||
282 0 : !frame_decoded_[frame_index]) {
283 0 : return false;
284 : }
285 : }
286 0 : return true;
287 : }
288 :
289 0 : bool VCMDecodingState::UsingPictureId(const VCMFrameBuffer* frame) const {
290 0 : return (frame->PictureId() != kNoPictureId && picture_id_ != kNoPictureId);
291 : }
292 :
293 0 : bool VCMDecodingState::UsingFlexibleMode(const VCMFrameBuffer* frame) const {
294 : bool is_flexible_mode =
295 0 : frame->CodecSpecific()->codecType == kVideoCodecVP9 &&
296 0 : frame->CodecSpecific()->codecSpecific.VP9.flexible_mode;
297 0 : if (is_flexible_mode && frame->PictureId() == kNoPictureId) {
298 0 : LOG(LS_WARNING) << "Frame is marked as using flexible mode but no"
299 0 : << "picture id is set.";
300 0 : return false;
301 : }
302 0 : return is_flexible_mode;
303 : }
304 :
305 : // TODO(philipel): change how check work, this check practially
306 : // limits the max p_diff to 64.
307 0 : bool VCMDecodingState::AheadOfFramesDecodedClearedTo(uint16_t index) const {
308 : // No way of knowing for sure if we are actually ahead of
309 : // frame_decoded_cleared_to_. We just make the assumption
310 : // that we are not trying to reference back to a very old
311 : // index, but instead are referencing a newer index.
312 : uint16_t diff =
313 0 : index > frame_decoded_cleared_to_
314 0 : ? kFrameDecodedLength - (index - frame_decoded_cleared_to_)
315 0 : : frame_decoded_cleared_to_ - index;
316 0 : return diff > kFrameDecodedLength / 2;
317 : }
318 :
319 0 : bool VCMDecodingState::HaveSpsAndPps(const std::vector<NaluInfo>& nalus) const {
320 0 : std::set<int> new_sps;
321 0 : std::map<int, int> new_pps;
322 0 : for (const NaluInfo& nalu : nalus) {
323 : // Check if this nalu actually contains sps/pps information or dependencies.
324 0 : if (nalu.sps_id == -1 && nalu.pps_id == -1)
325 0 : continue;
326 0 : switch (nalu.type) {
327 : case H264::NaluType::kPps:
328 0 : if (nalu.pps_id < 0) {
329 0 : LOG(LS_WARNING) << "Received pps without pps id.";
330 0 : } else if (nalu.sps_id < 0) {
331 0 : LOG(LS_WARNING) << "Received pps without sps id.";
332 : } else {
333 0 : new_pps[nalu.pps_id] = nalu.sps_id;
334 : }
335 0 : break;
336 : case H264::NaluType::kSps:
337 0 : if (nalu.sps_id < 0) {
338 0 : LOG(LS_WARNING) << "Received sps without sps id.";
339 : } else {
340 0 : new_sps.insert(nalu.sps_id);
341 : }
342 0 : break;
343 : default: {
344 0 : int needed_sps = -1;
345 0 : auto pps_it = new_pps.find(nalu.pps_id);
346 0 : if (pps_it != new_pps.end()) {
347 0 : needed_sps = pps_it->second;
348 : } else {
349 0 : auto pps_it2 = received_pps_.find(nalu.pps_id);
350 0 : if (pps_it2 == received_pps_.end()) {
351 0 : return false;
352 : }
353 0 : needed_sps = pps_it2->second;
354 : }
355 0 : if (new_sps.find(needed_sps) == new_sps.end() &&
356 0 : received_sps_.find(needed_sps) == received_sps_.end()) {
357 0 : return false;
358 : }
359 0 : break;
360 : }
361 : }
362 : }
363 0 : return true;
364 : }
365 :
366 : } // namespace webrtc
|