Line data Source code
1 : /*
2 : * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "webrtc/video/overuse_frame_detector.h"
12 :
13 : #include <assert.h>
14 : #include <math.h>
15 :
16 : #include <algorithm>
17 : #include <list>
18 : #include <map>
19 :
20 : #include "webrtc/api/video/video_frame.h"
21 : #include "webrtc/base/checks.h"
22 : #include "webrtc/base/logging.h"
23 : #include "webrtc/base/numerics/exp_filter.h"
24 : #include "webrtc/common_video/include/frame_callback.h"
25 : #include "webrtc/system_wrappers/include/clock.h"
26 :
27 : #if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
28 : #include <mach/mach.h>
29 : #endif // defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
30 :
31 : namespace webrtc {
32 :
33 : namespace {
// Delay before the periodic overuse check is re-posted after each run, and the
// delay before the very first check after the task is created.
const int64_t kCheckForOveruseIntervalMs = 5000;
const int64_t kTimeToFirstCheckForOveruseMs = 100;

// Delay between consecutive ramp-ups while in quick ramp-up mode. (Used for
// quick recovery.)
const int kQuickRampUpDelayMs = 10000;
// Delay between ramp-up attempts outside quick ramp-up mode. Starts at the
// standard value and is scaled up to, at most, the max value.
const int kStandardRampUpDelayMs = 40000;
const int kMaxRampUpDelayMs = 240000;
// Exponential back-off factor applied to the ramp-up delay, to prevent
// annoying up-down behaviour.
const double kRampUpBackoffFactor = 2.0;

// Max number of overuses detected before always applying the ramp-up delay.
const int kMaxOverusesBeforeApplyRampupDelay = 4;

// Time difference that a sample interval is divided by to obtain the exponent
// passed to the exponential filters.
const float kSampleDiffMs = 33.0f;
// The maximum exponent passed to the exponential filters.
const float kMaxExp = 7.0f;
51 :
52 : const auto kScaleReasonCpu = ScalingObserverInterface::ScaleReason::kCpu;
53 : } // namespace
54 :
55 0 : CpuOveruseOptions::CpuOveruseOptions()
56 : : high_encode_usage_threshold_percent(85),
57 : frame_timeout_interval_ms(1500),
58 : min_frame_samples(120),
59 : min_process_count(3),
60 0 : high_threshold_consecutive_count(2) {
61 : #if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
62 : // This is proof-of-concept code for letting the physical core count affect
63 : // the interval into which we attempt to scale. For now, the code is Mac OS
64 : // specific, since that's the platform were we saw most problems.
65 : // TODO(torbjorng): Enhance SystemInfo to return this metric.
66 :
67 : mach_port_t mach_host = mach_host_self();
68 : host_basic_info hbi = {};
69 : mach_msg_type_number_t info_count = HOST_BASIC_INFO_COUNT;
70 : kern_return_t kr =
71 : host_info(mach_host, HOST_BASIC_INFO, reinterpret_cast<host_info_t>(&hbi),
72 : &info_count);
73 : mach_port_deallocate(mach_task_self(), mach_host);
74 :
75 : int n_physical_cores;
76 : if (kr != KERN_SUCCESS) {
77 : // If we couldn't get # of physical CPUs, don't panic. Assume we have 1.
78 : n_physical_cores = 1;
79 : LOG(LS_ERROR) << "Failed to determine number of physical cores, assuming 1";
80 : } else {
81 : n_physical_cores = hbi.physical_cpu;
82 : LOG(LS_INFO) << "Number of physical cores:" << n_physical_cores;
83 : }
84 :
85 : // Change init list default for few core systems. The assumption here is that
86 : // encoding, which we measure here, takes about 1/4 of the processing of a
87 : // two-way call. This is roughly true for x86 using both vp8 and vp9 without
88 : // hardware encoding. Since we don't affect the incoming stream here, we only
89 : // control about 1/2 of the total processing needs, but this is not taken into
90 : // account.
91 : if (n_physical_cores == 1)
92 : high_encode_usage_threshold_percent = 20; // Roughly 1/4 of 100%.
93 : else if (n_physical_cores == 2)
94 : high_encode_usage_threshold_percent = 40; // Roughly 1/4 of 200%.
95 : #endif // defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
96 :
97 : // Note that we make the interval 2x+epsilon wide, since libyuv scaling steps
98 : // are close to that (when squared). This wide interval makes sure that
99 : // scaling up or down does not jump all the way across the interval.
100 0 : low_encode_usage_threshold_percent =
101 0 : (high_encode_usage_threshold_percent - 1) / 2;
102 0 : }
103 :
104 : // Class for calculating the processing usage on the send-side (the average
105 : // processing time of a frame divided by the average time difference between
106 : // captured frames).
107 : class OveruseFrameDetector::SendProcessingUsage {
108 : public:
109 0 : explicit SendProcessingUsage(const CpuOveruseOptions& options)
110 0 : : kWeightFactorFrameDiff(0.998f),
111 : kWeightFactorProcessing(0.995f),
112 : kInitialSampleDiffMs(40.0f),
113 : kMaxSampleDiffMs(45.0f),
114 : count_(0),
115 : options_(options),
116 0 : filtered_processing_ms_(new rtc::ExpFilter(kWeightFactorProcessing)),
117 0 : filtered_frame_diff_ms_(new rtc::ExpFilter(kWeightFactorFrameDiff)) {
118 0 : Reset();
119 0 : }
120 0 : ~SendProcessingUsage() {}
121 :
122 0 : void Reset() {
123 0 : count_ = 0;
124 0 : filtered_frame_diff_ms_->Reset(kWeightFactorFrameDiff);
125 0 : filtered_frame_diff_ms_->Apply(1.0f, kInitialSampleDiffMs);
126 0 : filtered_processing_ms_->Reset(kWeightFactorProcessing);
127 0 : filtered_processing_ms_->Apply(1.0f, InitialProcessingMs());
128 0 : }
129 :
130 0 : void AddCaptureSample(float sample_ms) {
131 0 : float exp = sample_ms / kSampleDiffMs;
132 0 : exp = std::min(exp, kMaxExp);
133 0 : filtered_frame_diff_ms_->Apply(exp, sample_ms);
134 0 : }
135 :
136 0 : void AddSample(float processing_ms, int64_t diff_last_sample_ms) {
137 0 : ++count_;
138 0 : float exp = diff_last_sample_ms / kSampleDiffMs;
139 0 : exp = std::min(exp, kMaxExp);
140 0 : filtered_processing_ms_->Apply(exp, processing_ms);
141 0 : }
142 :
143 0 : int Value() const {
144 0 : if (count_ < static_cast<uint32_t>(options_.min_frame_samples)) {
145 0 : return static_cast<int>(InitialUsageInPercent() + 0.5f);
146 : }
147 0 : float frame_diff_ms = std::max(filtered_frame_diff_ms_->filtered(), 1.0f);
148 0 : frame_diff_ms = std::min(frame_diff_ms, kMaxSampleDiffMs);
149 : float encode_usage_percent =
150 0 : 100.0f * filtered_processing_ms_->filtered() / frame_diff_ms;
151 0 : return static_cast<int>(encode_usage_percent + 0.5);
152 : }
153 :
154 : private:
155 0 : float InitialUsageInPercent() const {
156 : // Start in between the underuse and overuse threshold.
157 0 : return (options_.low_encode_usage_threshold_percent +
158 0 : options_.high_encode_usage_threshold_percent) / 2.0f;
159 : }
160 :
161 0 : float InitialProcessingMs() const {
162 0 : return InitialUsageInPercent() * kInitialSampleDiffMs / 100;
163 : }
164 :
165 : const float kWeightFactorFrameDiff;
166 : const float kWeightFactorProcessing;
167 : const float kInitialSampleDiffMs;
168 : const float kMaxSampleDiffMs;
169 : uint64_t count_;
170 : const CpuOveruseOptions options_;
171 : std::unique_ptr<rtc::ExpFilter> filtered_processing_ms_;
172 : std::unique_ptr<rtc::ExpFilter> filtered_frame_diff_ms_;
173 : };
174 :
175 0 : class OveruseFrameDetector::CheckOveruseTask : public rtc::QueuedTask {
176 : public:
177 0 : explicit CheckOveruseTask(OveruseFrameDetector* overuse_detector)
178 0 : : overuse_detector_(overuse_detector) {
179 0 : rtc::TaskQueue::Current()->PostDelayedTask(
180 0 : std::unique_ptr<rtc::QueuedTask>(this), kTimeToFirstCheckForOveruseMs);
181 0 : }
182 :
183 0 : void Stop() {
184 0 : RTC_CHECK(task_checker_.CalledSequentially());
185 0 : overuse_detector_ = nullptr;
186 0 : }
187 :
188 : private:
189 0 : bool Run() override {
190 0 : RTC_CHECK(task_checker_.CalledSequentially());
191 0 : if (!overuse_detector_)
192 0 : return true; // This will make the task queue delete this task.
193 0 : overuse_detector_->CheckForOveruse();
194 :
195 0 : rtc::TaskQueue::Current()->PostDelayedTask(
196 0 : std::unique_ptr<rtc::QueuedTask>(this), kCheckForOveruseIntervalMs);
197 : // Return false to prevent this task from being deleted. Ownership has been
198 : // transferred to the task queue when PostDelayedTask was called.
199 0 : return false;
200 : }
201 : rtc::SequencedTaskChecker task_checker_;
202 : OveruseFrameDetector* overuse_detector_;
203 : };
204 :
205 0 : OveruseFrameDetector::OveruseFrameDetector(
206 : Clock* clock,
207 : const CpuOveruseOptions& options,
208 : ScalingObserverInterface* observer,
209 : EncodedFrameObserver* encoder_timing,
210 0 : CpuOveruseMetricsObserver* metrics_observer)
211 : : check_overuse_task_(nullptr),
212 : options_(options),
213 : observer_(observer),
214 : encoder_timing_(encoder_timing),
215 : metrics_observer_(metrics_observer),
216 : clock_(clock),
217 : num_process_times_(0),
218 : last_capture_time_ms_(-1),
219 : last_processed_capture_time_ms_(-1),
220 : num_pixels_(0),
221 : last_overuse_time_ms_(-1),
222 : checks_above_threshold_(0),
223 : num_overuse_detections_(0),
224 : last_rampup_time_ms_(-1),
225 : in_quick_rampup_(false),
226 : current_rampup_delay_ms_(kStandardRampUpDelayMs),
227 0 : usage_(new SendProcessingUsage(options)) {
228 0 : task_checker_.Detach();
229 0 : }
230 :
231 0 : OveruseFrameDetector::~OveruseFrameDetector() {
232 0 : RTC_DCHECK(!check_overuse_task_) << "StopCheckForOverUse must be called.";
233 0 : }
234 :
235 0 : void OveruseFrameDetector::StartCheckForOveruse() {
236 0 : RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
237 0 : RTC_DCHECK(!check_overuse_task_);
238 0 : check_overuse_task_ = new CheckOveruseTask(this);
239 0 : }
240 0 : void OveruseFrameDetector::StopCheckForOveruse() {
241 0 : RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
242 0 : check_overuse_task_->Stop();
243 0 : check_overuse_task_ = nullptr;
244 0 : }
245 :
246 0 : void OveruseFrameDetector::EncodedFrameTimeMeasured(int encode_duration_ms) {
247 0 : RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
248 0 : if (!metrics_)
249 0 : metrics_ = rtc::Optional<CpuOveruseMetrics>(CpuOveruseMetrics());
250 0 : metrics_->encode_usage_percent = usage_->Value();
251 :
252 0 : metrics_observer_->OnEncodedFrameTimeMeasured(encode_duration_ms, *metrics_);
253 0 : }
254 :
255 0 : bool OveruseFrameDetector::FrameSizeChanged(int num_pixels) const {
256 0 : RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
257 0 : if (num_pixels != num_pixels_) {
258 0 : return true;
259 : }
260 0 : return false;
261 : }
262 :
263 0 : bool OveruseFrameDetector::FrameTimeoutDetected(int64_t now) const {
264 0 : RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
265 0 : if (last_capture_time_ms_ == -1)
266 0 : return false;
267 0 : return (now - last_capture_time_ms_) > options_.frame_timeout_interval_ms;
268 : }
269 :
270 0 : void OveruseFrameDetector::ResetAll(int num_pixels) {
271 0 : RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
272 0 : num_pixels_ = num_pixels;
273 0 : usage_->Reset();
274 0 : frame_timing_.clear();
275 0 : last_capture_time_ms_ = -1;
276 0 : last_processed_capture_time_ms_ = -1;
277 0 : num_process_times_ = 0;
278 0 : metrics_ = rtc::Optional<CpuOveruseMetrics>();
279 0 : }
280 :
281 0 : void OveruseFrameDetector::FrameCaptured(const VideoFrame& frame,
282 : int64_t time_when_first_seen_ms) {
283 0 : RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
284 :
285 0 : if (FrameSizeChanged(frame.width() * frame.height()) ||
286 0 : FrameTimeoutDetected(time_when_first_seen_ms)) {
287 0 : ResetAll(frame.width() * frame.height());
288 : }
289 :
290 0 : if (last_capture_time_ms_ != -1)
291 0 : usage_->AddCaptureSample(time_when_first_seen_ms - last_capture_time_ms_);
292 :
293 0 : last_capture_time_ms_ = time_when_first_seen_ms;
294 :
295 0 : frame_timing_.push_back(FrameTiming(frame.ntp_time_ms(), frame.timestamp(),
296 0 : time_when_first_seen_ms));
297 0 : }
298 :
299 0 : void OveruseFrameDetector::FrameSent(uint32_t timestamp,
300 : int64_t time_sent_in_ms) {
301 0 : RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
302 : // Delay before reporting actual encoding time, used to have the ability to
303 : // detect total encoding time when encoding more than one layer. Encoding is
304 : // here assumed to finish within a second (or that we get enough long-time
305 : // samples before one second to trigger an overuse even when this is not the
306 : // case).
307 : static const int64_t kEncodingTimeMeasureWindowMs = 1000;
308 0 : for (auto& it : frame_timing_) {
309 0 : if (it.timestamp == timestamp) {
310 0 : it.last_send_ms = time_sent_in_ms;
311 0 : break;
312 : }
313 : }
314 : // TODO(pbos): Handle the case/log errors when not finding the corresponding
315 : // frame (either very slow encoding or incorrect wrong timestamps returned
316 : // from the encoder).
317 : // This is currently the case for all frames on ChromeOS, so logging them
318 : // would be spammy, and triggering overuse would be wrong.
319 : // https://crbug.com/350106
320 0 : while (!frame_timing_.empty()) {
321 0 : FrameTiming timing = frame_timing_.front();
322 0 : if (time_sent_in_ms - timing.capture_ms < kEncodingTimeMeasureWindowMs)
323 0 : break;
324 0 : if (timing.last_send_ms != -1) {
325 : int encode_duration_ms =
326 0 : static_cast<int>(timing.last_send_ms - timing.capture_ms);
327 0 : if (encoder_timing_) {
328 0 : encoder_timing_->OnEncodeTiming(timing.capture_ntp_ms,
329 0 : encode_duration_ms);
330 : }
331 0 : if (last_processed_capture_time_ms_ != -1) {
332 0 : int64_t diff_ms = timing.capture_ms - last_processed_capture_time_ms_;
333 0 : usage_->AddSample(encode_duration_ms, diff_ms);
334 : }
335 0 : last_processed_capture_time_ms_ = timing.capture_ms;
336 0 : EncodedFrameTimeMeasured(encode_duration_ms);
337 : }
338 0 : frame_timing_.pop_front();
339 : }
340 0 : }
341 :
342 0 : void OveruseFrameDetector::CheckForOveruse() {
343 0 : RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
344 0 : ++num_process_times_;
345 0 : if (num_process_times_ <= options_.min_process_count || !metrics_)
346 0 : return;
347 :
348 0 : int64_t now = clock_->TimeInMilliseconds();
349 :
350 0 : if (IsOverusing(*metrics_)) {
351 : // If the last thing we did was going up, and now have to back down, we need
352 : // to check if this peak was short. If so we should back off to avoid going
353 : // back and forth between this load, the system doesn't seem to handle it.
354 0 : bool check_for_backoff = last_rampup_time_ms_ > last_overuse_time_ms_;
355 0 : if (check_for_backoff) {
356 0 : if (now - last_rampup_time_ms_ < kStandardRampUpDelayMs ||
357 0 : num_overuse_detections_ > kMaxOverusesBeforeApplyRampupDelay) {
358 : // Going up was not ok for very long, back off.
359 0 : current_rampup_delay_ms_ *= kRampUpBackoffFactor;
360 0 : if (current_rampup_delay_ms_ > kMaxRampUpDelayMs)
361 0 : current_rampup_delay_ms_ = kMaxRampUpDelayMs;
362 : } else {
363 : // Not currently backing off, reset rampup delay.
364 0 : current_rampup_delay_ms_ = kStandardRampUpDelayMs;
365 : }
366 : }
367 :
368 0 : last_overuse_time_ms_ = now;
369 0 : in_quick_rampup_ = false;
370 0 : checks_above_threshold_ = 0;
371 0 : ++num_overuse_detections_;
372 :
373 0 : if (observer_)
374 0 : observer_->ScaleDown(kScaleReasonCpu);
375 0 : } else if (IsUnderusing(*metrics_, now)) {
376 0 : last_rampup_time_ms_ = now;
377 0 : in_quick_rampup_ = true;
378 :
379 0 : if (observer_)
380 0 : observer_->ScaleUp(kScaleReasonCpu);
381 : }
382 :
383 : int rampup_delay =
384 0 : in_quick_rampup_ ? kQuickRampUpDelayMs : current_rampup_delay_ms_;
385 :
386 0 : LOG(LS_VERBOSE) << " Frame stats: "
387 0 : << " encode usage " << metrics_->encode_usage_percent
388 0 : << " overuse detections " << num_overuse_detections_
389 0 : << " rampup delay " << rampup_delay;
390 : }
391 :
392 0 : bool OveruseFrameDetector::IsOverusing(const CpuOveruseMetrics& metrics) {
393 0 : RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
394 0 : if (metrics.encode_usage_percent >=
395 0 : options_.high_encode_usage_threshold_percent) {
396 0 : ++checks_above_threshold_;
397 : } else {
398 0 : checks_above_threshold_ = 0;
399 : }
400 0 : return checks_above_threshold_ >= options_.high_threshold_consecutive_count;
401 : }
402 :
403 0 : bool OveruseFrameDetector::IsUnderusing(const CpuOveruseMetrics& metrics,
404 : int64_t time_now) {
405 0 : RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
406 0 : int delay = in_quick_rampup_ ? kQuickRampUpDelayMs : current_rampup_delay_ms_;
407 0 : if (time_now < last_rampup_time_ms_ + delay)
408 0 : return false;
409 :
410 0 : return metrics.encode_usage_percent <
411 0 : options_.low_encode_usage_threshold_percent;
412 : }
413 : } // namespace webrtc
|