diff options
Diffstat (limited to 'cast/standalone_sender')
-rw-r--r-- | cast/standalone_sender/BUILD.gn | 23 | ||||
-rw-r--r-- | cast/standalone_sender/looping_file_sender.cc | 46 | ||||
-rw-r--r-- | cast/standalone_sender/looping_file_sender.h | 10 | ||||
-rw-r--r-- | cast/standalone_sender/remoting_sender.cc | 2 | ||||
-rw-r--r-- | cast/standalone_sender/streaming_av1_encoder.cc | 425 | ||||
-rw-r--r-- | cast/standalone_sender/streaming_av1_encoder.h | 169 | ||||
-rw-r--r-- | cast/standalone_sender/streaming_encoder_util.cc | 30 | ||||
-rw-r--r-- | cast/standalone_sender/streaming_encoder_util.h | 20 | ||||
-rw-r--r-- | cast/standalone_sender/streaming_video_encoder.cc | 57 | ||||
-rw-r--r-- | cast/standalone_sender/streaming_video_encoder.h | 194 | ||||
-rw-r--r-- | cast/standalone_sender/streaming_vpx_encoder.cc | 99 | ||||
-rw-r--r-- | cast/standalone_sender/streaming_vpx_encoder.h | 160 |
12 files changed, 995 insertions, 240 deletions
diff --git a/cast/standalone_sender/BUILD.gn b/cast/standalone_sender/BUILD.gn index b83028d4..a65c8bbf 100644 --- a/cast/standalone_sender/BUILD.gn +++ b/cast/standalone_sender/BUILD.gn @@ -18,6 +18,9 @@ if (!build_with_chromium) { if (have_external_libs) { defines += [ "CAST_STANDALONE_SENDER_HAVE_EXTERNAL_LIBS" ] } + if (have_libaom) { + defines += [ "CAST_STANDALONE_SENDER_HAVE_LIBAOM" ] + } } executable("cast_sender") { @@ -40,7 +43,7 @@ if (!build_with_chromium) { include_dirs = [] lib_dirs = [] libs = [] - if (have_ffmpeg && have_libopus && have_libvpx) { + if (have_external_libs) { sources += [ "connection_settings.h", "ffmpeg_glue.cc", @@ -55,15 +58,33 @@ if (!build_with_chromium) { "remoting_sender.h", "simulated_capturer.cc", "simulated_capturer.h", + "streaming_encoder_util.cc", + "streaming_encoder_util.h", "streaming_opus_encoder.cc", "streaming_opus_encoder.h", + "streaming_video_encoder.cc", + "streaming_video_encoder.h", "streaming_vpx_encoder.cc", "streaming_vpx_encoder.h", ] + include_dirs += ffmpeg_include_dirs + libopus_include_dirs + libvpx_include_dirs lib_dirs += ffmpeg_lib_dirs + libopus_lib_dirs + libvpx_lib_dirs libs += ffmpeg_libs + libopus_libs + libvpx_libs + + # LibAOM support currently recommends building from source, so is included + # separately here. 
+ if (have_libaom) { + sources += [ + "streaming_av1_encoder.cc", + "streaming_av1_encoder.h", + ] + + include_dirs += libaom_include_dirs + lib_dirs += libaom_lib_dirs + libs += libaom_libs + } } configs += [ "../common:certificate_config" ] diff --git a/cast/standalone_sender/looping_file_sender.cc b/cast/standalone_sender/looping_file_sender.cc index 7ed5e3fe..4362add6 100644 --- a/cast/standalone_sender/looping_file_sender.cc +++ b/cast/standalone_sender/looping_file_sender.cc @@ -6,6 +6,11 @@ #include <utility> +#if defined(CAST_STANDALONE_SENDER_HAVE_LIBAOM) +#include "cast/standalone_sender/streaming_av1_encoder.h" +#endif +#include "cast/standalone_sender/streaming_vpx_encoder.h" +#include "util/osp_logging.h" #include "util/trace_logging.h" namespace openscreen { @@ -23,9 +28,10 @@ LoopingFileSender::LoopingFileSender(Environment* environment, audio_encoder_(senders.audio_sender->config().channels, StreamingOpusEncoder::kDefaultCastAudioFramesPerSecond, senders.audio_sender), - video_encoder_(StreamingVpxEncoder::Parameters{.codec = settings.codec}, - env_->task_runner(), - senders.video_sender), + video_encoder_(CreateVideoEncoder( + StreamingVideoEncoder::Parameters{.codec = settings.codec}, + env_->task_runner(), + senders.video_sender)), next_task_(env_->now_function(), env_->task_runner()), console_update_task_(env_->now_function(), env_->task_runner()) { // Opus and Vp8 are the default values for the config, and if these are set @@ -33,7 +39,8 @@ LoopingFileSender::LoopingFileSender(Environment* environment, // support, which is a developer error. 
OSP_CHECK(senders.audio_config.codec == AudioCodec::kOpus); OSP_CHECK(senders.video_config.codec == VideoCodec::kVp8 || - senders.video_config.codec == VideoCodec::kVp9); + senders.video_config.codec == VideoCodec::kVp9 || + senders.video_config.codec == VideoCodec::kAv1); OSP_LOG_INFO << "Max allowed media bitrate (audio + video) will be " << settings_.max_bitrate; bandwidth_being_utilized_ = settings_.max_bitrate / 2; @@ -55,8 +62,8 @@ void LoopingFileSender::UpdateEncoderBitrates() { } else { audio_encoder_.UseStandardQuality(); } - video_encoder_.SetTargetBitrate(bandwidth_being_utilized_ - - audio_encoder_.GetBitrate()); + video_encoder_->SetTargetBitrate(bandwidth_being_utilized_ - + audio_encoder_.GetBitrate()); } void LoopingFileSender::ControlForNetworkCongestion() { @@ -123,7 +130,7 @@ void LoopingFileSender::OnVideoFrame(const AVFrame& av_frame, Clock::time_point capture_time) { TRACE_DEFAULT_SCOPED(TraceCategory::kStandaloneSender); latest_frame_time_ = std::max(capture_time, latest_frame_time_); - StreamingVpxEncoder::VideoFrame frame{}; + StreamingVideoEncoder::VideoFrame frame{}; frame.width = av_frame.width - av_frame.crop_left - av_frame.crop_right; frame.height = av_frame.height - av_frame.crop_top - av_frame.crop_bottom; frame.yuv_planes[0] = av_frame.data[0] + av_frame.crop_left + @@ -137,7 +144,7 @@ void LoopingFileSender::OnVideoFrame(const AVFrame& av_frame, } // TODO(jophba): Add performance metrics visual overlay (based on Stats // callback). 
- video_encoder_.EncodeAndSend(frame, capture_time, {}); + video_encoder_->EncodeAndSend(frame, capture_time, {}); } void LoopingFileSender::UpdateStatusOnConsole() { @@ -200,5 +207,28 @@ const char* LoopingFileSender::ToTrackName(SimulatedCapturer* capturer) const { return which; } +std::unique_ptr<StreamingVideoEncoder> LoopingFileSender::CreateVideoEncoder( + const StreamingVideoEncoder::Parameters& params, + TaskRunner* task_runner, + Sender* sender) { + switch (params.codec) { + case VideoCodec::kVp8: + case VideoCodec::kVp9: + return std::make_unique<StreamingVpxEncoder>(params, task_runner, sender); + case VideoCodec::kAv1: +#if defined(CAST_STANDALONE_SENDER_HAVE_LIBAOM) + return std::make_unique<StreamingAv1Encoder>(params, task_runner, sender); +#else + OSP_LOG_FATAL << "AV1 codec selected, but could not be used because " + "LibAOM not installed."; +#endif + default: + // Since we only support VP8, VP9, and AV1, any other codec value here + // should be due only to developer error. 
+ OSP_LOG_ERROR << "Unsupported codec " << CodecToString(params.codec); + OSP_NOTREACHED(); + } +} + } // namespace cast } // namespace openscreen diff --git a/cast/standalone_sender/looping_file_sender.h b/cast/standalone_sender/looping_file_sender.h index 7ad784b9..75508e8e 100644 --- a/cast/standalone_sender/looping_file_sender.h +++ b/cast/standalone_sender/looping_file_sender.h @@ -6,13 +6,14 @@ #define CAST_STANDALONE_SENDER_LOOPING_FILE_SENDER_H_ #include <algorithm> +#include <memory> #include <string> #include "cast/standalone_sender/connection_settings.h" #include "cast/standalone_sender/constants.h" #include "cast/standalone_sender/simulated_capturer.h" #include "cast/standalone_sender/streaming_opus_encoder.h" -#include "cast/standalone_sender/streaming_vpx_encoder.h" +#include "cast/standalone_sender/streaming_video_encoder.h" #include "cast/streaming/sender_session.h" namespace openscreen { @@ -57,6 +58,11 @@ class LoopingFileSender final : public SimulatedAudioCapturer::Client, const char* ToTrackName(SimulatedCapturer* capturer) const; + std::unique_ptr<StreamingVideoEncoder> CreateVideoEncoder( + const StreamingVideoEncoder::Parameters& params, + TaskRunner* task_runner, + Sender* sender); + // Holds the required injected dependencies (clock, task runner) used for Cast // Streaming, and owns the UDP socket over which all communications occur with // the remote's Receivers. 
@@ -75,7 +81,7 @@ class LoopingFileSender final : public SimulatedAudioCapturer::Client, int bandwidth_being_utilized_; StreamingOpusEncoder audio_encoder_; - StreamingVpxEncoder video_encoder_; + std::unique_ptr<StreamingVideoEncoder> video_encoder_; int num_capturers_running_ = 0; Clock::time_point capture_start_time_{}; diff --git a/cast/standalone_sender/remoting_sender.cc b/cast/standalone_sender/remoting_sender.cc index 741fb190..e28c9ae1 100644 --- a/cast/standalone_sender/remoting_sender.cc +++ b/cast/standalone_sender/remoting_sender.cc @@ -23,6 +23,8 @@ VideoDecoderConfig::Codec ToProtoCodec(VideoCodec value) { return VideoDecoderConfig_Codec_kCodecVP8; case VideoCodec::kVp9: return VideoDecoderConfig_Codec_kCodecVP9; + case VideoCodec::kAv1: + return VideoDecoderConfig_Codec_kCodecAV1; default: return VideoDecoderConfig_Codec_kUnknownVideoCodec; } diff --git a/cast/standalone_sender/streaming_av1_encoder.cc b/cast/standalone_sender/streaming_av1_encoder.cc new file mode 100644 index 00000000..7552f14f --- /dev/null +++ b/cast/standalone_sender/streaming_av1_encoder.cc @@ -0,0 +1,425 @@ +// Copyright 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "cast/standalone_sender/streaming_av1_encoder.h" + +#include <aom/aomcx.h> + +#include <chrono> +#include <cmath> +#include <utility> + +#include "cast/standalone_sender/streaming_encoder_util.h" +#include "cast/streaming/encoded_frame.h" +#include "cast/streaming/environment.h" +#include "cast/streaming/sender.h" +#include "util/chrono_helpers.h" +#include "util/osp_logging.h" +#include "util/saturate_cast.h" + +namespace openscreen { +namespace cast { + +// TODO(issuetracker.google.com/issues/155336511): Fix the declarations and then +// remove this: +using openscreen::operator<<; // For std::chrono::duration pretty-printing. 
+ +namespace { + +constexpr int kBytesPerKilobyte = 1024; + +// Lower and upper bounds to the frame duration passed to aom_codec_encode(), to +// ensure sanity. Note that the upper-bound is especially important in cases +// where the video paused for some lengthy amount of time. +constexpr Clock::duration kMinFrameDuration = milliseconds(1); +constexpr Clock::duration kMaxFrameDuration = milliseconds(125); + +// Highest/lowest allowed encoding speed set to the encoder. +constexpr int kHighestEncodingSpeed = 9; +constexpr int kLowestEncodingSpeed = 0; + +} // namespace + +StreamingAv1Encoder::StreamingAv1Encoder(const Parameters& params, + TaskRunner* task_runner, + Sender* sender) + : StreamingVideoEncoder(params, task_runner, sender) { + ideal_speed_setting_ = kHighestEncodingSpeed; + encode_thread_ = std::thread([this] { ProcessWorkUnitsUntilTimeToQuit(); }); + + OSP_DCHECK(params_.codec == VideoCodec::kAv1); + const auto result = + aom_codec_enc_config_default(aom_codec_av1_cx(), &config_, 0); + OSP_CHECK_EQ(result, AOM_CODEC_OK); + + // This is set to non-zero in ConfigureForNewFrameSize() later, to flag that + // the encoder has been initialized. + config_.g_threads = 0; + + // Set the timebase to match that of openscreen::Clock::duration. + config_.g_timebase.num = Clock::duration::period::num; + config_.g_timebase.den = Clock::duration::period::den; + + // |g_pass| and |g_lag_in_frames| must be "one pass" and zero, respectively, + // because of the way the libaom API is used. + config_.g_pass = AOM_RC_ONE_PASS; + config_.g_lag_in_frames = 0; + + // Rate control settings. + config_.rc_dropframe_thresh = 0; // The encoder may not drop any frames. 
+ config_.rc_resize_mode = 0; + config_.rc_end_usage = AOM_CBR; + config_.rc_target_bitrate = target_bitrate_ / kBytesPerKilobyte; + config_.rc_min_quantizer = params_.min_quantizer; + config_.rc_max_quantizer = params_.max_quantizer; + + // The reasons for the values chosen here (rc_*shoot_pct and rc_buf_*_sz) are + // lost in history. They were brought over from the legacy Chrome Cast + // Streaming Sender implementation. + config_.rc_undershoot_pct = 100; + config_.rc_overshoot_pct = 15; + config_.rc_buf_initial_sz = 500; + config_.rc_buf_optimal_sz = 600; + config_.rc_buf_sz = 1000; + + config_.kf_mode = AOM_KF_DISABLED; +} + +StreamingAv1Encoder::~StreamingAv1Encoder() { + { + std::unique_lock<std::mutex> lock(mutex_); + target_bitrate_ = 0; + cv_.notify_one(); + } + encode_thread_.join(); +} + +int StreamingAv1Encoder::GetTargetBitrate() const { + // Note: No need to lock the |mutex_| since this method should be called on + // the same thread as SetTargetBitrate(). + return target_bitrate_; +} + +void StreamingAv1Encoder::SetTargetBitrate(int new_bitrate) { + // Ensure that, when bps is converted to kbps downstream, the encoder + // bitrate will not be zero. + new_bitrate = std::max(new_bitrate, kBytesPerKilobyte); + + std::unique_lock<std::mutex> lock(mutex_); + // Only assign the new target bitrate if |target_bitrate_| has not yet been + // used to signal the |encode_thread_| to end. + if (target_bitrate_ > 0) { + target_bitrate_ = new_bitrate; + } +} + +void StreamingAv1Encoder::EncodeAndSend( + const VideoFrame& frame, + Clock::time_point reference_time, + std::function<void(Stats)> stats_callback) { + WorkUnit work_unit; + + // TODO(jophba): The |VideoFrame| struct should provide the media timestamp, + // instead of this code inferring it from the reference timestamps, since: 1) + // the video capturer's clock may tick at a different rate than the system + // clock; and 2) to reduce jitter. 
+ if (start_time_ == Clock::time_point::min()) { + start_time_ = reference_time; + work_unit.rtp_timestamp = RtpTimeTicks(); + } else { + work_unit.rtp_timestamp = RtpTimeTicks::FromTimeSinceOrigin( + reference_time - start_time_, sender_->rtp_timebase()); + if (work_unit.rtp_timestamp <= last_enqueued_rtp_timestamp_) { + OSP_LOG_WARN << "VIDEO[" << sender_->ssrc() + << "] Dropping: RTP timestamp is not monotonically " + "increasing from last frame."; + return; + } + } + if (sender_->GetInFlightMediaDuration(work_unit.rtp_timestamp) > + sender_->GetMaxInFlightMediaDuration()) { + OSP_LOG_WARN << "VIDEO[" << sender_->ssrc() + << "] Dropping: In-flight media duration would be too high."; + return; + } + + Clock::duration frame_duration = frame.duration; + if (frame_duration <= Clock::duration::zero()) { + // The caller did not provide the frame duration in |frame|. + if (reference_time == start_time_) { + // Use the max for the first frame so libaom will spend extra effort on + // its quality. + frame_duration = kMaxFrameDuration; + } else { + // Use the actual amount of time between the current and previous frame as + // a prediction for the next frame's duration. 
+ frame_duration = + (work_unit.rtp_timestamp - last_enqueued_rtp_timestamp_) + .ToDuration<Clock::duration>(sender_->rtp_timebase()); + } + } + work_unit.duration = + std::max(std::min(frame_duration, kMaxFrameDuration), kMinFrameDuration); + + last_enqueued_rtp_timestamp_ = work_unit.rtp_timestamp; + + work_unit.image = CloneAsAv1Image(frame); + work_unit.reference_time = reference_time; + work_unit.stats_callback = std::move(stats_callback); + const bool force_key_frame = sender_->NeedsKeyFrame(); + { + std::unique_lock<std::mutex> lock(mutex_); + needs_key_frame_ |= force_key_frame; + encode_queue_.push(std::move(work_unit)); + cv_.notify_one(); + } +} + +void StreamingAv1Encoder::DestroyEncoder() { + OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id()); + + if (is_encoder_initialized()) { + aom_codec_destroy(&encoder_); + // Flag that the encoder is not initialized. See header comments for + // is_encoder_initialized(). + config_.g_threads = 0; + } +} + +void StreamingAv1Encoder::ProcessWorkUnitsUntilTimeToQuit() { + OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id()); + + for (;;) { + WorkUnitWithResults work_unit{}; + bool force_key_frame; + int target_bitrate; + { + std::unique_lock<std::mutex> lock(mutex_); + if (target_bitrate_ <= 0) { + break; // Time to end this thread. + } + if (encode_queue_.empty()) { + cv_.wait(lock); + if (encode_queue_.empty()) { + continue; + } + } + static_cast<WorkUnit&>(work_unit) = std::move(encode_queue_.front()); + encode_queue_.pop(); + force_key_frame = needs_key_frame_; + needs_key_frame_ = false; + target_bitrate = target_bitrate_; + } + + // Clock::now() is being called directly, instead of using a + // dependency-injected "now function," since actual wall time is being + // measured. 
+ const Clock::time_point encode_start_time = Clock::now(); + PrepareEncoder(work_unit.image->d_w, work_unit.image->d_h, target_bitrate); + EncodeFrame(force_key_frame, work_unit); + ComputeFrameEncodeStats(Clock::now() - encode_start_time, target_bitrate, + work_unit); + UpdateSpeedSettingForNextFrame(work_unit.stats); + + main_task_runner_->PostTask( + [this, results = std::move(work_unit)]() mutable { + SendEncodedFrame(std::move(results)); + }); + } + + DestroyEncoder(); +} + +void StreamingAv1Encoder::PrepareEncoder(int width, + int height, + int target_bitrate) { + OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id()); + + const int target_kbps = target_bitrate / kBytesPerKilobyte; + + // Translate the |ideal_speed_setting_| into the AOME_SET_CPUUSED setting and + // the minimum quantizer to use. + int speed; + int min_quantizer; + if (ideal_speed_setting_ > kHighestEncodingSpeed) { + speed = kHighestEncodingSpeed; + const double remainder = ideal_speed_setting_ - speed; + min_quantizer = rounded_saturate_cast<int>( + remainder / kEquivalentEncodingSpeedStepPerQuantizerStep + + params_.min_quantizer); + min_quantizer = std::min(min_quantizer, params_.max_cpu_saver_quantizer); + } else { + speed = std::max(rounded_saturate_cast<int>(ideal_speed_setting_), + kLowestEncodingSpeed); + min_quantizer = params_.min_quantizer; + } + + if (static_cast<int>(config_.g_w) != width || + static_cast<int>(config_.g_h) != height) { + DestroyEncoder(); + } + + if (!is_encoder_initialized()) { + config_.g_threads = params_.num_encode_threads; + config_.g_w = width; + config_.g_h = height; + config_.rc_target_bitrate = target_kbps; + config_.rc_min_quantizer = min_quantizer; + + encoder_ = {}; + const aom_codec_flags_t flags = 0; + + const auto init_result = + aom_codec_enc_init(&encoder_, aom_codec_av1_cx(), &config_, flags); + OSP_CHECK_EQ(init_result, AOM_CODEC_OK); + + // Raise the threshold for considering macroblocks as static. 
The default is + // zero, so this setting makes the encoder less sensitive to motion. This + // lowers the probability of needing to utilize more CPU to search for + // motion vectors. + const auto ctl_result = + aom_codec_control(&encoder_, AOME_SET_STATIC_THRESHOLD, 1); + OSP_CHECK_EQ(ctl_result, AOM_CODEC_OK); + + // Ensure the speed will be set (below). + current_speed_setting_ = ~speed; + } else if (static_cast<int>(config_.rc_target_bitrate) != target_kbps || + static_cast<int>(config_.rc_min_quantizer) != min_quantizer) { + config_.rc_target_bitrate = target_kbps; + config_.rc_min_quantizer = min_quantizer; + const auto update_config_result = + aom_codec_enc_config_set(&encoder_, &config_); + OSP_CHECK_EQ(update_config_result, AOM_CODEC_OK); + } + + if (current_speed_setting_ != speed) { + const auto ctl_result = + aom_codec_control(&encoder_, AOME_SET_CPUUSED, speed); + OSP_CHECK_EQ(ctl_result, AOM_CODEC_OK); + current_speed_setting_ = speed; + } +} + +void StreamingAv1Encoder::EncodeFrame(bool force_key_frame, + WorkUnitWithResults& work_unit) { + OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id()); + + // The presentation timestamp argument here is fixed to zero to force the + // encoder to base its single-frame bandwidth calculations entirely on + // |frame_duration| and the target bitrate setting. + const aom_codec_pts_t pts = 0; + const aom_enc_frame_flags_t flags = force_key_frame ? AOM_EFLAG_FORCE_KF : 0; + const auto encode_result = aom_codec_encode( + &encoder_, work_unit.image.get(), pts, work_unit.duration.count(), flags); + OSP_CHECK_EQ(encode_result, AOM_CODEC_OK); + + const aom_codec_cx_pkt_t* pkt; + for (aom_codec_iter_t iter = nullptr;;) { + pkt = aom_codec_get_cx_data(&encoder_, &iter); + // aom_codec_get_cx_data() returns null once the "iteration" is complete. + // However, that point should never be reached because a + // AOM_CODEC_CX_FRAME_PKT must be encountered before that. 
+ OSP_CHECK(pkt); + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) { + break; + } + } + + // A copy of the payload data is being made here. That's okay since it has to + // be copied at some point anyway, to be passed back to the main thread. + auto* const begin = static_cast<const uint8_t*>(pkt->data.frame.buf); + auto* const end = begin + pkt->data.frame.sz; + work_unit.payload.assign(begin, end); + work_unit.is_key_frame = !!(pkt->data.frame.flags & AOM_FRAME_IS_KEY); +} + +void StreamingAv1Encoder::ComputeFrameEncodeStats( + Clock::duration encode_wall_time, + int target_bitrate, + WorkUnitWithResults& work_unit) { + OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id()); + + Stats& stats = work_unit.stats; + + // Note: stats.frame_id is set later, in SendEncodedFrame(). + stats.rtp_timestamp = work_unit.rtp_timestamp; + stats.encode_wall_time = encode_wall_time; + stats.frame_duration = work_unit.duration; + stats.encoded_size = work_unit.payload.size(); + + constexpr double kBytesPerBit = 1.0 / CHAR_BIT; + constexpr double kSecondsPerClockTick = + 1.0 / Clock::to_duration(seconds(1)).count(); + const double target_bytes_per_clock_tick = + target_bitrate * (kBytesPerBit * kSecondsPerClockTick); + stats.target_size = target_bytes_per_clock_tick * work_unit.duration.count(); + + // The quantizer the encoder used. This is the result of the AV1 encoder + // taking a guess at what quantizer value would produce an encoded frame size + // as close to the target as possible. + const auto get_quantizer_result = aom_codec_control( + &encoder_, AOME_GET_LAST_QUANTIZER_64, &stats.quantizer); + OSP_CHECK_EQ(get_quantizer_result, AOM_CODEC_OK); + + // Now that the frame has been encoded and the number of bytes is known, the + // perfect quantizer value (i.e., the one that should have been used) can be + // determined. 
+ stats.perfect_quantizer = stats.quantizer * stats.space_utilization(); +} + +void StreamingAv1Encoder::SendEncodedFrame(WorkUnitWithResults results) { + OSP_DCHECK(main_task_runner_->IsRunningOnTaskRunner()); + + EncodedFrame frame; + frame.frame_id = sender_->GetNextFrameId(); + if (results.is_key_frame) { + frame.dependency = EncodedFrame::KEY_FRAME; + frame.referenced_frame_id = frame.frame_id; + } else { + frame.dependency = EncodedFrame::DEPENDS_ON_ANOTHER; + frame.referenced_frame_id = frame.frame_id - 1; + } + frame.rtp_timestamp = results.rtp_timestamp; + frame.reference_time = results.reference_time; + frame.data = absl::Span<uint8_t>(results.payload); + + if (sender_->EnqueueFrame(frame) != Sender::OK) { + // Since the frame will not be sent, the encoder's frame dependency chain + // has been broken. Force a key frame for the next frame. + std::unique_lock<std::mutex> lock(mutex_); + needs_key_frame_ = true; + } + + if (results.stats_callback) { + results.stats.frame_id = frame.frame_id; + results.stats_callback(results.stats); + } +} + +// static +StreamingAv1Encoder::Av1ImageUniquePtr StreamingAv1Encoder::CloneAsAv1Image( + const VideoFrame& frame) { + OSP_DCHECK_GE(frame.width, 0); + OSP_DCHECK_GE(frame.height, 0); + OSP_DCHECK_GE(frame.yuv_strides[0], 0); + OSP_DCHECK_GE(frame.yuv_strides[1], 0); + OSP_DCHECK_GE(frame.yuv_strides[2], 0); + + constexpr int kAlignment = 32; + Av1ImageUniquePtr image(aom_img_alloc(nullptr, AOM_IMG_FMT_I420, frame.width, + frame.height, kAlignment)); + OSP_CHECK(image); + + CopyPlane(frame.yuv_planes[0], frame.yuv_strides[0], frame.height, + image->planes[AOM_PLANE_Y], image->stride[AOM_PLANE_Y]); + CopyPlane(frame.yuv_planes[1], frame.yuv_strides[1], (frame.height + 1) / 2, + image->planes[AOM_PLANE_U], image->stride[AOM_PLANE_U]); + CopyPlane(frame.yuv_planes[2], frame.yuv_strides[2], (frame.height + 1) / 2, + image->planes[AOM_PLANE_V], image->stride[AOM_PLANE_V]); + + return image; +} + +} // namespace cast +} // 
namespace openscreen diff --git a/cast/standalone_sender/streaming_av1_encoder.h b/cast/standalone_sender/streaming_av1_encoder.h new file mode 100644 index 00000000..c40ab019 --- /dev/null +++ b/cast/standalone_sender/streaming_av1_encoder.h @@ -0,0 +1,169 @@ +// Copyright 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CAST_STANDALONE_SENDER_STREAMING_AV1_ENCODER_H_ +#define CAST_STANDALONE_SENDER_STREAMING_AV1_ENCODER_H_ + +#include <aom/aom_encoder.h> +#include <aom/aom_image.h> + +#include <algorithm> +#include <condition_variable> // NOLINT +#include <functional> +#include <memory> +#include <mutex> +#include <queue> +#include <thread> +#include <vector> + +#include "absl/base/thread_annotations.h" +#include "cast/standalone_sender/streaming_video_encoder.h" +#include "cast/streaming/constants.h" +#include "cast/streaming/frame_id.h" +#include "cast/streaming/rtp_time.h" +#include "platform/api/task_runner.h" +#include "platform/api/time.h" + +namespace openscreen { + +class TaskRunner; + +namespace cast { + +class Sender; + +// Uses libaom to encode AV1 video and streams it to a Sender. Includes +// extensive logic for fine-tuning the encoder parameters in real-time, to +// provide the best quality results given external, uncontrollable factors: +// CPU/network availability, and the complexity of the video frame content. +// +// Internally, a separate encode thread is created and used to prevent blocking +// the main thread while frames are being encoded. All public API methods are +// assumed to be called on the same sequence/thread as the main TaskRunner +// (injected via the constructor). +// +// Usage: +// +// 1. EncodeAndSend() is used to queue-up video frames for encoding and sending, +// which will be done on a best-effort basis. +// +// 2. 
The client is expected to call SetTargetBitrate() frequently based on its +// own bandwidth estimates and congestion control logic. In addition, a client +// may provide a callback for each frame's encode statistics, which can be used +// to further optimize the user experience. For example, the stats can be used +// as a signal to reduce the data volume (i.e., resolution and/or frame rate) +// coming from the video capture source. +class StreamingAv1Encoder : public StreamingVideoEncoder { + public: + StreamingAv1Encoder(const Parameters& params, + TaskRunner* task_runner, + Sender* sender); + + ~StreamingAv1Encoder(); + + int GetTargetBitrate() const override; + void SetTargetBitrate(int new_bitrate) override; + void EncodeAndSend(const VideoFrame& frame, + Clock::time_point reference_time, + std::function<void(Stats)> stats_callback) override; + + private: + // Syntactic convenience to wrap the aom_image_t alloc/free API in a smart + // pointer. + struct Av1ImageDeleter { + void operator()(aom_image_t* ptr) const { aom_img_free(ptr); } + }; + using Av1ImageUniquePtr = std::unique_ptr<aom_image_t, Av1ImageDeleter>; + + // Represents the state of one frame encode. This is created in + // EncodeAndSend(), and passed to the encode thread via the |encode_queue_|. + struct WorkUnit { + Av1ImageUniquePtr image; + Clock::duration duration; + Clock::time_point reference_time; + RtpTimeTicks rtp_timestamp; + std::function<void(Stats)> stats_callback; + }; + + // Same as WorkUnit, but with additional fields to carry the encode results. + struct WorkUnitWithResults : public WorkUnit { + std::vector<uint8_t> payload; + bool is_key_frame = false; + Stats stats; + }; + + bool is_encoder_initialized() const { return config_.g_threads != 0; } + + // Destroys the AV1 encoder context if it has been initialized. 
+ void DestroyEncoder(); + + // The procedure for the |encode_thread_| that loops, processing work units + // from the |encode_queue_| by calling Encode() until it's time to end the + // thread. + void ProcessWorkUnitsUntilTimeToQuit(); + + // If the |encoder_| is live, attempt reconfiguration to allow it to encode + // frames at a new frame size or target bitrate. If reconfiguration is not + // possible, destroy the existing instance and re-create a new |encoder_| + // instance. + void PrepareEncoder(int width, int height, int target_bitrate); + + // Wraps the complex libaom aom_codec_encode() call using inputs from + // |work_unit| and populating results there. + void EncodeFrame(bool force_key_frame, WorkUnitWithResults& work_unit); + + // Computes and populates |work_unit.stats| after the last call to + // EncodeFrame(). + void ComputeFrameEncodeStats(Clock::duration encode_wall_time, + int target_bitrate, + WorkUnitWithResults& work_unit); + + // Assembles and enqueues an EncodedFrame with the Sender on the main thread. + void SendEncodedFrame(WorkUnitWithResults results); + + // Allocates a aom_image_t and copies the content from |frame| to it. + static Av1ImageUniquePtr CloneAsAv1Image(const VideoFrame& frame); + + // The reference time of the first frame passed to EncodeAndSend(). + Clock::time_point start_time_ = Clock::time_point::min(); + + // The RTP timestamp of the last frame that was pushed into the + // |encode_queue_| by EncodeAndSend(). This is used to check whether + // timestamps are monotonically increasing. + RtpTimeTicks last_enqueued_rtp_timestamp_; + + // Guards a few members shared by both the main and encode threads. + std::mutex mutex_; + + // Used by the encode thread to sleep until more work is available. 
+ std::condition_variable cv_ ABSL_GUARDED_BY(mutex_); + + // These encode parameters are not passed in the WorkUnit struct because it is + // desirable for them to be applied as soon as possible, with the very next + // WorkUnit popped from the |encode_queue_| on the encode thread, and not to + // wait until some later WorkUnit is processed. + bool needs_key_frame_ ABSL_GUARDED_BY(mutex_) = true; + int target_bitrate_ ABSL_GUARDED_BY(mutex_) = 2 << 20; // Default: 2 Mbps. + + // The queue of frame encodes. The size of this queue is implicitly bounded by + // EncodeAndSend(), where it checks for the total in-flight media duration and + // maybe drops a frame. + std::queue<WorkUnit> encode_queue_ ABSL_GUARDED_BY(mutex_); + + // Current AV1 encoder configuration. Most of the fields are unchanging, and + // are populated in the ctor; but thereafter, only the encode thread accesses + // this struct. + // + // The speed setting is controlled via a separate libaom API (see members + // below). + aom_codec_enc_cfg_t config_{}; + + // libaom AV1 encoder instance. Only the encode thread accesses this. + aom_codec_ctx_t encoder_; +}; + +} // namespace cast +} // namespace openscreen + +#endif // CAST_STANDALONE_SENDER_STREAMING_AV1_ENCODER_H_ diff --git a/cast/standalone_sender/streaming_encoder_util.cc b/cast/standalone_sender/streaming_encoder_util.cc new file mode 100644 index 00000000..9ead2bd9 --- /dev/null +++ b/cast/standalone_sender/streaming_encoder_util.cc @@ -0,0 +1,30 @@ +// Copyright 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "cast/standalone_sender/streaming_encoder_util.h" + +#include <string.h> + +#include <algorithm> + +namespace openscreen { +namespace cast { +void CopyPlane(const uint8_t* src, + int src_stride, + int num_rows, + uint8_t* dst, + int dst_stride) { + if (src_stride == dst_stride) { + memcpy(dst, src, src_stride * num_rows); + return; + } + const int bytes_per_row = std::min(src_stride, dst_stride); + while (--num_rows >= 0) { + memcpy(dst, src, bytes_per_row); + dst += dst_stride; + src += src_stride; + } +} +} // namespace cast +} // namespace openscreen diff --git a/cast/standalone_sender/streaming_encoder_util.h b/cast/standalone_sender/streaming_encoder_util.h new file mode 100644 index 00000000..d4d00b42 --- /dev/null +++ b/cast/standalone_sender/streaming_encoder_util.h @@ -0,0 +1,20 @@ +// Copyright 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CAST_STANDALONE_SENDER_STREAMING_ENCODER_UTIL_H_ +#define CAST_STANDALONE_SENDER_STREAMING_ENCODER_UTIL_H_ + +#include <stdint.h> + +namespace openscreen { +namespace cast { +void CopyPlane(const uint8_t* src, + int src_stride, + int num_rows, + uint8_t* dst, + int dst_stride); +} // namespace cast +} // namespace openscreen + +#endif // CAST_STANDALONE_SENDER_STREAMING_ENCODER_UTIL_H_ diff --git a/cast/standalone_sender/streaming_video_encoder.cc b/cast/standalone_sender/streaming_video_encoder.cc new file mode 100644 index 00000000..0e15ab2c --- /dev/null +++ b/cast/standalone_sender/streaming_video_encoder.cc @@ -0,0 +1,57 @@ +// Copyright 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "cast/standalone_sender/streaming_video_encoder.h" + +#include "util/chrono_helpers.h" + +namespace openscreen { +namespace cast { + +StreamingVideoEncoder::StreamingVideoEncoder(const Parameters& params, + TaskRunner* task_runner, + Sender* sender) + : params_(params), main_task_runner_(task_runner), sender_(sender) { + OSP_DCHECK_LE(1, params_.num_encode_threads); + OSP_DCHECK_LE(kMinQuantizer, params_.min_quantizer); + OSP_DCHECK_LE(params_.min_quantizer, params_.max_cpu_saver_quantizer); + OSP_DCHECK_LE(params_.max_cpu_saver_quantizer, params_.max_quantizer); + OSP_DCHECK_LE(params_.max_quantizer, kMaxQuantizer); + OSP_DCHECK_LT(0.0, params_.max_time_utilization); + OSP_DCHECK_LE(params_.max_time_utilization, 1.0); + OSP_DCHECK(main_task_runner_); + OSP_DCHECK(sender_); +} + +StreamingVideoEncoder::~StreamingVideoEncoder() {} + +void StreamingVideoEncoder::UpdateSpeedSettingForNextFrame(const Stats& stats) { + OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id()); + + // Combine the speed setting that was used to encode the last frame, and the + // quantizer the encoder chose into a single speed metric. + const double speed = current_speed_setting_ + + kEquivalentEncodingSpeedStepPerQuantizerStep * + std::max(0, stats.quantizer - params_.min_quantizer); + + // Like |Stats::perfect_quantizer|, this computes a "hindsight" speed setting + // for the last frame, one that may have potentially allowed for a + // better-quality quantizer choice by the encoder, while also keeping CPU + // utilization within budget. + const double perfect_speed = + speed * stats.time_utilization() / params_.max_time_utilization; + + // Update the ideal speed setting, to be used for the next frame. An + // exponentially-decaying weighted average is used here to smooth-out noise. + // The weight is based on the duration of the frame that was encoded. 
+ constexpr Clock::duration kDecayHalfLife = milliseconds(120); + const double ticks = stats.frame_duration.count(); + const double weight = ticks / (ticks + kDecayHalfLife.count()); + ideal_speed_setting_ = + weight * perfect_speed + (1.0 - weight) * ideal_speed_setting_; + OSP_DCHECK(std::isfinite(ideal_speed_setting_)); +} + +} // namespace cast +} // namespace openscreen diff --git a/cast/standalone_sender/streaming_video_encoder.h b/cast/standalone_sender/streaming_video_encoder.h new file mode 100644 index 00000000..52fae9cc --- /dev/null +++ b/cast/standalone_sender/streaming_video_encoder.h @@ -0,0 +1,194 @@ +// Copyright 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CAST_STANDALONE_SENDER_STREAMING_VIDEO_ENCODER_H_ +#define CAST_STANDALONE_SENDER_STREAMING_VIDEO_ENCODER_H_ + +#include <algorithm> +#include <condition_variable> // NOLINT +#include <functional> +#include <memory> +#include <mutex> +#include <queue> +#include <thread> +#include <vector> + +#include "absl/base/thread_annotations.h" +#include "cast/streaming/constants.h" +#include "cast/streaming/frame_id.h" +#include "cast/streaming/rtp_time.h" +#include "platform/api/task_runner.h" +#include "platform/api/time.h" + +namespace openscreen { + +class TaskRunner; + +namespace cast { + +class Sender; + +class StreamingVideoEncoder { + public: + // Configurable parameters passed to the StreamingVideoEncoder constructor. + struct Parameters { + // Number of threads to parallelize frame encoding. This should be set based + // on the number of CPU cores available for encoding, but no more than 8. + int num_encode_threads = + std::min(std::max<int>(std::thread::hardware_concurrency(), 1), 8); + + // Best-quality quantizer (lower is better quality). Range: [0,63] + int min_quantizer = 4; + + // Worst-quality quantizer (lower is better quality). 
Range: [0,63] + int max_quantizer = kMaxQuantizer; + + // Worst-quality quantizer to use when the CPU is extremely constrained. + // Range: [min_quantizer,max_quantizer] + int max_cpu_saver_quantizer = 25; + + // Maximum amount of wall-time a frame's encode can take, relative to the + // frame's duration, before the CPU-saver logic is activated. The default + // (70%) is appropriate for systems with four or more cores, but should be + // reduced (e.g., 50%) for systems with fewer than three cores. + // + // Example: For 30 FPS (continuous) video, the frame duration is ~33.3ms, + // and a value of 0.5 here would mean that the CPU-saver logic starts + // sacrificing quality when frame encodes start taking longer than ~16.7ms. + double max_time_utilization = 0.7; + + // Determines which codec (VP8, VP9, or AV1) is to be used for encoding. + // Defaults to VP8. + VideoCodec codec = VideoCodec::kVp8; + }; + + // Represents an input VideoFrame, passed to EncodeAndSend(). + struct VideoFrame { + // Image width and height. + int width = 0; + int height = 0; + + // I420 format image pointers and row strides (the number of bytes between + // the start of successive rows). The pointers only need to remain valid + // until the EncodeAndSend() call returns. + const uint8_t* yuv_planes[3] = {}; + int yuv_strides[3] = {}; + + // How long this frame will be held before the next frame will be displayed, + // or zero if unknown. The frame duration is passed to the video codec, + // affecting a number of important behaviors, including: per-frame + // bandwidth, CPU time spent encoding, temporal quality trade-offs, and + // key/golden/alt-ref frame generation intervals. + Clock::duration duration; + }; + + // Performance statistics for a single frame's encode. 
+ // + // For full details on how to use these stats in an end-to-end system, see: + // https://www.chromium.org/developers/design-documents/ + // auto-throttled-screen-capture-and-mirroring + // and https://source.chromium.org/chromium/chromium/src/+/master: + // media/cast/sender/performance_metrics_overlay.h + struct Stats { + // The Cast Streaming ID that was assigned to the frame. + FrameId frame_id; + + // The RTP timestamp of the frame. + RtpTimeTicks rtp_timestamp; + + // How long the frame took to encode. This is wall time, not CPU time or + // some other load metric. + Clock::duration encode_wall_time; + + // The frame's predicted duration; or, the actual duration if it was + // provided in the VideoFrame. + Clock::duration frame_duration; + + // The encoded frame's size in bytes. + int encoded_size = 0; + + // The average size of an encoded frame in bytes, having this + // |frame_duration| and current target bitrate. + double target_size = 0.0; + + // The actual quantizer the video encoder used, in the range [0,63]. + int quantizer = 0; + + // The "hindsight" quantizer value that would have produced the best quality + // encoding of the frame at the current target bitrate. The nominal range is + // [0.0,63.0]. If it is larger than 63.0, then it was impossible to + // encode the frame within the current target bitrate (e.g., too much + // "entropy" in the image, or too low a target bitrate). + double perfect_quantizer = 0.0; + + // Utilization feedback metrics. The nominal range for each of these is + // [0.0,1.0] where 1.0 means "the entire budget available for the frame was + // exhausted." Going above 1.0 is okay for one or a few frames, since it's + // the average over many frames that matters before the system is considered + // "redlining." + // + // The max of these three provides an overall utilization control signal. 
+ // The usual approach is for upstream control logic to increase/decrease the + // data volume (e.g., video resolution and/or frame rate) to maintain a good + // target point. + double time_utilization() const { + return static_cast<double>(encode_wall_time.count()) / + frame_duration.count(); + } + double space_utilization() const { return encoded_size / target_size; } + double entropy_utilization() const { + return perfect_quantizer / kMaxQuantizer; + } + }; + + virtual ~StreamingVideoEncoder(); + + // Get/Set the target bitrate. This may be changed at any time, as frequently + // as desired, and it will take effect internally as soon as possible. + virtual int GetTargetBitrate() const = 0; + virtual void SetTargetBitrate(int new_bitrate) = 0; + + // Encode |frame| using the video encoder, assemble an EncodedFrame, and + // enqueue into the Sender. The frame may be dropped if too many frames are + // in-flight. If provided, the |stats_callback| is run after the frame is + // enqueued in the Sender (via the main TaskRunner). + virtual void EncodeAndSend(const VideoFrame& frame, + Clock::time_point reference_time, + std::function<void(Stats)> stats_callback) = 0; + + static constexpr int kMinQuantizer = 0; + static constexpr int kMaxQuantizer = 63; + + protected: + StreamingVideoEncoder(const Parameters& params, + TaskRunner* task_runner, + Sender* sender); + + // This is the equivalent change in encoding speed per one quantizer step. + static constexpr double kEquivalentEncodingSpeedStepPerQuantizerStep = + 1 / 20.0; + + // Updates the |ideal_speed_setting_|, to take effect with the next frame + // encode, based on the given performance |stats|. 
+ void UpdateSpeedSettingForNextFrame(const Stats& stats); + + const Parameters params_; + TaskRunner* const main_task_runner_; + Sender* const sender_; + + // These represent the magnitude of the encoder speed setting, where larger values + // (i.e., faster speed) request less CPU usage but will provide lower video + // quality. Only the encode thread accesses these. + double ideal_speed_setting_; // A time-weighted average, from measurements. + int current_speed_setting_; // Current |encoder_| speed setting. + + // This member should be last in the class since the thread should not start + // until all above members have been initialized by the constructor. + std::thread encode_thread_; +}; + +} // namespace cast +} // namespace openscreen + +#endif // CAST_STANDALONE_SENDER_STREAMING_VIDEO_ENCODER_H_ diff --git a/cast/standalone_sender/streaming_vpx_encoder.cc b/cast/standalone_sender/streaming_vpx_encoder.cc index 1c9de314..1b10f92b 100644 --- a/cast/standalone_sender/streaming_vpx_encoder.cc +++ b/cast/standalone_sender/streaming_vpx_encoder.cc @@ -4,14 +4,13 @@ #include "cast/standalone_sender/streaming_vpx_encoder.h" -#include <stdint.h> -#include <string.h> #include <vpx/vp8cx.h> #include <chrono> #include <cmath> #include <utility> +#include "cast/standalone_sender/streaming_encoder_util.h" #include "cast/streaming/encoded_frame.h" #include "cast/streaming/environment.h" #include "cast/streaming/sender.h" @@ -22,8 +21,8 @@ namespace openscreen { namespace cast { -// TODO(https://crbug.com/openscreen/123): Fix the declarations and then remove -// this: +// TODO(issuetracker.google.com/issues/155336511): Fix the declarations and then +// remove this: using openscreen::operator<<; // For std::chrono::duration pretty-printing. 
namespace { @@ -44,28 +43,14 @@ constexpr Clock::duration kMaxFrameDuration = milliseconds(125); constexpr int kHighestEncodingSpeed = 12; constexpr int kLowestEncodingSpeed = 6; -// This is the equivalent change in encoding speed per one quantizer step. -constexpr double kEquivalentEncodingSpeedStepPerQuantizerStep = 1 / 20.0; - } // namespace StreamingVpxEncoder::StreamingVpxEncoder(const Parameters& params, TaskRunner* task_runner, Sender* sender) - : params_(params), - main_task_runner_(task_runner), - sender_(sender), - ideal_speed_setting_(kHighestEncodingSpeed), - encode_thread_([this] { ProcessWorkUnitsUntilTimeToQuit(); }) { - OSP_DCHECK_LE(1, params_.num_encode_threads); - OSP_DCHECK_LE(kMinQuantizer, params_.min_quantizer); - OSP_DCHECK_LE(params_.min_quantizer, params_.max_cpu_saver_quantizer); - OSP_DCHECK_LE(params_.max_cpu_saver_quantizer, params_.max_quantizer); - OSP_DCHECK_LE(params_.max_quantizer, kMaxQuantizer); - OSP_DCHECK_LT(0.0, params_.max_time_utilization); - OSP_DCHECK_LE(params_.max_time_utilization, 1.0); - OSP_DCHECK(main_task_runner_); - OSP_DCHECK(sender_); + : StreamingVideoEncoder(params, task_runner, sender) { + ideal_speed_setting_ = kHighestEncodingSpeed; + encode_thread_ = std::thread([this] { ProcessWorkUnitsUntilTimeToQuit(); }); vpx_codec_iface_t* ctx; if (params_.codec == VideoCodec::kVp9) { @@ -242,9 +227,9 @@ void StreamingVpxEncoder::ProcessWorkUnitsUntilTimeToQuit() { // measured. 
const Clock::time_point encode_start_time = Clock::now(); PrepareEncoder(work_unit.image->d_w, work_unit.image->d_h, target_bitrate); - EncodeFrame(force_key_frame, &work_unit); + EncodeFrame(force_key_frame, work_unit); ComputeFrameEncodeStats(Clock::now() - encode_start_time, target_bitrate, - &work_unit); + work_unit); UpdateSpeedSettingForNextFrame(work_unit.stats); main_task_runner_->PostTask( @@ -337,7 +322,7 @@ void StreamingVpxEncoder::PrepareEncoder(int width, } void StreamingVpxEncoder::EncodeFrame(bool force_key_frame, - WorkUnitWithResults* work_unit) { + WorkUnitWithResults& work_unit) { OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id()); // The presentation timestamp argument here is fixed to zero to force the @@ -346,8 +331,8 @@ void StreamingVpxEncoder::EncodeFrame(bool force_key_frame, const vpx_codec_pts_t pts = 0; const vpx_enc_frame_flags_t flags = force_key_frame ? VPX_EFLAG_FORCE_KF : 0; const auto encode_result = - vpx_codec_encode(&encoder_, work_unit->image.get(), pts, - work_unit->duration.count(), flags, VPX_DL_REALTIME); + vpx_codec_encode(&encoder_, work_unit.image.get(), pts, + work_unit.duration.count(), flags, VPX_DL_REALTIME); OSP_CHECK_EQ(encode_result, VPX_CODEC_OK); const vpx_codec_cx_pkt_t* pkt; @@ -366,30 +351,30 @@ void StreamingVpxEncoder::EncodeFrame(bool force_key_frame, // be copied at some point anyway, to be passed back to the main thread. 
auto* const begin = static_cast<const uint8_t*>(pkt->data.frame.buf); auto* const end = begin + pkt->data.frame.sz; - work_unit->payload.assign(begin, end); - work_unit->is_key_frame = !!(pkt->data.frame.flags & VPX_FRAME_IS_KEY); + work_unit.payload.assign(begin, end); + work_unit.is_key_frame = !!(pkt->data.frame.flags & VPX_FRAME_IS_KEY); } void StreamingVpxEncoder::ComputeFrameEncodeStats( Clock::duration encode_wall_time, int target_bitrate, - WorkUnitWithResults* work_unit) { + WorkUnitWithResults& work_unit) { OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id()); - Stats& stats = work_unit->stats; + Stats& stats = work_unit.stats; // Note: stats.frame_id is set later, in SendEncodedFrame(). - stats.rtp_timestamp = work_unit->rtp_timestamp; + stats.rtp_timestamp = work_unit.rtp_timestamp; stats.encode_wall_time = encode_wall_time; - stats.frame_duration = work_unit->duration; - stats.encoded_size = work_unit->payload.size(); + stats.frame_duration = work_unit.duration; + stats.encoded_size = work_unit.payload.size(); constexpr double kBytesPerBit = 1.0 / CHAR_BIT; constexpr double kSecondsPerClockTick = 1.0 / Clock::to_duration(seconds(1)).count(); const double target_bytes_per_clock_tick = target_bitrate * (kBytesPerBit * kSecondsPerClockTick); - stats.target_size = target_bytes_per_clock_tick * work_unit->duration.count(); + stats.target_size = target_bytes_per_clock_tick * work_unit.duration.count(); // The quantizer the encoder used. 
This is the result of the VP8/9 encoder // taking a guess at what quantizer value would produce an encoded frame size @@ -404,33 +389,6 @@ void StreamingVpxEncoder::ComputeFrameEncodeStats( stats.perfect_quantizer = stats.quantizer * stats.space_utilization(); } -void StreamingVpxEncoder::UpdateSpeedSettingForNextFrame(const Stats& stats) { - OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id()); - - // Combine the speed setting that was used to encode the last frame, and the - // quantizer the encoder chose into a single speed metric. - const double speed = current_speed_setting_ + - kEquivalentEncodingSpeedStepPerQuantizerStep * - std::max(0, stats.quantizer - params_.min_quantizer); - - // Like |Stats::perfect_quantizer|, this computes a "hindsight" speed setting - // for the last frame, one that may have potentially allowed for a - // better-quality quantizer choice by the encoder, while also keeping CPU - // utilization within budget. - const double perfect_speed = - speed * stats.time_utilization() / params_.max_time_utilization; - - // Update the ideal speed setting, to be used for the next frame. An - // exponentially-decaying weighted average is used here to smooth-out noise. - // The weight is based on the duration of the frame that was encoded. 
- constexpr Clock::duration kDecayHalfLife = milliseconds(120); - const double ticks = stats.frame_duration.count(); - const double weight = ticks / (ticks + kDecayHalfLife.count()); - ideal_speed_setting_ = - weight * perfect_speed + (1.0 - weight) * ideal_speed_setting_; - OSP_DCHECK(std::isfinite(ideal_speed_setting_)); -} - void StreamingVpxEncoder::SendEncodedFrame(WorkUnitWithResults results) { OSP_DCHECK(main_task_runner_->IsRunningOnTaskRunner()); @@ -460,25 +418,6 @@ void StreamingVpxEncoder::SendEncodedFrame(WorkUnitWithResults results) { } } -namespace { -void CopyPlane(const uint8_t* src, - int src_stride, - int num_rows, - uint8_t* dst, - int dst_stride) { - if (src_stride == dst_stride) { - memcpy(dst, src, src_stride * num_rows); - return; - } - const int bytes_per_row = std::min(src_stride, dst_stride); - while (--num_rows >= 0) { - memcpy(dst, src, bytes_per_row); - dst += dst_stride; - src += src_stride; - } -} -} // namespace - // static StreamingVpxEncoder::VpxImageUniquePtr StreamingVpxEncoder::CloneAsVpxImage( const VideoFrame& frame) { diff --git a/cast/standalone_sender/streaming_vpx_encoder.h b/cast/standalone_sender/streaming_vpx_encoder.h index 6935efdd..5c99309e 100644 --- a/cast/standalone_sender/streaming_vpx_encoder.h +++ b/cast/standalone_sender/streaming_vpx_encoder.h @@ -18,6 +18,7 @@ #include <vector> #include "absl/base/thread_annotations.h" +#include "cast/standalone_sender/streaming_video_encoder.h" #include "cast/streaming/constants.h" #include "cast/streaming/frame_id.h" #include "cast/streaming/rtp_time.h" @@ -53,140 +54,19 @@ class Sender; // to further optimize the user experience. For example, the stats can be used // as a signal to reduce the data volume (i.e., resolution and/or frame rate) // coming from the video capture source. -class StreamingVpxEncoder { +class StreamingVpxEncoder : public StreamingVideoEncoder { public: - // Configurable parameters passed to the StreamingVpxEncoder constructor. 
- struct Parameters { - // Number of threads to parallelize frame encoding. This should be set based - // on the number of CPU cores available for encoding, but no more than 8. - int num_encode_threads = - std::min(std::max<int>(std::thread::hardware_concurrency(), 1), 8); - - // Best-quality quantizer (lower is better quality). Range: [0,63] - int min_quantizer = 4; - - // Worst-quality quantizer (lower is better quality). Range: [0,63] - int max_quantizer = 63; - - // Worst-quality quantizer to use when the CPU is extremely constrained. - // Range: [min_quantizer,max_quantizer] - int max_cpu_saver_quantizer = 25; - - // Maximum amount of wall-time a frame's encode can take, relative to the - // frame's duration, before the CPU-saver logic is activated. The default - // (70%) is appropriate for systems with four or more cores, but should be - // reduced (e.g., 50%) for systems with fewer than three cores. - // - // Example: For 30 FPS (continuous) video, the frame duration is ~33.3ms, - // and a value of 0.5 here would mean that the CPU-saver logic starts - // sacrificing quality when frame encodes start taking longer than ~16.7ms. - double max_time_utilization = 0.7; - - // Determines which codec (VP8 or VP9) is to be used for encoding. Defaults - // to VP8. - VideoCodec codec = VideoCodec::kVp8; - }; - - // Represents an input VideoFrame, passed to EncodeAndSend(). - struct VideoFrame { - // Image width and height. - int width; - int height; - - // I420 format image pointers and row strides (the number of bytes between - // the start of successive rows). The pointers only need to remain valid - // until the EncodeAndSend() call returns. - const uint8_t* yuv_planes[3]; - int yuv_strides[3]; - - // How long this frame will be held before the next frame will be displayed, - // or zero if unknown. 
The frame duration is passed to the VP8/9 codec, - // affecting a number of important behaviors, including: per-frame - // bandwidth, CPU time spent encoding, temporal quality trade-offs, and - // key/golden/alt-ref frame generation intervals. - Clock::duration duration; - }; - - // Performance statistics for a single frame's encode. - // - // For full details on how to use these stats in an end-to-end system, see: - // https://www.chromium.org/developers/design-documents/ - // auto-throttled-screen-capture-and-mirroring - // and https://source.chromium.org/chromium/chromium/src/+/master: - // media/cast/sender/performance_metrics_overlay.h - struct Stats { - // The Cast Streaming ID that was assigned to the frame. - FrameId frame_id; - - // The RTP timestamp of the frame. - RtpTimeTicks rtp_timestamp; - - // How long the frame took to encode. This is wall time, not CPU time or - // some other load metric. - Clock::duration encode_wall_time; - - // The frame's predicted duration; or, the actual duration if it was - // provided in the VideoFrame. - Clock::duration frame_duration; - - // The encoded frame's size in bytes. - int encoded_size; - - // The average size of an encoded frame in bytes, having this - // |frame_duration| and current target bitrate. - double target_size; - - // The actual quantizer the VP8 encoder used, in the range [0,63]. - int quantizer; - - // The "hindsight" quantizer value that would have produced the best quality - // encoding of the frame at the current target bitrate. The nominal range is - // [0.0,63.0]. If it is larger than 63.0, then it was impossible for VP8 to - // encode the frame within the current target bitrate (e.g., too much - // "entropy" in the image, or too low a target bitrate). - double perfect_quantizer; - - // Utilization feedback metrics. The nominal range for each of these is - // [0.0,1.0] where 1.0 means "the entire budget available for the frame was - // exhausted." 
Going above 1.0 is okay for one or a few frames, since it's - // the average over many frames that matters before the system is considered - // "redlining." - // - // The max of these three provides an overall utilization control signal. - // The usual approach is for upstream control logic to increase/decrease the - // data volume (e.g., video resolution and/or frame rate) to maintain a good - // target point. - double time_utilization() const { - return static_cast<double>(encode_wall_time.count()) / - frame_duration.count(); - } - double space_utilization() const { return encoded_size / target_size; } - double entropy_utilization() const { - return perfect_quantizer / kMaxQuantizer; - } - }; - StreamingVpxEncoder(const Parameters& params, TaskRunner* task_runner, Sender* sender); ~StreamingVpxEncoder(); - // Get/Set the target bitrate. This may be changed at any time, as frequently - // as desired, and it will take effect internally as soon as possible. - int GetTargetBitrate() const; - void SetTargetBitrate(int new_bitrate); - - // Encode |frame| using the VP8 encoder, assemble an EncodedFrame, and enqueue - // into the Sender. The frame may be dropped if too many frames are in-flight. - // If provided, the |stats_callback| is run after the frame is enqueued in the - // Sender (via the main TaskRunner). + int GetTargetBitrate() const override; + void SetTargetBitrate(int new_bitrate) override; void EncodeAndSend(const VideoFrame& frame, Clock::time_point reference_time, - std::function<void(Stats)> stats_callback); - - static constexpr int kMinQuantizer = 0; - static constexpr int kMaxQuantizer = 63; + std::function<void(Stats)> stats_callback) override; private: // Syntactic convenience to wrap the vpx_image_t alloc/free API in a smart @@ -209,7 +89,7 @@ class StreamingVpxEncoder { // Same as WorkUnit, but with additional fields to carry the encode results. 
struct WorkUnitWithResults : public WorkUnit { std::vector<uint8_t> payload; - bool is_key_frame; + bool is_key_frame = false; Stats stats; }; @@ -224,24 +104,20 @@ class StreamingVpxEncoder { void ProcessWorkUnitsUntilTimeToQuit(); // If the |encoder_| is live, attempt reconfiguration to allow it to encode - // frames at a new frame size, target bitrate, or "CPU encoding speed." If - // reconfiguration is not possible, destroy the existing instance and - // re-create a new |encoder_| instance. + // frames at a new frame size or target bitrate. If reconfiguration is not + // possible, destroy the existing instance and re-create a new |encoder_| + // instance. void PrepareEncoder(int width, int height, int target_bitrate); // Wraps the complex libvpx vpx_codec_encode() call using inputs from // |work_unit| and populating results there. - void EncodeFrame(bool force_key_frame, WorkUnitWithResults* work_unit); + void EncodeFrame(bool force_key_frame, WorkUnitWithResults& work_unit); // Computes and populates |work_unit.stats| after the last call to // EncodeFrame(). void ComputeFrameEncodeStats(Clock::duration encode_wall_time, int target_bitrate, - WorkUnitWithResults* work_unit); - - // Updates the |ideal_speed_setting_|, to take effect with the next frame - // encode, based on the given performance |stats|. - void UpdateSpeedSettingForNextFrame(const Stats& stats); + WorkUnitWithResults& work_unit); // Assembles and enqueues an EncodedFrame with the Sender on the main thread. void SendEncodedFrame(WorkUnitWithResults results); @@ -249,10 +125,6 @@ class StreamingVpxEncoder { // Allocates a vpx_image_t and copies the content from |frame| to it. static VpxImageUniquePtr CloneAsVpxImage(const VideoFrame& frame); - const Parameters params_; - TaskRunner* const main_task_runner_; - Sender* const sender_; - // The reference time of the first frame passed to EncodeAndSend(). 
Clock::time_point start_time_ = Clock::time_point::min(); @@ -287,18 +159,8 @@ class StreamingVpxEncoder { // below). vpx_codec_enc_cfg_t config_{}; - // These represent the magnitude of the VP8 speed setting, where larger values - // (i.e., faster speed) request less CPU usage but will provide lower video - // quality. Only the encode thread accesses these. - double ideal_speed_setting_; // A time-weighted average, from measurements. - int current_speed_setting_; // Current |encoder_| speed setting. - // libvpx VP8/9 encoder instance. Only the encode thread accesses this. vpx_codec_ctx_t encoder_; - - // This member should be last in the class since the thread should not start - // until all above members have been initialized by the constructor. - std::thread encode_thread_; }; } // namespace cast |