aboutsummaryrefslogtreecommitdiff
path: root/cast/standalone_sender/streaming_vpx_encoder.h
diff options
context:
space:
mode:
Diffstat (limited to 'cast/standalone_sender/streaming_vpx_encoder.h')
-rw-r--r--cast/standalone_sender/streaming_vpx_encoder.h307
1 files changed, 307 insertions, 0 deletions
diff --git a/cast/standalone_sender/streaming_vpx_encoder.h b/cast/standalone_sender/streaming_vpx_encoder.h
new file mode 100644
index 00000000..6935efdd
--- /dev/null
+++ b/cast/standalone_sender/streaming_vpx_encoder.h
@@ -0,0 +1,307 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CAST_STANDALONE_SENDER_STREAMING_VPX_ENCODER_H_
+#define CAST_STANDALONE_SENDER_STREAMING_VPX_ENCODER_H_
+
+#include <vpx/vpx_encoder.h>
+#include <vpx/vpx_image.h>
+
+#include <algorithm>
+#include <condition_variable> // NOLINT
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <thread>
+#include <vector>
+
+#include "absl/base/thread_annotations.h"
+#include "cast/streaming/constants.h"
+#include "cast/streaming/frame_id.h"
+#include "cast/streaming/rtp_time.h"
+#include "platform/api/task_runner.h"
+#include "platform/api/time.h"
+
+namespace openscreen {
+
+class TaskRunner;
+
+namespace cast {
+
+class Sender;
+
+// Uses libvpx to encode VP8/9 video and streams it to a Sender. Includes
+// extensive logic for fine-tuning the encoder parameters in real-time, to
+// provide the best quality results given external, uncontrollable factors:
+// CPU/network availability, and the complexity of the video frame content.
+//
+// Internally, a separate encode thread is created and used to prevent blocking
+// the main thread while frames are being encoded. All public API methods are
+// assumed to be called on the same sequence/thread as the main TaskRunner
+// (injected via the constructor).
+//
+// Usage:
+//
+// 1. EncodeAndSend() is used to queue-up video frames for encoding and sending,
+// which will be done on a best-effort basis.
+//
+// 2. The client is expected to call SetTargetBitrate() frequently based on its
+// own bandwidth estimates and congestion control logic. In addition, a client
+// may provide a callback for each frame's encode statistics, which can be used
+// to further optimize the user experience. For example, the stats can be used
+// as a signal to reduce the data volume (i.e., resolution and/or frame rate)
+// coming from the video capture source.
+class StreamingVpxEncoder {
+ public:
+ // Configurable parameters passed to the StreamingVpxEncoder constructor.
+ struct Parameters {
+ // Number of threads to parallelize frame encoding. This should be set based
+ // on the number of CPU cores available for encoding, but no more than 8.
+ int num_encode_threads =
+ std::min(std::max<int>(std::thread::hardware_concurrency(), 1), 8);
+
+ // Best-quality quantizer (lower is better quality). Range: [0,63]
+ int min_quantizer = 4;
+
+ // Worst-quality quantizer (lower is better quality). Range: [0,63]
+ int max_quantizer = 63;
+
+ // Worst-quality quantizer to use when the CPU is extremely constrained.
+ // Range: [min_quantizer,max_quantizer]
+ int max_cpu_saver_quantizer = 25;
+
+ // Maximum amount of wall-time a frame's encode can take, relative to the
+ // frame's duration, before the CPU-saver logic is activated. The default
+ // (70%) is appropriate for systems with four or more cores, but should be
+ // reduced (e.g., 50%) for systems with fewer than three cores.
+ //
+ // Example: For 30 FPS (continuous) video, the frame duration is ~33.3ms,
+ // and a value of 0.5 here would mean that the CPU-saver logic starts
+ // sacrificing quality when frame encodes start taking longer than ~16.7ms.
+ double max_time_utilization = 0.7;
+
+ // Determines which codec (VP8 or VP9) is to be used for encoding. Defaults
+ // to VP8.
+ VideoCodec codec = VideoCodec::kVp8;
+ };
+
+ // Represents an input VideoFrame, passed to EncodeAndSend().
+ struct VideoFrame {
+ // Image width and height.
+ int width;
+ int height;
+
+ // I420 format image pointers and row strides (the number of bytes between
+ // the start of successive rows). The pointers only need to remain valid
+ // until the EncodeAndSend() call returns.
+ const uint8_t* yuv_planes[3];
+ int yuv_strides[3];
+
+ // How long this frame will be held before the next frame will be displayed,
+ // or zero if unknown. The frame duration is passed to the VP8/9 codec,
+ // affecting a number of important behaviors, including: per-frame
+ // bandwidth, CPU time spent encoding, temporal quality trade-offs, and
+ // key/golden/alt-ref frame generation intervals.
+ Clock::duration duration;
+ };
+
+ // Performance statistics for a single frame's encode.
+ //
+ // For full details on how to use these stats in an end-to-end system, see:
+ // https://www.chromium.org/developers/design-documents/
+ // auto-throttled-screen-capture-and-mirroring
+ // and https://source.chromium.org/chromium/chromium/src/+/master:
+ // media/cast/sender/performance_metrics_overlay.h
+ struct Stats {
+ // The Cast Streaming ID that was assigned to the frame.
+ FrameId frame_id;
+
+ // The RTP timestamp of the frame.
+ RtpTimeTicks rtp_timestamp;
+
+ // How long the frame took to encode. This is wall time, not CPU time or
+ // some other load metric.
+ Clock::duration encode_wall_time;
+
+ // The frame's predicted duration; or, the actual duration if it was
+ // provided in the VideoFrame.
+ Clock::duration frame_duration;
+
+ // The encoded frame's size in bytes.
+ int encoded_size;
+
+ // The average size of an encoded frame in bytes, having this
+ // |frame_duration| and current target bitrate.
+ double target_size;
+
+ // The actual quantizer the VP8 encoder used, in the range [0,63].
+ int quantizer;
+
+ // The "hindsight" quantizer value that would have produced the best quality
+ // encoding of the frame at the current target bitrate. The nominal range is
+ // [0.0,63.0]. If it is larger than 63.0, then it was impossible for VP8 to
+ // encode the frame within the current target bitrate (e.g., too much
+ // "entropy" in the image, or too low a target bitrate).
+ double perfect_quantizer;
+
+ // Utilization feedback metrics. The nominal range for each of these is
+ // [0.0,1.0] where 1.0 means "the entire budget available for the frame was
+ // exhausted." Going above 1.0 is okay for one or a few frames, since it's
+ // the average over many frames that matters before the system is considered
+ // "redlining."
+ //
+ // The max of these three provides an overall utilization control signal.
+ // The usual approach is for upstream control logic to increase/decrease the
+ // data volume (e.g., video resolution and/or frame rate) to maintain a good
+ // target point.
+ double time_utilization() const {
+ return static_cast<double>(encode_wall_time.count()) /
+ frame_duration.count();
+ }
+ double space_utilization() const { return encoded_size / target_size; }
+ double entropy_utilization() const {
+ return perfect_quantizer / kMaxQuantizer;
+ }
+ };
+
+ StreamingVpxEncoder(const Parameters& params,
+ TaskRunner* task_runner,
+ Sender* sender);
+
+ ~StreamingVpxEncoder();
+
+ // Get/Set the target bitrate. This may be changed at any time, as frequently
+ // as desired, and it will take effect internally as soon as possible.
+ int GetTargetBitrate() const;
+ void SetTargetBitrate(int new_bitrate);
+
+ // Encode |frame| using the VP8 encoder, assemble an EncodedFrame, and enqueue
+ // into the Sender. The frame may be dropped if too many frames are in-flight.
+ // If provided, the |stats_callback| is run after the frame is enqueued in the
+ // Sender (via the main TaskRunner).
+ void EncodeAndSend(const VideoFrame& frame,
+ Clock::time_point reference_time,
+ std::function<void(Stats)> stats_callback);
+
+ static constexpr int kMinQuantizer = 0;
+ static constexpr int kMaxQuantizer = 63;
+
+ private:
+ // Syntactic convenience to wrap the vpx_image_t alloc/free API in a smart
+ // pointer.
+ struct VpxImageDeleter {
+ void operator()(vpx_image_t* ptr) const { vpx_img_free(ptr); }
+ };
+ using VpxImageUniquePtr = std::unique_ptr<vpx_image_t, VpxImageDeleter>;
+
+ // Represents the state of one frame encode. This is created in
+ // EncodeAndSend(), and passed to the encode thread via the |encode_queue_|.
+ struct WorkUnit {
+ VpxImageUniquePtr image;
+ Clock::duration duration;
+ Clock::time_point reference_time;
+ RtpTimeTicks rtp_timestamp;
+ std::function<void(Stats)> stats_callback;
+ };
+
+ // Same as WorkUnit, but with additional fields to carry the encode results.
+ struct WorkUnitWithResults : public WorkUnit {
+ std::vector<uint8_t> payload;
+ bool is_key_frame;
+ Stats stats;
+ };
+
+ bool is_encoder_initialized() const { return config_.g_threads != 0; }
+
+ // Destroys the VP8 encoder context if it has been initialized.
+ void DestroyEncoder();
+
+ // The procedure for the |encode_thread_| that loops, processing work units
+ // from the |encode_queue_| by calling Encode() until it's time to end the
+ // thread.
+ void ProcessWorkUnitsUntilTimeToQuit();
+
+ // If the |encoder_| is live, attempt reconfiguration to allow it to encode
+ // frames at a new frame size, target bitrate, or "CPU encoding speed." If
+ // reconfiguration is not possible, destroy the existing instance and
+ // re-create a new |encoder_| instance.
+ void PrepareEncoder(int width, int height, int target_bitrate);
+
+ // Wraps the complex libvpx vpx_codec_encode() call using inputs from
+ // |work_unit| and populating results there.
+ void EncodeFrame(bool force_key_frame, WorkUnitWithResults* work_unit);
+
+ // Computes and populates |work_unit.stats| after the last call to
+ // EncodeFrame().
+ void ComputeFrameEncodeStats(Clock::duration encode_wall_time,
+ int target_bitrate,
+ WorkUnitWithResults* work_unit);
+
+ // Updates the |ideal_speed_setting_|, to take effect with the next frame
+ // encode, based on the given performance |stats|.
+ void UpdateSpeedSettingForNextFrame(const Stats& stats);
+
+ // Assembles and enqueues an EncodedFrame with the Sender on the main thread.
+ void SendEncodedFrame(WorkUnitWithResults results);
+
+ // Allocates a vpx_image_t and copies the content from |frame| to it.
+ static VpxImageUniquePtr CloneAsVpxImage(const VideoFrame& frame);
+
+ const Parameters params_;
+ TaskRunner* const main_task_runner_;
+ Sender* const sender_;
+
+ // The reference time of the first frame passed to EncodeAndSend().
+ Clock::time_point start_time_ = Clock::time_point::min();
+
+ // The RTP timestamp of the last frame that was pushed into the
+ // |encode_queue_| by EncodeAndSend(). This is used to check whether
+ // timestamps are monotonically increasing.
+ RtpTimeTicks last_enqueued_rtp_timestamp_;
+
+ // Guards a few members shared by both the main and encode threads.
+ std::mutex mutex_;
+
+ // Used by the encode thread to sleep until more work is available.
+ std::condition_variable cv_ ABSL_GUARDED_BY(mutex_);
+
+ // These encode parameters not passed in the WorkUnit struct because it is
+ // desirable for them to be applied as soon as possible, with the very next
+ // WorkUnit popped from the |encode_queue_| on the encode thread, and not to
+ // wait until some later WorkUnit is processed.
+ bool needs_key_frame_ ABSL_GUARDED_BY(mutex_) = true;
+ int target_bitrate_ ABSL_GUARDED_BY(mutex_) = 2 << 20; // Default: 2 Mbps.
+
+ // The queue of frame encodes. The size of this queue is implicitly bounded by
+ // EncodeAndSend(), where it checks for the total in-flight media duration and
+ // maybe drops a frame.
+ std::queue<WorkUnit> encode_queue_ ABSL_GUARDED_BY(mutex_);
+
+ // Current VP8 encoder configuration. Most of the fields are unchanging, and
+ // are populated in the ctor; but thereafter, only the encode thread accesses
+ // this struct.
+ //
+ // The speed setting is controlled via a separate libvpx API (see members
+ // below).
+ vpx_codec_enc_cfg_t config_{};
+
+ // These represent the magnitude of the VP8 speed setting, where larger values
+ // (i.e., faster speed) request less CPU usage but will provide lower video
+ // quality. Only the encode thread accesses these.
+ double ideal_speed_setting_; // A time-weighted average, from measurements.
+ int current_speed_setting_; // Current |encoder_| speed setting.
+
+ // libvpx VP8/9 encoder instance. Only the encode thread accesses this.
+ vpx_codec_ctx_t encoder_;
+
+ // This member should be last in the class since the thread should not start
+ // until all above members have been initialized by the constructor.
+ std::thread encode_thread_;
+};
+
+} // namespace cast
+} // namespace openscreen
+
+#endif // CAST_STANDALONE_SENDER_STREAMING_VPX_ENCODER_H_