diff options
Diffstat (limited to 'webrtc/modules/video_coding/qm_select.h')
-rw-r--r-- | webrtc/modules/video_coding/qm_select.h | 356 |
1 files changed, 356 insertions, 0 deletions
diff --git a/webrtc/modules/video_coding/qm_select.h b/webrtc/modules/video_coding/qm_select.h new file mode 100644 index 0000000000..764b5ed8e3 --- /dev/null +++ b/webrtc/modules/video_coding/qm_select.h @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_ +#define WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_ + +#include "webrtc/common_types.h" +#include "webrtc/typedefs.h" + +/******************************************************/ +/* Quality Modes: Resolution and Robustness settings */ +/******************************************************/ + +namespace webrtc { +struct VideoContentMetrics; + +struct VCMResolutionScale { + VCMResolutionScale() + : codec_width(640), + codec_height(480), + frame_rate(30.0f), + spatial_width_fact(1.0f), + spatial_height_fact(1.0f), + temporal_fact(1.0f), + change_resolution_spatial(false), + change_resolution_temporal(false) {} + uint16_t codec_width; + uint16_t codec_height; + float frame_rate; + float spatial_width_fact; + float spatial_height_fact; + float temporal_fact; + bool change_resolution_spatial; + bool change_resolution_temporal; +}; + +enum ImageType { + kQCIF = 0, // 176x144 + kHCIF, // 264x216 = half(~3/4x3/4) CIF. + kQVGA, // 320x240 = quarter VGA. + kCIF, // 352x288 + kHVGA, // 480x360 = half(~3/4x3/4) VGA. + kVGA, // 640x480 + kQFULLHD, // 960x540 = quarter FULLHD, and half(~3/4x3/4) WHD. + kWHD, // 1280x720 + kFULLHD, // 1920x1080 + kNumImageTypes +}; + +const uint32_t kSizeOfImageType[kNumImageTypes] = { + 25344, 57024, 76800, 101376, 172800, 307200, 518400, 921600, 2073600}; + +enum FrameRateLevelClass { + kFrameRateLow, + kFrameRateMiddle1, + kFrameRateMiddle2, + kFrameRateHigh +}; + +enum ContentLevelClass { kLow, kHigh, kDefault }; + +struct VCMContFeature { + VCMContFeature() : value(0.0f), level(kDefault) {} + void Reset() { + value = 0.0f; + level = kDefault; + } + float value; + ContentLevelClass level; +}; + +enum UpDownAction { kUpResolution, kDownResolution }; + +enum SpatialAction { + kNoChangeSpatial, + kOneHalfSpatialUniform, // 3/4 x 3/4: 9/6 ~1/2 pixel reduction. + kOneQuarterSpatialUniform, // 1/2 x 1/2: 1/4 pixel reduction. + kNumModesSpatial +}; + +enum TemporalAction { + kNoChangeTemporal, + kTwoThirdsTemporal, // 2/3 frame rate reduction + kOneHalfTemporal, // 1/2 frame rate reduction + kNumModesTemporal +}; + +struct ResolutionAction { + ResolutionAction() : spatial(kNoChangeSpatial), temporal(kNoChangeTemporal) {} + SpatialAction spatial; + TemporalAction temporal; +}; + +// Down-sampling factors for spatial (width and height), and temporal. +const float kFactorWidthSpatial[kNumModesSpatial] = {1.0f, 4.0f / 3.0f, 2.0f}; + +const float kFactorHeightSpatial[kNumModesSpatial] = {1.0f, 4.0f / 3.0f, 2.0f}; + +const float kFactorTemporal[kNumModesTemporal] = {1.0f, 1.5f, 2.0f}; + +enum EncoderState { + kStableEncoding, // Low rate mis-match, stable buffer levels. + kStressedEncoding, // Significant over-shooting of target rate, + // Buffer under-flow, etc. + kEasyEncoding // Significant under-shooting of target rate. +}; + +// QmMethod class: main class for resolution and robustness settings + +class VCMQmMethod { + public: + VCMQmMethod(); + virtual ~VCMQmMethod(); + + // Reset values + void ResetQM(); + virtual void Reset() = 0; + + // Compute content class. + uint8_t ComputeContentClass(); + + // Update with the content metrics. + void UpdateContent(const VideoContentMetrics* content_metrics); + + // Compute spatial texture magnitude and level. + // Spatial texture is a spatial prediction error measure. + void ComputeSpatial(); + + // Compute motion magnitude and level for NFD metric. + // NFD is normalized frame difference (normalized by spatial variance). + void ComputeMotionNFD(); + + // Get the imageType (CIF, VGA, HD, etc) for the system width/height. + ImageType GetImageType(uint16_t width, uint16_t height); + + // Return the closest image type. + ImageType FindClosestImageType(uint16_t width, uint16_t height); + + // Get the frame rate level. + FrameRateLevelClass FrameRateLevel(float frame_rate); + + protected: + // Content Data. + const VideoContentMetrics* content_metrics_; + + // Encoder frame sizes and native frame sizes. + uint16_t width_; + uint16_t height_; + float user_frame_rate_; + uint16_t native_width_; + uint16_t native_height_; + float native_frame_rate_; + float aspect_ratio_; + // Image type and frame rate leve, for the current encoder resolution. + ImageType image_type_; + FrameRateLevelClass framerate_level_; + // Content class data. + VCMContFeature motion_; + VCMContFeature spatial_; + uint8_t content_class_; + bool init_; +}; + +// Resolution settings class + +class VCMQmResolution : public VCMQmMethod { + public: + VCMQmResolution(); + virtual ~VCMQmResolution(); + + // Reset all quantities. + virtual void Reset(); + + // Reset rate quantities and counters after every SelectResolution() call. + void ResetRates(); + + // Reset down-sampling state. + void ResetDownSamplingState(); + + // Get the encoder state. + EncoderState GetEncoderState(); + + // Initialize after SetEncodingData in media_opt. + int Initialize(float bitrate, + float user_framerate, + uint16_t width, + uint16_t height, + int num_layers); + + // Update the encoder frame size. + void UpdateCodecParameters(float frame_rate, uint16_t width, uint16_t height); + + // Update with actual bit rate (size of the latest encoded frame) + // and frame type, after every encoded frame. + void UpdateEncodedSize(size_t encoded_size); + + // Update with new target bitrate, actual encoder sent rate, frame_rate, + // loss rate: every ~1 sec from SetTargetRates in media_opt. + void UpdateRates(float target_bitrate, + float encoder_sent_rate, + float incoming_framerate, + uint8_t packet_loss); + + // Extract ST (spatio-temporal) resolution action. + // Inputs: qm: Reference to the quality modes pointer. + // Output: the spatial and/or temporal scale change. + int SelectResolution(VCMResolutionScale** qm); + + private: + // Set the default resolution action. + void SetDefaultAction(); + + // Compute rates for the selection of down-sampling action. + void ComputeRatesForSelection(); + + // Compute the encoder state. + void ComputeEncoderState(); + + // Return true if the action is to go back up in resolution. + bool GoingUpResolution(); + + // Return true if the action is to go down in resolution. + bool GoingDownResolution(); + + // Check the condition for going up in resolution by the scale factors: + // |facWidth|, |facHeight|, |facTemp|. + // |scaleFac| is a scale factor for the transition rate. + bool ConditionForGoingUp(float fac_width, + float fac_height, + float fac_temp, + float scale_fac); + + // Get the bitrate threshold for the resolution action. + // The case |facWidth|=|facHeight|=|facTemp|==1 is for down-sampling action. + // |scaleFac| is a scale factor for the transition rate. + float GetTransitionRate(float fac_width, + float fac_height, + float fac_temp, + float scale_fac); + + // Update the down-sampling state. + void UpdateDownsamplingState(UpDownAction up_down); + + // Update the codec frame size and frame rate. + void UpdateCodecResolution(); + + // Return a state based on average target rate relative transition rate. + uint8_t RateClass(float transition_rate); + + // Adjust the action selected from the table. + void AdjustAction(); + + // Covert 2 stages of 3/4 (=9/16) spatial decimation to 1/2. + void ConvertSpatialFractionalToWhole(); + + // Returns true if the new frame sizes, under the selected spatial action, + // are of even size. + bool EvenFrameSize(); + + // Insert latest down-sampling action into the history list. + void InsertLatestDownAction(); + + // Remove the last (first element) down-sampling action from the list. + void RemoveLastDownAction(); + + // Check constraints on the amount of down-sampling allowed. + void ConstrainAmountOfDownSampling(); + + // For going up in resolution: pick spatial or temporal action, + // if both actions were separately selected. + void PickSpatialOrTemporal(); + + // Select the directional (1x2 or 2x1) spatial down-sampling action. + void SelectSpatialDirectionMode(float transition_rate); + + enum { kDownActionHistorySize = 10 }; + + VCMResolutionScale* qm_; + // Encoder rate control parameters. + float target_bitrate_; + float incoming_framerate_; + float per_frame_bandwidth_; + float buffer_level_; + + // Data accumulated every ~1sec from MediaOpt. + float sum_target_rate_; + float sum_incoming_framerate_; + float sum_rate_MM_; + float sum_rate_MM_sgn_; + float sum_packet_loss_; + // Counters. + uint32_t frame_cnt_; + uint32_t frame_cnt_delta_; + uint32_t update_rate_cnt_; + uint32_t low_buffer_cnt_; + + // Resolution state parameters. + float state_dec_factor_spatial_; + float state_dec_factor_temporal_; + + // Quantities used for selection. + float avg_target_rate_; + float avg_incoming_framerate_; + float avg_ratio_buffer_low_; + float avg_rate_mismatch_; + float avg_rate_mismatch_sgn_; + float avg_packet_loss_; + EncoderState encoder_state_; + ResolutionAction action_; + // Short history of the down-sampling actions from the Initialize() state. + // This is needed for going up in resolution. Since the total amount of + // down-sampling actions are constrained, the length of the list need not be + // large: i.e., (4/3) ^{kDownActionHistorySize} <= kMaxDownSample. + ResolutionAction down_action_history_[kDownActionHistorySize]; + int num_layers_; +}; + +// Robustness settings class. + +class VCMQmRobustness : public VCMQmMethod { + public: + VCMQmRobustness(); + ~VCMQmRobustness(); + + virtual void Reset(); + + // Adjust FEC rate based on content: every ~1 sec from SetTargetRates. + // Returns an adjustment factor. + float AdjustFecFactor(uint8_t code_rate_delta, + float total_rate, + float framerate, + int64_t rtt_time, + uint8_t packet_loss); + + // Set the UEP protection on/off. + bool SetUepProtection(uint8_t code_rate_delta, + float total_rate, + uint8_t packet_loss, + bool frame_type); + + private: + // Previous state of network parameters. + float prev_total_rate_; + int64_t prev_rtt_time_; + uint8_t prev_packet_loss_; + uint8_t prev_code_rate_delta_; +}; +} // namespace webrtc +#endif // WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_ |