diff options
author | philipel <philipel@webrtc.org> | 2015-11-10 02:19:14 -0800 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-11-10 10:19:20 +0000 |
commit | 77ccfb4d16c148e61a316746bb5d9705e8b39f4a (patch) | |
tree | e911ac7a1d968d6b84f92d0cd243bfec40c111d5 | |
parent | ce83ae1c19eb3fb8aea84d8e02c2c005115e0440 (diff) | |
download | webrtc-77ccfb4d16c148e61a316746bb5d9705e8b39f4a.tar.gz |
Work on flexible mode and screen sharing.
Implement VP8 style screensharing but with spatial layers.
Implement flexible mode.
Files from other patches:
generic_encoder.cc
layer_filtering_transport.cc
BUG=webrtc:4914
Review URL: https://codereview.webrtc.org/1328113004
Cr-Commit-Position: refs/heads/master@{#10572}
20 files changed, 1114 insertions, 39 deletions
diff --git a/webrtc/modules/include/module_common_types.h b/webrtc/modules/include/module_common_types.h index ea43e4f035..3a63af014f 100644 --- a/webrtc/modules/include/module_common_types.h +++ b/webrtc/modules/include/module_common_types.h @@ -38,6 +38,7 @@ const int16_t kNoTl0PicIdx = -1; const uint8_t kNoTemporalIdx = 0xFF; const uint8_t kNoSpatialIdx = 0xFF; const uint8_t kNoGofIdx = 0xFF; +const uint8_t kNumVp9Buffers = 8; const size_t kMaxVp9RefPics = 3; const size_t kMaxVp9FramesInGof = 0xFF; // 8 bits const size_t kMaxVp9NumberOfSpatialLayers = 8; diff --git a/webrtc/modules/modules.gyp b/webrtc/modules/modules.gyp index fc2945f87f..805cc2babd 100644 --- a/webrtc/modules/modules.gyp +++ b/webrtc/modules/modules.gyp @@ -279,6 +279,7 @@ 'video_coding/codecs/vp8/simulcast_encoder_adapter_unittest.cc', 'video_coding/codecs/vp8/simulcast_unittest.cc', 'video_coding/codecs/vp8/simulcast_unittest.h', + 'video_coding/codecs/vp9/screenshare_layers_unittest.cc', 'video_coding/main/interface/mock/mock_vcm_callbacks.h', 'video_coding/main/source/decoding_state_unittest.cc', 'video_coding/main/source/jitter_buffer_unittest.cc', diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc index c9d0f3293b..d2f22d5044 100644 --- a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc +++ b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc @@ -725,7 +725,8 @@ bool RtpDepacketizerVp9::Parse(ParsedPayload* parsed_payload, parsed_payload->type.Video.height = vp9->height[0]; } } - parsed_payload->type.Video.isFirstPacket = b_bit && (vp9->spatial_idx == 0); + parsed_payload->type.Video.isFirstPacket = + b_bit && (!l_bit || !vp9->inter_layer_predicted); uint64_t rem_bits = parser.RemainingBitCount(); assert(rem_bits % 8 == 0); diff --git a/webrtc/modules/video_coding/BUILD.gn b/webrtc/modules/video_coding/BUILD.gn index 9e8cd47e53..9a4a528da0 100644 --- a/webrtc/modules/video_coding/BUILD.gn +++ 
b/webrtc/modules/video_coding/BUILD.gn @@ -212,6 +212,8 @@ source_set("webrtc_vp9") { if (rtc_build_vp9) { sources = [ "codecs/vp9/include/vp9.h", + "codecs/vp9/screenshare_layers.cc", + "codecs/vp9/screenshare_layers.h", "codecs/vp9/vp9_frame_buffer_pool.cc", "codecs/vp9/vp9_frame_buffer_pool.h", "codecs/vp9/vp9_impl.cc", diff --git a/webrtc/modules/video_coding/codecs/interface/video_codec_interface.h b/webrtc/modules/video_coding/codecs/interface/video_codec_interface.h index 5115c4bc9e..1e7de1f16b 100644 --- a/webrtc/modules/video_coding/codecs/interface/video_codec_interface.h +++ b/webrtc/modules/video_coding/codecs/interface/video_codec_interface.h @@ -68,6 +68,10 @@ struct CodecSpecificInfoVP9 { uint16_t width[kMaxVp9NumberOfSpatialLayers]; uint16_t height[kMaxVp9NumberOfSpatialLayers]; GofInfoVP9 gof; + + // Frame reference data. + uint8_t num_ref_pics; + uint8_t p_diff[kMaxVp9RefPics]; }; struct CodecSpecificInfoGeneric { diff --git a/webrtc/modules/video_coding/codecs/vp9/screenshare_layers.cc b/webrtc/modules/video_coding/codecs/vp9/screenshare_layers.cc new file mode 100644 index 0000000000..c7ed78a192 --- /dev/null +++ b/webrtc/modules/video_coding/codecs/vp9/screenshare_layers.cc @@ -0,0 +1,93 @@ +/* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. +* +* Use of this source code is governed by a BSD-style license +* that can be found in the LICENSE file in the root of the source +* tree. An additional intellectual property rights grant can be found +* in the file PATENTS. All contributing project authors may +* be found in the AUTHORS file in the root of the source tree. 
+*/ + +#include <algorithm> +#include "webrtc/modules/video_coding/codecs/vp9/screenshare_layers.h" +#include "webrtc/base/checks.h" + +namespace webrtc { + +ScreenshareLayersVP9::ScreenshareLayersVP9(uint8_t num_layers) + : num_layers_(num_layers), + start_layer_(0), + last_timestamp_(0), + timestamp_initialized_(false) { + RTC_DCHECK_GT(num_layers, 0); + RTC_DCHECK_LE(num_layers, kMaxVp9NumberOfSpatialLayers); + memset(bits_used_, 0, sizeof(bits_used_)); + memset(threshold_kbps_, 0, sizeof(threshold_kbps_)); +} + +uint8_t ScreenshareLayersVP9::GetStartLayer() const { + return start_layer_; +} + +void ScreenshareLayersVP9::ConfigureBitrate(int threshold_kbps, + uint8_t layer_id) { + // The upper layer is always the layer we spill frames + // to when the bitrate becomes too high, therefore setting + // a max limit is not allowed. The top layer bitrate is + // never used either so configuring it makes no difference. + RTC_DCHECK_LT(layer_id, num_layers_ - 1); + threshold_kbps_[layer_id] = threshold_kbps; +} + +void ScreenshareLayersVP9::LayerFrameEncoded(unsigned int size_bytes, + uint8_t layer_id) { + RTC_DCHECK_LT(layer_id, num_layers_); + bits_used_[layer_id] += size_bytes * 8; +} + +VP9EncoderImpl::SuperFrameRefSettings +ScreenshareLayersVP9::GetSuperFrameSettings(uint32_t timestamp, + bool is_keyframe) { + VP9EncoderImpl::SuperFrameRefSettings settings; + if (!timestamp_initialized_) { + last_timestamp_ = timestamp; + timestamp_initialized_ = true; + } + float time_diff = (timestamp - last_timestamp_) / 90.f; + float total_bits_used = 0; + float total_threshold_kbps = 0; + start_layer_ = 0; + + // Up to (num_layers - 1) because we only have + // (num_layers - 1) thresholds to check. 
+ for (int layer_id = 0; layer_id < num_layers_ - 1; ++layer_id) { + bits_used_[layer_id] = std::max( + 0.f, bits_used_[layer_id] - time_diff * threshold_kbps_[layer_id]); + total_bits_used += bits_used_[layer_id]; + total_threshold_kbps += threshold_kbps_[layer_id]; + + // If this is a keyframe then there should be no + // references to any previous frames. + if (!is_keyframe) { + settings.layer[layer_id].ref_buf1 = layer_id; + if (total_bits_used > total_threshold_kbps * 1000) + start_layer_ = layer_id + 1; + } + + settings.layer[layer_id].upd_buf = layer_id; + } + // Since the above loop does not iterate over the last layer + // the reference of the last layer has to be set after the loop, + // and if this is a keyframe there should be no references to + // any previous frames. + if (!is_keyframe) + settings.layer[num_layers_ - 1].ref_buf1 = num_layers_ - 1; + + settings.layer[num_layers_ - 1].upd_buf = num_layers_ - 1; + settings.is_keyframe = is_keyframe; + settings.start_layer = start_layer_; + settings.stop_layer = num_layers_ - 1; + last_timestamp_ = timestamp; + return settings; +} + +} // namespace webrtc diff --git a/webrtc/modules/video_coding/codecs/vp9/screenshare_layers.h b/webrtc/modules/video_coding/codecs/vp9/screenshare_layers.h new file mode 100644 index 0000000000..5a901ae359 --- /dev/null +++ b/webrtc/modules/video_coding/codecs/vp9/screenshare_layers.h @@ -0,0 +1,66 @@ +/* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. +* +* Use of this source code is governed by a BSD-style license +* that can be found in the LICENSE file in the root of the source +* tree. An additional intellectual property rights grant can be found +* in the file PATENTS. All contributing project authors may +* be found in the AUTHORS file in the root of the source tree. 
+*/ + +#ifndef WEBRTC_MODULES_VIDEO_CODING_CODECS_VP9_SCREENSHARE_LAYERS_H_ +#define WEBRTC_MODULES_VIDEO_CODING_CODECS_VP9_SCREENSHARE_LAYERS_H_ + +#include "webrtc/modules/video_coding/codecs/vp9/vp9_impl.h" + +namespace webrtc { + +class ScreenshareLayersVP9 { + public: + explicit ScreenshareLayersVP9(uint8_t num_layers); + + // The target bitrate for layer with id layer_id. + void ConfigureBitrate(int threshold_kbps, uint8_t layer_id); + + // The current start layer. + uint8_t GetStartLayer() const; + + // Update the layer with the size of the layer frame. + void LayerFrameEncoded(unsigned int size_bytes, uint8_t layer_id); + + // Get the layer settings for the next superframe. + // + // In short, each time the GetSuperFrameSettings is called the + // bitrate of every layer is calculated and if the cumulative + // bitrate exceeds the configured cumulative bitrates + // (ConfigureBitrate to configure) up to and including that + // layer then the resulting encoding settings for the + // superframe will only encode layers above that layer. + VP9EncoderImpl::SuperFrameRefSettings GetSuperFrameSettings( + uint32_t timestamp, + bool is_keyframe); + + private: + // How many layers that are used. + uint8_t num_layers_; + + // The index of the first layer to encode. + uint8_t start_layer_; + + // Cumulative target kbps for the different layers. + float threshold_kbps_[kMaxVp9NumberOfSpatialLayers - 1]; + + // How many bits that have been used for a certain layer. Increased in + // LayerFrameEncoded() by the size of the encoded frame and decreased in + // GetSuperFrameSettings() depending on the time between frames. + float bits_used_[kMaxVp9NumberOfSpatialLayers]; + + // Timestamp of last frame. + uint32_t last_timestamp_; + + // If the last_timestamp_ has been set. 
+ bool timestamp_initialized_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_VIDEO_CODING_CODECS_VP9_SCREENSHARE_LAYERS_H_ diff --git a/webrtc/modules/video_coding/codecs/vp9/screenshare_layers_unittest.cc b/webrtc/modules/video_coding/codecs/vp9/screenshare_layers_unittest.cc new file mode 100644 index 0000000000..5eb7b237ac --- /dev/null +++ b/webrtc/modules/video_coding/codecs/vp9/screenshare_layers_unittest.cc @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <limits> + +#include "testing/gtest/include/gtest/gtest.h" +#include "vpx/vp8cx.h" +#include "webrtc/base/logging.h" +#include "webrtc/modules/video_coding/codecs/vp9/screenshare_layers.h" +#include "webrtc/modules/video_coding/codecs/vp9/vp9_impl.h" +#include "webrtc/system_wrappers/include/clock.h" + +namespace webrtc { + +typedef VP9EncoderImpl::SuperFrameRefSettings Settings; + +const uint32_t kTickFrequency = 90000; + +class ScreenshareLayerTestVP9 : public ::testing::Test { + protected: + ScreenshareLayerTestVP9() : clock_(0) {} + virtual ~ScreenshareLayerTestVP9() {} + + void InitScreenshareLayers(int layers) { + layers_.reset(new ScreenshareLayersVP9(layers)); + } + + void ConfigureBitrateForLayer(int kbps, uint8_t layer_id) { + layers_->ConfigureBitrate(kbps, layer_id); + } + + void AdvanceTime(int64_t milliseconds) { + clock_.AdvanceTimeMilliseconds(milliseconds); + } + + void AddKilobitsToLayer(int kilobits, uint8_t layer_id) { + layers_->LayerFrameEncoded(kilobits * 1000 / 8, layer_id); + } + + void EqualRefsForLayer(const Settings& actual, uint8_t layer_id) { + 
EXPECT_EQ(expected_.layer[layer_id].upd_buf, + actual.layer[layer_id].upd_buf); + EXPECT_EQ(expected_.layer[layer_id].ref_buf1, + actual.layer[layer_id].ref_buf1); + EXPECT_EQ(expected_.layer[layer_id].ref_buf2, + actual.layer[layer_id].ref_buf2); + EXPECT_EQ(expected_.layer[layer_id].ref_buf3, + actual.layer[layer_id].ref_buf3); + } + + void EqualRefs(const Settings& actual) { + for (unsigned int layer_id = 0; layer_id < kMaxVp9NumberOfSpatialLayers; + ++layer_id) { + EqualRefsForLayer(actual, layer_id); + } + } + + void EqualStartStopKeyframe(const Settings& actual) { + EXPECT_EQ(expected_.start_layer, actual.start_layer); + EXPECT_EQ(expected_.stop_layer, actual.stop_layer); + EXPECT_EQ(expected_.is_keyframe, actual.is_keyframe); + } + + // Check that the settings returned by GetSuperFrameSettings() is + // equal to the expected_ settings. + void EqualToExpected() { + uint32_t frame_timestamp_ = + clock_.TimeInMilliseconds() * (kTickFrequency / 1000); + Settings actual = + layers_->GetSuperFrameSettings(frame_timestamp_, expected_.is_keyframe); + EqualRefs(actual); + EqualStartStopKeyframe(actual); + } + + Settings expected_; + SimulatedClock clock_; + rtc::scoped_ptr<ScreenshareLayersVP9> layers_; +}; + +TEST_F(ScreenshareLayerTestVP9, NoRefsOnKeyFrame) { + const int kNumLayers = kMaxVp9NumberOfSpatialLayers; + InitScreenshareLayers(kNumLayers); + expected_.start_layer = 0; + expected_.stop_layer = kNumLayers - 1; + + for (int l = 0; l < kNumLayers; ++l) { + expected_.layer[l].upd_buf = l; + } + expected_.is_keyframe = true; + EqualToExpected(); + + for (int l = 0; l < kNumLayers; ++l) { + expected_.layer[l].ref_buf1 = l; + } + expected_.is_keyframe = false; + EqualToExpected(); +} + +// Test if it is possible to send at a high bitrate (over the threshold) +// after a longer period of low bitrate. This should not be possible. 
+TEST_F(ScreenshareLayerTestVP9, DontAccumelateAvailableBitsOverTime) { + InitScreenshareLayers(2); + ConfigureBitrateForLayer(100, 0); + + expected_.layer[0].upd_buf = 0; + expected_.layer[0].ref_buf1 = 0; + expected_.layer[1].upd_buf = 1; + expected_.layer[1].ref_buf1 = 1; + expected_.start_layer = 0; + expected_.stop_layer = 1; + + // Send 10 frames at a low bitrate (50 kbps) + for (int i = 0; i < 10; ++i) { + AdvanceTime(200); + EqualToExpected(); + AddKilobitsToLayer(10, 0); + } + + AdvanceTime(200); + EqualToExpected(); + AddKilobitsToLayer(301, 0); + + // Send 10 frames at a high bitrate (200 kbps) + expected_.start_layer = 1; + for (int i = 0; i < 10; ++i) { + AdvanceTime(200); + EqualToExpected(); + AddKilobitsToLayer(40, 1); + } +} + +// Test if used bits are accumulated over layers, as they should be. +TEST_F(ScreenshareLayerTestVP9, AccumelateUsedBitsOverLayers) { + const int kNumLayers = kMaxVp9NumberOfSpatialLayers; + InitScreenshareLayers(kNumLayers); + for (int l = 0; l < kNumLayers - 1; ++l) + ConfigureBitrateForLayer(100, l); + for (int l = 0; l < kNumLayers; ++l) { + expected_.layer[l].upd_buf = l; + expected_.layer[l].ref_buf1 = l; + } + + expected_.start_layer = 0; + expected_.stop_layer = kNumLayers - 1; + EqualToExpected(); + + for (int layer = 0; layer < kNumLayers - 1; ++layer) { + expected_.start_layer = layer; + EqualToExpected(); + AddKilobitsToLayer(101, layer); + } +} + +// General testing of the bitrate controller. 
+TEST_F(ScreenshareLayerTestVP9, 2LayerBitrate) { + InitScreenshareLayers(2); + ConfigureBitrateForLayer(100, 0); + + expected_.layer[0].upd_buf = 0; + expected_.layer[1].upd_buf = 1; + expected_.layer[0].ref_buf1 = -1; + expected_.layer[1].ref_buf1 = -1; + expected_.start_layer = 0; + expected_.stop_layer = 1; + + expected_.is_keyframe = true; + EqualToExpected(); + AddKilobitsToLayer(100, 0); + + expected_.layer[0].ref_buf1 = 0; + expected_.layer[1].ref_buf1 = 1; + expected_.is_keyframe = false; + AdvanceTime(199); + EqualToExpected(); + AddKilobitsToLayer(100, 0); + + expected_.start_layer = 1; + for (int frame = 0; frame < 3; ++frame) { + AdvanceTime(200); + EqualToExpected(); + AddKilobitsToLayer(100, 1); + } + + // Just before enough bits become available for L0 @0.999 seconds. + AdvanceTime(199); + EqualToExpected(); + AddKilobitsToLayer(100, 1); + + // Just after enough bits become available for L0 @1.0001 seconds. + expected_.start_layer = 0; + AdvanceTime(2); + EqualToExpected(); + AddKilobitsToLayer(100, 0); + + // Keyframes always encode all layers, even if it is over budget. + expected_.layer[0].ref_buf1 = -1; + expected_.layer[1].ref_buf1 = -1; + expected_.is_keyframe = true; + AdvanceTime(499); + EqualToExpected(); + expected_.layer[0].ref_buf1 = 0; + expected_.layer[1].ref_buf1 = 1; + expected_.start_layer = 1; + expected_.is_keyframe = false; + EqualToExpected(); + AddKilobitsToLayer(100, 0); + + // 400 kb in L0 --> @3 second mark to fall below the threshold.. + // just before @2.999 seconds. + expected_.is_keyframe = false; + AdvanceTime(1499); + EqualToExpected(); + AddKilobitsToLayer(100, 1); + + // just after @3.001 seconds. + expected_.start_layer = 0; + AdvanceTime(2); + EqualToExpected(); + AddKilobitsToLayer(100, 0); +} + +// General testing of the bitrate controller. 
+TEST_F(ScreenshareLayerTestVP9, 3LayerBitrate) { + InitScreenshareLayers(3); + ConfigureBitrateForLayer(100, 0); + ConfigureBitrateForLayer(100, 1); + + for (int l = 0; l < 3; ++l) { + expected_.layer[l].upd_buf = l; + expected_.layer[l].ref_buf1 = l; + } + expected_.start_layer = 0; + expected_.stop_layer = 2; + + EqualToExpected(); + AddKilobitsToLayer(105, 0); + AddKilobitsToLayer(30, 1); + + AdvanceTime(199); + EqualToExpected(); + AddKilobitsToLayer(105, 0); + AddKilobitsToLayer(30, 1); + + expected_.start_layer = 1; + AdvanceTime(200); + EqualToExpected(); + AddKilobitsToLayer(130, 1); + + expected_.start_layer = 2; + AdvanceTime(200); + EqualToExpected(); + + // 400 kb in L1 --> @1.0 second mark to fall below threshold. + // 210 kb in L0 --> @1.1 second mark to fall below threshold. + // Just before L1 @0.999 seconds. + AdvanceTime(399); + EqualToExpected(); + + // Just after L1 @1.001 seconds. + expected_.start_layer = 1; + AdvanceTime(2); + EqualToExpected(); + + // Just before L0 @1.099 seconds. + AdvanceTime(99); + EqualToExpected(); + + // Just after L0 @1.101 seconds. + expected_.start_layer = 0; + AdvanceTime(2); + EqualToExpected(); + + // @1.1 seconds + AdvanceTime(99); + EqualToExpected(); + AddKilobitsToLayer(200, 1); + + expected_.is_keyframe = true; + for (int l = 0; l < 3; ++l) + expected_.layer[l].ref_buf1 = -1; + AdvanceTime(200); + EqualToExpected(); + + expected_.is_keyframe = false; + expected_.start_layer = 2; + for (int l = 0; l < 3; ++l) + expected_.layer[l].ref_buf1 = l; + AdvanceTime(200); + EqualToExpected(); +} + +// Test that the bitrate calculations are +// correct when the timestamp wraps. 
+TEST_F(ScreenshareLayerTestVP9, TimestampWrap) { + InitScreenshareLayers(2); + ConfigureBitrateForLayer(100, 0); + + expected_.layer[0].upd_buf = 0; + expected_.layer[0].ref_buf1 = 0; + expected_.layer[1].upd_buf = 1; + expected_.layer[1].ref_buf1 = 1; + expected_.start_layer = 0; + expected_.stop_layer = 1; + + // Advance time to just before the timestamp wraps. + AdvanceTime(std::numeric_limits<uint32_t>::max() / (kTickFrequency / 1000)); + EqualToExpected(); + AddKilobitsToLayer(200, 0); + + // Wrap + expected_.start_layer = 1; + AdvanceTime(1); + EqualToExpected(); +} + +} // namespace webrtc diff --git a/webrtc/modules/video_coding/codecs/vp9/vp9.gyp b/webrtc/modules/video_coding/codecs/vp9/vp9.gyp index 752521c5cb..9049b63419 100644 --- a/webrtc/modules/video_coding/codecs/vp9/vp9.gyp +++ b/webrtc/modules/video_coding/codecs/vp9/vp9.gyp @@ -28,6 +28,8 @@ ['build_vp9==1', { 'sources': [ 'include/vp9.h', + 'screenshare_layers.cc', + 'screenshare_layers.h', 'vp9_frame_buffer_pool.cc', 'vp9_frame_buffer_pool.h', 'vp9_impl.cc', diff --git a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc index 7ebe1a275d..4dd59b268d 100644 --- a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc +++ b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc @@ -27,6 +27,7 @@ #include "webrtc/common.h" #include "webrtc/common_video/libyuv/include/webrtc_libyuv.h" #include "webrtc/modules/include/module_common_types.h" +#include "webrtc/modules/video_coding/codecs/vp9/screenshare_layers.h" #include "webrtc/system_wrappers/include/logging.h" #include "webrtc/system_wrappers/include/tick_util.h" @@ -76,9 +77,12 @@ VP9EncoderImpl::VP9EncoderImpl() raw_(NULL), input_image_(NULL), tl0_pic_idx_(0), - gof_idx_(0), + frames_since_kf_(0), num_temporal_layers_(0), - num_spatial_layers_(0) { + num_spatial_layers_(0), + frames_encoded_(0), + // Use two spatial when screensharing with flexible mode. 
+ spatial_layer_(new ScreenshareLayersVP9(2)) { memset(&codec_, 0, sizeof(codec_)); uint32_t seed = static_cast<uint32_t>(TickTime::MillisecondTimestamp()); srand(seed); @@ -208,6 +212,7 @@ int VP9EncoderImpl::SetRates(uint32_t new_bitrate_kbit, } config_->rc_target_bitrate = new_bitrate_kbit; codec_.maxFramerate = new_framerate; + spatial_layer_->ConfigureBitrate(new_bitrate_kbit, 0); if (!SetSvcRates()) { return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; @@ -246,6 +251,7 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst, if (inst->codecSpecific.VP9.numberOfSpatialLayers > 2) { return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } + int retVal = Release(); if (retVal < 0) { return retVal; @@ -324,7 +330,13 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst, // TODO(asapersson): Check configuration of temporal switch up and increase // pattern length. - if (num_temporal_layers_ == 1) { + is_flexible_mode_ = inst->codecSpecific.VP9.flexibleMode; + if (is_flexible_mode_) { + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; + config_->ts_number_layers = num_temporal_layers_; + if (codec_.mode == kScreensharing) + spatial_layer_->ConfigureBitrate(inst->startBitrate, 0); + } else if (num_temporal_layers_ == 1) { gof_.SetGofInfoVP9(kTemporalStructureMode1); config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING; config_->ts_number_layers = 1; @@ -395,7 +407,8 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) { // 1:2 scaling in each dimension. 
svc_internal_.svc_params.scaling_factor_num[i] = scaling_factor_num; svc_internal_.svc_params.scaling_factor_den[i] = 256; - scaling_factor_num /= 2; + if (codec_.mode != kScreensharing) + scaling_factor_num /= 2; } } @@ -495,12 +508,35 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image, raw_->stride[VPX_PLANE_U] = input_image.stride(kUPlane); raw_->stride[VPX_PLANE_V] = input_image.stride(kVPlane); - int flags = 0; + vpx_enc_frame_flags_t flags = 0; bool send_keyframe = (frame_type == kVideoFrameKey); if (send_keyframe) { // Key frame request from caller. flags = VPX_EFLAG_FORCE_KF; } + + if (is_flexible_mode_) { + SuperFrameRefSettings settings; + + // These structs are copied when calling vpx_codec_control, + // therefore it is ok for them to go out of scope. + vpx_svc_ref_frame_config enc_layer_conf; + vpx_svc_layer_id layer_id; + + if (codec_.mode == kRealtimeVideo) { + // Real time video not yet implemented in flexible mode. + RTC_NOTREACHED(); + } else { + settings = spatial_layer_->GetSuperFrameSettings(input_image.timestamp(), + send_keyframe); + } + enc_layer_conf = GenerateRefsAndFlags(settings); + layer_id.temporal_layer_id = 0; + layer_id.spatial_layer_id = settings.start_layer; + vpx_codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id); + vpx_codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG, &enc_layer_conf); + } + assert(codec_.maxFramerate > 0); uint32_t duration = 90000 / codec_.maxFramerate; if (vpx_codec_encode(encoder_, raw_, timestamp_, duration, flags, @@ -526,9 +562,8 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, !codec_.codecSpecific.VP9.flexibleMode) ? 
true : false; - if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) { - gof_idx_ = 0; - } + if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) + frames_since_kf_ = 0; vpx_svc_layer_id_t layer_id = {0}; vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); @@ -551,17 +586,18 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, vp9_info->ss_data_available = false; } - if (vp9_info->flexible_mode) { - vp9_info->gof_idx = kNoGofIdx; - } else { - vp9_info->gof_idx = - static_cast<uint8_t>(gof_idx_++ % gof_.num_frames_in_gof); - } - // TODO(asapersson): this info has to be obtained from the encoder. vp9_info->temporal_up_switch = true; - if (layer_id.spatial_layer_id == 0) { + bool is_first_frame = false; + if (is_flexible_mode_) { + is_first_frame = + layer_id.spatial_layer_id == spatial_layer_->GetStartLayer(); + } else { + is_first_frame = layer_id.spatial_layer_id == 0; + } + + if (is_first_frame) { picture_id_ = (picture_id_ + 1) & 0x7FFF; // TODO(asapersson): this info has to be obtained from the encoder. vp9_info->inter_layer_predicted = false; @@ -582,6 +618,20 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, // Always populate this, so that the packetizer can properly set the marker // bit. 
vp9_info->num_spatial_layers = num_spatial_layers_; + + vp9_info->num_ref_pics = 0; + if (vp9_info->flexible_mode) { + vp9_info->gof_idx = kNoGofIdx; + vp9_info->num_ref_pics = num_ref_pics_[layer_id.spatial_layer_id]; + for (int i = 0; i < num_ref_pics_[layer_id.spatial_layer_id]; ++i) { + vp9_info->p_diff[i] = p_diff_[layer_id.spatial_layer_id][i]; + } + } else { + vp9_info->gof_idx = + static_cast<uint8_t>(frames_since_kf_ % gof_.num_frames_in_gof); + } + ++frames_since_kf_; + if (vp9_info->ss_data_available) { vp9_info->spatial_layer_resolution_present = true; for (size_t i = 0; i < vp9_info->num_spatial_layers; ++i) { @@ -617,6 +667,14 @@ int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) { frag_info.fragmentationPlType[part_idx] = 0; frag_info.fragmentationTimeDiff[part_idx] = 0; encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz); + + vpx_svc_layer_id_t layer_id = {0}; + vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); + if (is_flexible_mode_ && codec_.mode == kScreensharing) + spatial_layer_->LayerFrameEncoded( + static_cast<unsigned int>(encoded_image_._length), + layer_id.spatial_layer_id); + assert(encoded_image_._length <= encoded_image_._size); // End of frame. @@ -638,6 +696,108 @@ int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) { return WEBRTC_VIDEO_CODEC_OK; } +vpx_svc_ref_frame_config VP9EncoderImpl::GenerateRefsAndFlags( + const SuperFrameRefSettings& settings) { + static const vpx_enc_frame_flags_t kAllFlags = + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF; + vpx_svc_ref_frame_config sf_conf = {}; + if (settings.is_keyframe) { + // Used later on to make sure we don't make any invalid references. 
+ memset(buffer_updated_at_frame_, -1, sizeof(buffer_updated_at_frame_)); + for (int layer = settings.start_layer; layer <= settings.stop_layer; + ++layer) { + num_ref_pics_[layer] = 0; + buffer_updated_at_frame_[settings.layer[layer].upd_buf] = frames_encoded_; + // When encoding a keyframe only the alt_fb_idx is used + // to specify which layer ends up in which buffer. + sf_conf.alt_fb_idx[layer] = settings.layer[layer].upd_buf; + } + } else { + for (int layer_idx = settings.start_layer; layer_idx <= settings.stop_layer; + ++layer_idx) { + vpx_enc_frame_flags_t layer_flags = kAllFlags; + num_ref_pics_[layer_idx] = 0; + int8_t refs[3] = {settings.layer[layer_idx].ref_buf1, + settings.layer[layer_idx].ref_buf2, + settings.layer[layer_idx].ref_buf3}; + + for (unsigned int ref_idx = 0; ref_idx < kMaxVp9RefPics; ++ref_idx) { + if (refs[ref_idx] == -1) + continue; + + RTC_DCHECK_GE(refs[ref_idx], 0); + RTC_DCHECK_LE(refs[ref_idx], 7); + // Easier to remove flags from all flags rather than having to + // build the flags from 0. + switch (num_ref_pics_[layer_idx]) { + case 0: { + sf_conf.lst_fb_idx[layer_idx] = refs[ref_idx]; + layer_flags &= ~VP8_EFLAG_NO_REF_LAST; + break; + } + case 1: { + sf_conf.gld_fb_idx[layer_idx] = refs[ref_idx]; + layer_flags &= ~VP8_EFLAG_NO_REF_GF; + break; + } + case 2: { + sf_conf.alt_fb_idx[layer_idx] = refs[ref_idx]; + layer_flags &= ~VP8_EFLAG_NO_REF_ARF; + break; + } + } + // Make sure we don't reference a buffer that hasn't been + // used at all or hasn't been used since a keyframe. 
+ RTC_DCHECK_NE(buffer_updated_at_frame_[refs[ref_idx]], -1); + + p_diff_[layer_idx][num_ref_pics_[layer_idx]] = + frames_encoded_ - buffer_updated_at_frame_[refs[ref_idx]]; + num_ref_pics_[layer_idx]++; + } + + bool upd_buf_same_as_a_ref = false; + if (settings.layer[layer_idx].upd_buf != -1) { + for (unsigned int ref_idx = 0; ref_idx < kMaxVp9RefPics; ++ref_idx) { + if (settings.layer[layer_idx].upd_buf == refs[ref_idx]) { + switch (ref_idx) { + case 0: { + layer_flags &= ~VP8_EFLAG_NO_UPD_LAST; + break; + } + case 1: { + layer_flags &= ~VP8_EFLAG_NO_UPD_GF; + break; + } + case 2: { + layer_flags &= ~VP8_EFLAG_NO_UPD_ARF; + break; + } + } + upd_buf_same_as_a_ref = true; + break; + } + } + if (!upd_buf_same_as_a_ref) { + // If we have three references and a buffer is specified to be + // updated, then that buffer must be the same as one of the + // three references. + RTC_CHECK_LT(num_ref_pics_[layer_idx], kMaxVp9RefPics); + + sf_conf.alt_fb_idx[layer_idx] = settings.layer[layer_idx].upd_buf; + layer_flags ^= VP8_EFLAG_NO_UPD_ARF; + } + + int updated_buffer = settings.layer[layer_idx].upd_buf; + buffer_updated_at_frame_[updated_buffer] = frames_encoded_; + sf_conf.frame_flags[layer_idx] = layer_flags; + } + } + } + ++frames_encoded_; + return sf_conf; +} + int VP9EncoderImpl::SetChannelParameters(uint32_t packet_loss, int64_t rtt) { return WEBRTC_VIDEO_CODEC_OK; } diff --git a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.h b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.h index ecc04651e5..9a48e74d3e 100644 --- a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.h +++ b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.h @@ -21,6 +21,8 @@ namespace webrtc { +class ScreenshareLayersVP9; + class VP9EncoderImpl : public VP9Encoder { public: VP9EncoderImpl(); @@ -45,6 +47,20 @@ class VP9EncoderImpl : public VP9Encoder { void OnDroppedFrame() override {} + struct LayerFrameRefSettings { + int8_t upd_buf = -1; // -1 - no update, 0..7 - update buffer 0..7 + int8_t 
ref_buf1 = -1; // -1 - no reference, 0..7 - reference buffer 0..7 + int8_t ref_buf2 = -1; // -1 - no reference, 0..7 - reference buffer 0..7 + int8_t ref_buf3 = -1; // -1 - no reference, 0..7 - reference buffer 0..7 + }; + + struct SuperFrameRefSettings { + LayerFrameRefSettings layer[kMaxVp9NumberOfSpatialLayers]; + uint8_t start_layer = 0; // The first spatial layer to be encoded. + uint8_t stop_layer = 0; // The last spatial layer to be encoded. + bool is_keyframe = false; + }; + private: // Determine number of encoder threads to use. int NumberOfThreads(int width, int height, int number_of_cores); @@ -59,6 +75,15 @@ class VP9EncoderImpl : public VP9Encoder { bool ExplicitlyConfiguredSpatialLayers() const; bool SetSvcRates(); + // Used for flexible mode to set the flags and buffer references used + // by the encoder. Also calculates the references used by the RTP + // packetizer. + // + // Has to be called for every frame (keyframes included) to update the + // state used to calculate references. + vpx_svc_ref_frame_config GenerateRefsAndFlags( + const SuperFrameRefSettings& settings); + virtual int GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt); // Callback function for outputting packets per spatial layer. @@ -89,9 +114,17 @@ class VP9EncoderImpl : public VP9Encoder { GofInfoVP9 gof_; // Contains each frame's temporal information for // non-flexible mode. uint8_t tl0_pic_idx_; // Only used in non-flexible mode. - size_t gof_idx_; // Only used in non-flexible mode. + size_t frames_since_kf_; uint8_t num_temporal_layers_; uint8_t num_spatial_layers_; + + // Used for flexible mode. 
+ bool is_flexible_mode_; + int64_t buffer_updated_at_frame_[kNumVp9Buffers]; + int64_t frames_encoded_; + uint8_t num_ref_pics_[kMaxVp9NumberOfSpatialLayers]; + uint8_t p_diff_[kMaxVp9NumberOfSpatialLayers][kMaxVp9RefPics]; + rtc::scoped_ptr<ScreenshareLayersVP9> spatial_layer_; }; diff --git a/webrtc/modules/video_coding/main/source/decoding_state.cc b/webrtc/modules/video_coding/main/source/decoding_state.cc index a3da7c65d8..bdc632933c 100644 --- a/webrtc/modules/video_coding/main/source/decoding_state.cc +++ b/webrtc/modules/video_coding/main/source/decoding_state.cc @@ -24,7 +24,9 @@ VCMDecodingState::VCMDecodingState() temporal_id_(kNoTemporalIdx), tl0_pic_id_(kNoTl0PicIdx), full_sync_(true), - in_initial_state_(true) {} + in_initial_state_(true) { + memset(frame_decoded_, 0, sizeof(frame_decoded_)); +} VCMDecodingState::~VCMDecodingState() {} @@ -37,6 +39,7 @@ void VCMDecodingState::Reset() { tl0_pic_id_ = kNoTl0PicIdx; full_sync_ = true; in_initial_state_ = true; + memset(frame_decoded_, 0, sizeof(frame_decoded_)); } uint32_t VCMDecodingState::time_stamp() const { @@ -63,12 +66,33 @@ bool VCMDecodingState::IsOldPacket(const VCMPacket* packet) const { void VCMDecodingState::SetState(const VCMFrameBuffer* frame) { assert(frame != NULL && frame->GetHighSeqNum() >= 0); - UpdateSyncState(frame); + if (!UsingFlexibleMode(frame)) + UpdateSyncState(frame); sequence_num_ = static_cast<uint16_t>(frame->GetHighSeqNum()); time_stamp_ = frame->TimeStamp(); picture_id_ = frame->PictureId(); temporal_id_ = frame->TemporalId(); tl0_pic_id_ = frame->Tl0PicId(); + + if (UsingFlexibleMode(frame)) { + uint16_t frame_index = picture_id_ % kFrameDecodedLength; + if (in_initial_state_) { + frame_decoded_cleared_to_ = frame_index; + } else if (frame->FrameType() == kVideoFrameKey) { + memset(frame_decoded_, 0, sizeof(frame_decoded_)); + frame_decoded_cleared_to_ = frame_index; + } else { + if (AheadOfFramesDecodedClearedTo(frame_index)) { + while (frame_decoded_cleared_to_ != 
frame_index) { + frame_decoded_cleared_to_ = + (frame_decoded_cleared_to_ + 1) % kFrameDecodedLength; + frame_decoded_[frame_decoded_cleared_to_] = false; + } + } + } + frame_decoded_[frame_index] = true; + } + in_initial_state_ = false; } @@ -80,6 +104,8 @@ void VCMDecodingState::CopyFrom(const VCMDecodingState& state) { tl0_pic_id_ = state.tl0_pic_id_; full_sync_ = state.full_sync_; in_initial_state_ = state.in_initial_state_; + frame_decoded_cleared_to_ = state.frame_decoded_cleared_to_; + memcpy(frame_decoded_, state.frame_decoded_, sizeof(frame_decoded_)); } bool VCMDecodingState::UpdateEmptyFrame(const VCMFrameBuffer* frame) { @@ -173,7 +199,11 @@ bool VCMDecodingState::ContinuousFrame(const VCMFrameBuffer* frame) const { if (!full_sync_ && !frame->LayerSync()) return false; if (UsingPictureId(frame)) { - return ContinuousPictureId(frame->PictureId()); + if (UsingFlexibleMode(frame)) { + return ContinuousFrameRefs(frame); + } else { + return ContinuousPictureId(frame->PictureId()); + } } else { return ContinuousSeqNum(static_cast<uint16_t>(frame->GetLowSeqNum())); } @@ -216,8 +246,41 @@ bool VCMDecodingState::ContinuousLayer(int temporal_id, return (static_cast<uint8_t>(tl0_pic_id_ + 1) == tl0_pic_id); } +bool VCMDecodingState::ContinuousFrameRefs(const VCMFrameBuffer* frame) const { + uint8_t num_refs = frame->CodecSpecific()->codecSpecific.VP9.num_ref_pics; + for (uint8_t r = 0; r < num_refs; ++r) { + uint16_t frame_ref = frame->PictureId() - + frame->CodecSpecific()->codecSpecific.VP9.p_diff[r]; + uint16_t frame_index = frame_ref % kFrameDecodedLength; + if (AheadOfFramesDecodedClearedTo(frame_index) || + !frame_decoded_[frame_index]) { + return false; + } + } + return true; +} + bool VCMDecodingState::UsingPictureId(const VCMFrameBuffer* frame) const { return (frame->PictureId() != kNoPictureId && picture_id_ != kNoPictureId); } +bool VCMDecodingState::UsingFlexibleMode(const VCMFrameBuffer* frame) const { + return frame->CodecSpecific()->codecType == 
kVideoCodecVP9 && + frame->CodecSpecific()->codecSpecific.VP9.flexible_mode; +} + +// TODO(philipel): change how this check works, this check practically +// limits the max p_diff to 64. +bool VCMDecodingState::AheadOfFramesDecodedClearedTo(uint16_t index) const { + // No way of knowing for sure if we are actually ahead of + // frame_decoded_cleared_to_. We just make the assumption + // that we are not trying to reference back to a very old + // index, but instead are referencing a newer index. + uint16_t diff = + index > frame_decoded_cleared_to_ + ? kFrameDecodedLength - (index - frame_decoded_cleared_to_) + : frame_decoded_cleared_to_ - index; + return diff > kFrameDecodedLength / 2; +} + } // namespace webrtc diff --git a/webrtc/modules/video_coding/main/source/decoding_state.h b/webrtc/modules/video_coding/main/source/decoding_state.h index 99ee335195..fe40b24030 100644 --- a/webrtc/modules/video_coding/main/source/decoding_state.h +++ b/webrtc/modules/video_coding/main/source/decoding_state.h @@ -21,6 +21,11 @@ class VCMPacket; class VCMDecodingState { public: + // The max number of bits used to reference back + // to a previous frame when using flexible mode. + static const uint16_t kNumRefBits = 7; + static const uint16_t kFrameDecodedLength = 1 << kNumRefBits; + VCMDecodingState(); ~VCMDecodingState(); // Check for old frame @@ -52,7 +57,10 @@ class VCMDecodingState { bool ContinuousPictureId(int picture_id) const; bool ContinuousSeqNum(uint16_t seq_num) const; bool ContinuousLayer(int temporal_id, int tl0_pic_id) const; + bool ContinuousFrameRefs(const VCMFrameBuffer* frame) const; bool UsingPictureId(const VCMFrameBuffer* frame) const; + bool UsingFlexibleMode(const VCMFrameBuffer* frame) const; + bool AheadOfFramesDecodedClearedTo(uint16_t index) const; // Keep state of last decoded frame. // TODO(mikhal/stefan): create designated classes to handle these types. 
@@ -63,6 +71,10 @@ class VCMDecodingState { int tl0_pic_id_; bool full_sync_; // Sync flag when temporal layers are used. bool in_initial_state_; + + // Used to check references in flexible mode. + bool frame_decoded_[kFrameDecodedLength]; + uint16_t frame_decoded_cleared_to_; }; } // namespace webrtc diff --git a/webrtc/modules/video_coding/main/source/decoding_state_unittest.cc b/webrtc/modules/video_coding/main/source/decoding_state_unittest.cc index 30b57862d5..9dfb3133aa 100644 --- a/webrtc/modules/video_coding/main/source/decoding_state_unittest.cc +++ b/webrtc/modules/video_coding/main/source/decoding_state_unittest.cc @@ -446,4 +446,254 @@ TEST(TestDecodingState, PictureIdRepeat) { EXPECT_FALSE(dec_state.ContinuousFrame(&frame)); } +TEST(TestDecodingState, FrameContinuityFlexibleModeKeyFrame) { + VCMDecodingState dec_state; + VCMFrameBuffer frame; + VCMPacket packet; + packet.isFirstPacket = true; + packet.timestamp = 1; + packet.seqNum = 0xffff; + uint8_t data[] = "I need a data pointer for this test!"; + packet.sizeBytes = sizeof(data); + packet.dataPtr = data; + packet.codecSpecificHeader.codec = kRtpVideoVp9; + + RTPVideoHeaderVP9& vp9_hdr = packet.codecSpecificHeader.codecHeader.VP9; + vp9_hdr.picture_id = 10; + vp9_hdr.flexible_mode = true; + + FrameData frame_data; + frame_data.rtt_ms = 0; + frame_data.rolling_average_packets_per_frame = -1; + + // Key frame as first frame + packet.frameType = kVideoFrameKey; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_TRUE(dec_state.ContinuousFrame(&frame)); + dec_state.SetState(&frame); + + // Key frame again + vp9_hdr.picture_id = 11; + frame.Reset(); + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_TRUE(dec_state.ContinuousFrame(&frame)); + dec_state.SetState(&frame); + + // Ref to 11, continuous + frame.Reset(); + packet.frameType = kVideoFrameDelta; + vp9_hdr.picture_id = 12; + vp9_hdr.num_ref_pics = 1; + vp9_hdr.pid_diff[0] = 1; + EXPECT_LE(0, 
frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_TRUE(dec_state.ContinuousFrame(&frame)); +} + +TEST(TestDecodingState, FrameContinuityFlexibleModeOutOfOrderFrames) { + VCMDecodingState dec_state; + VCMFrameBuffer frame; + VCMPacket packet; + packet.isFirstPacket = true; + packet.timestamp = 1; + packet.seqNum = 0xffff; + uint8_t data[] = "I need a data pointer for this test!"; + packet.sizeBytes = sizeof(data); + packet.dataPtr = data; + packet.codecSpecificHeader.codec = kRtpVideoVp9; + + RTPVideoHeaderVP9& vp9_hdr = packet.codecSpecificHeader.codecHeader.VP9; + vp9_hdr.picture_id = 10; + vp9_hdr.flexible_mode = true; + + FrameData frame_data; + frame_data.rtt_ms = 0; + frame_data.rolling_average_packets_per_frame = -1; + + // Key frame as first frame + packet.frameType = kVideoFrameKey; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_TRUE(dec_state.ContinuousFrame(&frame)); + dec_state.SetState(&frame); + + // Ref to 10, continuous + frame.Reset(); + packet.frameType = kVideoFrameDelta; + vp9_hdr.picture_id = 15; + vp9_hdr.num_ref_pics = 1; + vp9_hdr.pid_diff[0] = 5; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_TRUE(dec_state.ContinuousFrame(&frame)); + dec_state.SetState(&frame); + + // Out of order, last id 15, this id 12, ref to 10, continuous + frame.Reset(); + vp9_hdr.picture_id = 12; + vp9_hdr.pid_diff[0] = 2; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_TRUE(dec_state.ContinuousFrame(&frame)); + dec_state.SetState(&frame); + + // Ref 10, 12, 15, continuous + frame.Reset(); + vp9_hdr.picture_id = 20; + vp9_hdr.num_ref_pics = 3; + vp9_hdr.pid_diff[0] = 10; + vp9_hdr.pid_diff[1] = 8; + vp9_hdr.pid_diff[2] = 5; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_TRUE(dec_state.ContinuousFrame(&frame)); +} + +TEST(TestDecodingState, FrameContinuityFlexibleModeGeneral) { + VCMDecodingState dec_state; + VCMFrameBuffer 
frame; + VCMPacket packet; + packet.isFirstPacket = true; + packet.timestamp = 1; + packet.seqNum = 0xffff; + uint8_t data[] = "I need a data pointer for this test!"; + packet.sizeBytes = sizeof(data); + packet.dataPtr = data; + packet.codecSpecificHeader.codec = kRtpVideoVp9; + + RTPVideoHeaderVP9& vp9_hdr = packet.codecSpecificHeader.codecHeader.VP9; + vp9_hdr.picture_id = 10; + vp9_hdr.flexible_mode = true; + + FrameData frame_data; + frame_data.rtt_ms = 0; + frame_data.rolling_average_packets_per_frame = -1; + + // Key frame as first frame + packet.frameType = kVideoFrameKey; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_TRUE(dec_state.ContinuousFrame(&frame)); + + // Delta frame as first frame + frame.Reset(); + packet.frameType = kVideoFrameDelta; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_FALSE(dec_state.ContinuousFrame(&frame)); + + // Key frame then delta frame + frame.Reset(); + packet.frameType = kVideoFrameKey; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + dec_state.SetState(&frame); + frame.Reset(); + packet.frameType = kVideoFrameDelta; + vp9_hdr.num_ref_pics = 1; + vp9_hdr.picture_id = 15; + vp9_hdr.pid_diff[0] = 5; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_TRUE(dec_state.ContinuousFrame(&frame)); + dec_state.SetState(&frame); + + // Ref to 11, not continuous + frame.Reset(); + vp9_hdr.picture_id = 16; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_FALSE(dec_state.ContinuousFrame(&frame)); + + // Ref to 15, continuous + frame.Reset(); + vp9_hdr.picture_id = 16; + vp9_hdr.pid_diff[0] = 1; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_TRUE(dec_state.ContinuousFrame(&frame)); + dec_state.SetState(&frame); + + // Ref to 11 and 15, not continuous + frame.Reset(); + vp9_hdr.picture_id = 20; + vp9_hdr.num_ref_pics = 2; + vp9_hdr.pid_diff[0] = 9; + 
vp9_hdr.pid_diff[1] = 5; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_FALSE(dec_state.ContinuousFrame(&frame)); + + // Ref to 10, 15 and 16, continuous + frame.Reset(); + vp9_hdr.picture_id = 22; + vp9_hdr.num_ref_pics = 3; + vp9_hdr.pid_diff[0] = 12; + vp9_hdr.pid_diff[1] = 7; + vp9_hdr.pid_diff[2] = 6; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_TRUE(dec_state.ContinuousFrame(&frame)); + dec_state.SetState(&frame); + + // Key Frame, continuous + frame.Reset(); + packet.frameType = kVideoFrameKey; + vp9_hdr.picture_id = VCMDecodingState::kFrameDecodedLength - 2; + vp9_hdr.num_ref_pics = 0; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_TRUE(dec_state.ContinuousFrame(&frame)); + dec_state.SetState(&frame); + + // Frame at last index, ref to KF, continuous + frame.Reset(); + packet.frameType = kVideoFrameDelta; + vp9_hdr.picture_id = VCMDecodingState::kFrameDecodedLength - 1; + vp9_hdr.num_ref_pics = 1; + vp9_hdr.pid_diff[0] = 1; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_TRUE(dec_state.ContinuousFrame(&frame)); + dec_state.SetState(&frame); + + // Frame after wrapping buffer length, ref to last index, continuous + frame.Reset(); + vp9_hdr.picture_id = 0; + vp9_hdr.num_ref_pics = 1; + vp9_hdr.pid_diff[0] = 1; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_TRUE(dec_state.ContinuousFrame(&frame)); + dec_state.SetState(&frame); + + // Frame after wrapping start frame, ref to 0, continuous + frame.Reset(); + vp9_hdr.picture_id = 20; + vp9_hdr.num_ref_pics = 1; + vp9_hdr.pid_diff[0] = 20; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_TRUE(dec_state.ContinuousFrame(&frame)); + dec_state.SetState(&frame); + + // Frame after wrapping start frame, ref to 10, not continuous + frame.Reset(); + vp9_hdr.picture_id = 23; + vp9_hdr.num_ref_pics = 1; + vp9_hdr.pid_diff[0] = 13; + 
EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_FALSE(dec_state.ContinuousFrame(&frame)); + + // Key frame, continuous + frame.Reset(); + packet.frameType = kVideoFrameKey; + vp9_hdr.picture_id = 25; + vp9_hdr.num_ref_pics = 0; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_TRUE(dec_state.ContinuousFrame(&frame)); + dec_state.SetState(&frame); + + // Ref to KF, continuous + frame.Reset(); + packet.frameType = kVideoFrameDelta; + vp9_hdr.picture_id = 26; + vp9_hdr.num_ref_pics = 1; + vp9_hdr.pid_diff[0] = 1; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_TRUE(dec_state.ContinuousFrame(&frame)); + dec_state.SetState(&frame); + + // Ref to frame previous to KF, not continuous + frame.Reset(); + vp9_hdr.picture_id = 30; + vp9_hdr.num_ref_pics = 1; + vp9_hdr.pid_diff[0] = 30; + EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data)); + EXPECT_FALSE(dec_state.ContinuousFrame(&frame)); +} + } // namespace webrtc diff --git a/webrtc/modules/video_coding/main/source/encoded_frame.cc b/webrtc/modules/video_coding/main/source/encoded_frame.cc index d86704d632..89a8777013 100644 --- a/webrtc/modules/video_coding/main/source/encoded_frame.cc +++ b/webrtc/modules/video_coding/main/source/encoded_frame.cc @@ -147,6 +147,12 @@ void VCMEncodedFrame::CopyCodecSpecific(const RTPVideoHeader* header) header->codecHeader.VP9.inter_pic_predicted; _codecSpecificInfo.codecSpecific.VP9.flexible_mode = header->codecHeader.VP9.flexible_mode; + _codecSpecificInfo.codecSpecific.VP9.num_ref_pics = + header->codecHeader.VP9.num_ref_pics; + for (uint8_t r = 0; r < header->codecHeader.VP9.num_ref_pics; ++r) { + _codecSpecificInfo.codecSpecific.VP9.p_diff[r] = + header->codecHeader.VP9.pid_diff[r]; + } _codecSpecificInfo.codecSpecific.VP9.ss_data_available = header->codecHeader.VP9.ss_data_available; if (header->codecHeader.VP9.picture_id != kNoPictureId) { diff --git 
a/webrtc/modules/video_coding/main/source/generic_encoder.cc b/webrtc/modules/video_coding/main/source/generic_encoder.cc index de196040f0..c10c3ab9a5 100644 --- a/webrtc/modules/video_coding/main/source/generic_encoder.cc +++ b/webrtc/modules/video_coding/main/source/generic_encoder.cc @@ -54,11 +54,9 @@ void CopyCodecSpecific(const CodecSpecificInfo* info, RTPVideoHeader* rtp) { rtp->codecHeader.VP9.inter_layer_predicted = info->codecSpecific.VP9.inter_layer_predicted; rtp->codecHeader.VP9.gof_idx = info->codecSpecific.VP9.gof_idx; - - // Packetizer needs to know the number of spatial layers to correctly set - // the marker bit, even when the number won't be written in the packet. rtp->codecHeader.VP9.num_spatial_layers = info->codecSpecific.VP9.num_spatial_layers; + if (info->codecSpecific.VP9.ss_data_available) { rtp->codecHeader.VP9.spatial_layer_resolution_present = info->codecSpecific.VP9.spatial_layer_resolution_present; @@ -71,6 +69,10 @@ void CopyCodecSpecific(const CodecSpecificInfo* info, RTPVideoHeader* rtp) { } rtp->codecHeader.VP9.gof.CopyGofInfoVP9(info->codecSpecific.VP9.gof); } + + rtp->codecHeader.VP9.num_ref_pics = info->codecSpecific.VP9.num_ref_pics; + for (int i = 0; i < info->codecSpecific.VP9.num_ref_pics; ++i) + rtp->codecHeader.VP9.pid_diff[i] = info->codecSpecific.VP9.p_diff[i]; return; } case kVideoCodecH264: diff --git a/webrtc/modules/video_coding/main/source/jitter_buffer.cc b/webrtc/modules/video_coding/main/source/jitter_buffer.cc index a5e774b015..23fb3f9004 100644 --- a/webrtc/modules/video_coding/main/source/jitter_buffer.cc +++ b/webrtc/modules/video_coding/main/source/jitter_buffer.cc @@ -686,12 +686,6 @@ VCMFrameBufferEnum VCMJitterBuffer::InsertPacket(const VCMPacket& packet, num_consecutive_old_packets_ = 0; - if (packet.codec == kVideoCodecVP9 && - packet.codecSpecificHeader.codecHeader.VP9.flexible_mode) { - // TODO(asapersson): Add support for flexible mode. 
- return kGeneralError; - } - VCMFrameBuffer* frame; FrameList* frame_list; const VCMFrameBufferEnum error = GetFrame(packet, &frame, &frame_list); diff --git a/webrtc/video/full_stack.cc b/webrtc/video/full_stack.cc index 2810cd610e..66ec49c870 100644 --- a/webrtc/video/full_stack.cc +++ b/webrtc/video/full_stack.cc @@ -145,12 +145,15 @@ TEST_F(FullStackTest, ScreenshareSlidesVP8_2TL_Scroll) { RunTest(config); } -TEST_F(FullStackTest, ScreenshareSlidesVP9_2TL) { +TEST_F(FullStackTest, ScreenshareSlidesVP9_2SL) { VideoQualityTest::Params screenshare = { - {1850, 1110, 5, 50000, 200000, 2000000, "VP9", 2, 1, 400000}, + {1850, 1110, 5, 50000, 200000, 2000000, "VP9", 1, 0, 400000}, {}, {true, 10}, - {"screenshare_slides_vp9_2tl", 0.0, 0.0, kFullStackTestDurationSecs}}; + {"screenshare_slides_vp9_2tl", 0.0, 0.0, kFullStackTestDurationSecs}, + {}, + false, + {std::vector<VideoStream>(), 0, 2, 1}}; RunTest(screenshare); } } // namespace webrtc diff --git a/webrtc/video/video_send_stream.cc b/webrtc/video/video_send_stream.cc index 33e1f57575..fd0906d657 100644 --- a/webrtc/video/video_send_stream.cc +++ b/webrtc/video/video_send_stream.cc @@ -345,6 +345,12 @@ bool VideoSendStream::ReconfigureVideoEncoder( if (config.encoder_specific_settings != nullptr) { video_codec.codecSpecific.VP9 = *reinterpret_cast<const VideoCodecVP9*>( config.encoder_specific_settings); + if (video_codec.mode == kScreensharing) { + video_codec.codecSpecific.VP9.flexibleMode = true; + // For now VP9 screensharing uses 1 temporal and 2 spatial layers. 
+ RTC_DCHECK_EQ(video_codec.codecSpecific.VP9.numberOfTemporalLayers, 1); + RTC_DCHECK_EQ(video_codec.codecSpecific.VP9.numberOfSpatialLayers, 2); + } } video_codec.codecSpecific.VP9.numberOfTemporalLayers = static_cast<unsigned char>( diff --git a/webrtc/video/video_send_stream_tests.cc b/webrtc/video/video_send_stream_tests.cc index e19dc48fab..0f44c6fd52 100644 --- a/webrtc/video/video_send_stream_tests.cc +++ b/webrtc/video/video_send_stream_tests.cc @@ -1793,7 +1793,10 @@ class VP9HeaderObeserver : public test::SendTest { VP9HeaderObeserver() : SendTest(VideoSendStreamTest::kDefaultTimeoutMs), vp9_encoder_(VP9Encoder::Create()), - vp9_settings_(VideoEncoder::GetDefaultVp9Settings()) {} + vp9_settings_(VideoEncoder::GetDefaultVp9Settings()) { + vp9_settings_.numberOfTemporalLayers = 1; + vp9_settings_.numberOfSpatialLayers = 2; + } virtual void ModifyConfigsHook( VideoSendStream::Config* send_config, @@ -1809,6 +1812,7 @@ class VP9HeaderObeserver : public test::SendTest { std::vector<VideoReceiveStream::Config>* receive_configs, VideoEncoderConfig* encoder_config) override { encoder_config->encoder_specific_settings = &vp9_settings_; + encoder_config->content_type = VideoEncoderConfig::ContentType::kScreen; send_config->encoder_settings.encoder = vp9_encoder_.get(); send_config->encoder_settings.payload_name = "VP9"; send_config->encoder_settings.payload_type = kVp9PayloadType; @@ -1857,10 +1861,17 @@ class VP9HeaderObeserver : public test::SendTest { VideoCodecVP9 vp9_settings_; }; -TEST_F(VideoSendStreamTest, VP9NoFlexMode) { - class NoFlexibleMode : public VP9HeaderObeserver { +TEST_F(VideoSendStreamTest, DISABLED_VP9FlexMode) { + class FlexibleMode : public VP9HeaderObeserver { + void ModifyConfigsHook( + VideoSendStream::Config* send_config, + std::vector<VideoReceiveStream::Config>* receive_configs, + VideoEncoderConfig* encoder_config) override { + vp9_settings_.flexibleMode = true; + } + void InspectHeader(RTPVideoHeaderVP9* vp9videoHeader) override { - 
EXPECT_FALSE(vp9videoHeader->flexible_mode); + EXPECT_TRUE(vp9videoHeader->flexible_mode); observation_complete_->Set(); } } test; @@ -1868,7 +1879,7 @@ TEST_F(VideoSendStreamTest, VP9NoFlexMode) { RunBaseTest(&test, FakeNetworkPipe::Config()); } -TEST_F(VideoSendStreamTest, DISABLED_VP9FlexMode) { +TEST_F(VideoSendStreamTest, VP9FlexModeHasPictureId) { class FlexibleMode : public VP9HeaderObeserver { void ModifyConfigsHook( VideoSendStream::Config* send_config, @@ -1878,9 +1889,51 @@ TEST_F(VideoSendStreamTest, DISABLED_VP9FlexMode) { } void InspectHeader(RTPVideoHeaderVP9* vp9videoHeader) override { - EXPECT_TRUE(vp9videoHeader->flexible_mode); + EXPECT_NE(vp9videoHeader->picture_id, kNoPictureId); observation_complete_->Set(); } + } test; + + RunBaseTest(&test, FakeNetworkPipe::Config()); +} + +TEST_F(VideoSendStreamTest, VP9FlexModeRefCount) { + class FlexibleMode : public VP9HeaderObeserver { + void ModifyConfigsHook( + VideoSendStream::Config* send_config, + std::vector<VideoReceiveStream::Config>* receive_configs, + VideoEncoderConfig* encoder_config) override { + vp9_settings_.flexibleMode = true; + } + + void InspectHeader(RTPVideoHeaderVP9* vp9videoHeader) override { + EXPECT_TRUE(vp9videoHeader->flexible_mode); + if (vp9videoHeader->inter_pic_predicted) { + EXPECT_GT(vp9videoHeader->num_ref_pics, 0u); + observation_complete_->Set(); + } + } + } test; + + RunBaseTest(&test, FakeNetworkPipe::Config()); +} + +TEST_F(VideoSendStreamTest, VP9FlexModeRefs) { + class FlexibleMode : public VP9HeaderObeserver { + void ModifyConfigsHook( + VideoSendStream::Config* send_config, + std::vector<VideoReceiveStream::Config>* receive_configs, + VideoEncoderConfig* encoder_config) override { + vp9_settings_.flexibleMode = true; + } + + void InspectHeader(RTPVideoHeaderVP9* vp9videoHeader) override { + EXPECT_TRUE(vp9videoHeader->flexible_mode); + if (vp9videoHeader->inter_pic_predicted) { + EXPECT_GT(vp9videoHeader->num_ref_pics, 0u); + observation_complete_->Set(); + } + 
} } test; |