/* * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "testing/gtest/include/gtest/gtest.h" #include "webrtc/video_engine/stream_synchronization.h" namespace webrtc { // These correspond to the same constants defined in vie_sync_module.cc. enum { kMaxVideoDiffMs = 80 }; enum { kMaxAudioDiffMs = 80 }; enum { kMaxDelay = 1500 }; // Test constants. enum { kDefaultAudioFrequency = 8000 }; enum { kDefaultVideoFrequency = 90000 }; const double kNtpFracPerMs = 4.294967296E6; static const int kSmoothingFilter = 4 * 2; class Time { public: explicit Time(int64_t offset) : kNtpJan1970(2208988800UL), time_now_ms_(offset) {} RtcpMeasurement GenerateRtcp(int frequency, uint32_t offset) const { RtcpMeasurement rtcp; NowNtp(&rtcp.ntp_secs, &rtcp.ntp_frac); rtcp.rtp_timestamp = NowRtp(frequency, offset); return rtcp; } void NowNtp(uint32_t* ntp_secs, uint32_t* ntp_frac) const { *ntp_secs = time_now_ms_ / 1000 + kNtpJan1970; int64_t remainder_ms = time_now_ms_ % 1000; *ntp_frac = static_cast( static_cast(remainder_ms) * kNtpFracPerMs + 0.5); } uint32_t NowRtp(int frequency, uint32_t offset) const { return frequency * time_now_ms_ / 1000 + offset; } void IncreaseTimeMs(int64_t inc) { time_now_ms_ += inc; } int64_t time_now_ms() const { return time_now_ms_; } private: // January 1970, in NTP seconds. const uint32_t kNtpJan1970; int64_t time_now_ms_; }; class StreamSynchronizationTest : public ::testing::Test { protected: virtual void SetUp() { sync_ = new StreamSynchronization(0, 0); send_time_ = new Time(kSendTimeOffsetMs); receive_time_ = new Time(kReceiveTimeOffsetMs); audio_clock_drift_ = 1.0; video_clock_drift_ = 1.0; } virtual void TearDown() { delete sync_; delete send_time_; delete receive_time_; } // Generates the necessary RTCP measurements and RTP timestamps and computes // the audio and video delays needed to get the two streams in sync. // |audio_delay_ms| and |video_delay_ms| are the number of milliseconds after // capture which the frames are rendered. // |current_audio_delay_ms| is the number of milliseconds which audio is // currently being delayed by the receiver. bool DelayedStreams(int audio_delay_ms, int video_delay_ms, int current_audio_delay_ms, int* extra_audio_delay_ms, int* total_video_delay_ms) { int audio_frequency = static_cast(kDefaultAudioFrequency * audio_clock_drift_ + 0.5); int audio_offset = 0; int video_frequency = static_cast(kDefaultVideoFrequency * video_clock_drift_ + 0.5); int video_offset = 0; StreamSynchronization::Measurements audio; StreamSynchronization::Measurements video; // Generate NTP/RTP timestamp pair for both streams corresponding to RTCP. audio.rtcp.push_front(send_time_->GenerateRtcp(audio_frequency, audio_offset)); send_time_->IncreaseTimeMs(100); receive_time_->IncreaseTimeMs(100); video.rtcp.push_front(send_time_->GenerateRtcp(video_frequency, video_offset)); send_time_->IncreaseTimeMs(900); receive_time_->IncreaseTimeMs(900); audio.rtcp.push_front(send_time_->GenerateRtcp(audio_frequency, audio_offset)); send_time_->IncreaseTimeMs(100); receive_time_->IncreaseTimeMs(100); video.rtcp.push_front(send_time_->GenerateRtcp(video_frequency, video_offset)); send_time_->IncreaseTimeMs(900); receive_time_->IncreaseTimeMs(900); // Capture an audio and a video frame at the same time. audio.latest_timestamp = send_time_->NowRtp(audio_frequency, audio_offset); video.latest_timestamp = send_time_->NowRtp(video_frequency, video_offset); if (audio_delay_ms > video_delay_ms) { // Audio later than video. receive_time_->IncreaseTimeMs(video_delay_ms); video.latest_receive_time_ms = receive_time_->time_now_ms(); receive_time_->IncreaseTimeMs(audio_delay_ms - video_delay_ms); audio.latest_receive_time_ms = receive_time_->time_now_ms(); } else { // Video later than audio. receive_time_->IncreaseTimeMs(audio_delay_ms); audio.latest_receive_time_ms = receive_time_->time_now_ms(); receive_time_->IncreaseTimeMs(video_delay_ms - audio_delay_ms); video.latest_receive_time_ms = receive_time_->time_now_ms(); } int relative_delay_ms; StreamSynchronization::ComputeRelativeDelay(audio, video, &relative_delay_ms); EXPECT_EQ(video_delay_ms - audio_delay_ms, relative_delay_ms); return sync_->ComputeDelays(relative_delay_ms, current_audio_delay_ms, extra_audio_delay_ms, total_video_delay_ms); } // Simulate audio playback 300 ms after capture and video rendering 100 ms // after capture. Verify that the correct extra delays are calculated for // audio and video, and that they change correctly when we simulate that // NetEQ or the VCM adds more delay to the streams. // TODO(holmer): This is currently wrong! We should simply change // audio_delay_ms or video_delay_ms since those now include VCM and NetEQ // delays. void BothDelayedAudioLaterTest(int base_target_delay) { int current_audio_delay_ms = base_target_delay; int audio_delay_ms = base_target_delay + 300; int video_delay_ms = base_target_delay + 100; int extra_audio_delay_ms = 0; int total_video_delay_ms = base_target_delay; int filtered_move = (audio_delay_ms - video_delay_ms) / kSmoothingFilter; const int kNeteqDelayIncrease = 50; const int kNeteqDelayDecrease = 10; EXPECT_TRUE(DelayedStreams(audio_delay_ms, video_delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(base_target_delay + filtered_move, total_video_delay_ms); EXPECT_EQ(base_target_delay, extra_audio_delay_ms); current_audio_delay_ms = extra_audio_delay_ms; send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(1000 - std::max(audio_delay_ms, video_delay_ms)); // Simulate base_target_delay minimum delay in the VCM. total_video_delay_ms = base_target_delay; EXPECT_TRUE(DelayedStreams(audio_delay_ms, video_delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(base_target_delay + 2 * filtered_move, total_video_delay_ms); EXPECT_EQ(base_target_delay, extra_audio_delay_ms); current_audio_delay_ms = extra_audio_delay_ms; send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(1000 - std::max(audio_delay_ms, video_delay_ms)); // Simulate base_target_delay minimum delay in the VCM. total_video_delay_ms = base_target_delay; EXPECT_TRUE(DelayedStreams(audio_delay_ms, video_delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(base_target_delay + 3 * filtered_move, total_video_delay_ms); EXPECT_EQ(base_target_delay, extra_audio_delay_ms); // Simulate that NetEQ introduces some audio delay. current_audio_delay_ms = base_target_delay + kNeteqDelayIncrease; send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(1000 - std::max(audio_delay_ms, video_delay_ms)); // Simulate base_target_delay minimum delay in the VCM. total_video_delay_ms = base_target_delay; EXPECT_TRUE(DelayedStreams(audio_delay_ms, video_delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); filtered_move = 3 * filtered_move + (kNeteqDelayIncrease + audio_delay_ms - video_delay_ms) / kSmoothingFilter; EXPECT_EQ(base_target_delay + filtered_move, total_video_delay_ms); EXPECT_EQ(base_target_delay, extra_audio_delay_ms); // Simulate that NetEQ reduces its delay. current_audio_delay_ms = base_target_delay + kNeteqDelayDecrease; send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(1000 - std::max(audio_delay_ms, video_delay_ms)); // Simulate base_target_delay minimum delay in the VCM. total_video_delay_ms = base_target_delay; EXPECT_TRUE(DelayedStreams(audio_delay_ms, video_delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); filtered_move = filtered_move + (kNeteqDelayDecrease + audio_delay_ms - video_delay_ms) / kSmoothingFilter; EXPECT_EQ(base_target_delay + filtered_move, total_video_delay_ms); EXPECT_EQ(base_target_delay, extra_audio_delay_ms); } void BothDelayedVideoLaterTest(int base_target_delay) { int current_audio_delay_ms = base_target_delay; int audio_delay_ms = base_target_delay + 100; int video_delay_ms = base_target_delay + 300; int extra_audio_delay_ms = 0; int total_video_delay_ms = base_target_delay; EXPECT_TRUE(DelayedStreams(audio_delay_ms, video_delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(base_target_delay, total_video_delay_ms); // The audio delay is not allowed to change more than this in 1 second. EXPECT_GE(base_target_delay + kMaxAudioDiffMs, extra_audio_delay_ms); current_audio_delay_ms = extra_audio_delay_ms; int current_extra_delay_ms = extra_audio_delay_ms; send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); EXPECT_TRUE(DelayedStreams(audio_delay_ms, video_delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(base_target_delay, total_video_delay_ms); // The audio delay is not allowed to change more than the half of the // required change in delay. EXPECT_EQ(current_extra_delay_ms + MaxAudioDelayIncrease( current_audio_delay_ms, base_target_delay + video_delay_ms - audio_delay_ms), extra_audio_delay_ms); current_audio_delay_ms = extra_audio_delay_ms; current_extra_delay_ms = extra_audio_delay_ms; send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); EXPECT_TRUE(DelayedStreams(audio_delay_ms, video_delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(base_target_delay, total_video_delay_ms); // The audio delay is not allowed to change more than the half of the // required change in delay. EXPECT_EQ(current_extra_delay_ms + MaxAudioDelayIncrease( current_audio_delay_ms, base_target_delay + video_delay_ms - audio_delay_ms), extra_audio_delay_ms); current_extra_delay_ms = extra_audio_delay_ms; // Simulate that NetEQ for some reason reduced the delay. current_audio_delay_ms = base_target_delay + 10; send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); EXPECT_TRUE(DelayedStreams(audio_delay_ms, video_delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(base_target_delay, total_video_delay_ms); // Since we only can ask NetEQ for a certain amount of extra delay, and // we only measure the total NetEQ delay, we will ask for additional delay // here to try to stay in sync. EXPECT_EQ(current_extra_delay_ms + MaxAudioDelayIncrease( current_audio_delay_ms, base_target_delay + video_delay_ms - audio_delay_ms), extra_audio_delay_ms); current_extra_delay_ms = extra_audio_delay_ms; // Simulate that NetEQ for some reason significantly increased the delay. current_audio_delay_ms = base_target_delay + 350; send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); EXPECT_TRUE(DelayedStreams(audio_delay_ms, video_delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(base_target_delay, total_video_delay_ms); // The audio delay is not allowed to change more than the half of the // required change in delay. EXPECT_EQ(current_extra_delay_ms + MaxAudioDelayIncrease( current_audio_delay_ms, base_target_delay + video_delay_ms - audio_delay_ms), extra_audio_delay_ms); } int MaxAudioDelayIncrease(int current_audio_delay_ms, int delay_ms) { return std::min((delay_ms - current_audio_delay_ms) / kSmoothingFilter, static_cast(kMaxAudioDiffMs)); } int MaxAudioDelayDecrease(int current_audio_delay_ms, int delay_ms) { return std::max((delay_ms - current_audio_delay_ms) / kSmoothingFilter, -kMaxAudioDiffMs); } enum { kSendTimeOffsetMs = 98765 }; enum { kReceiveTimeOffsetMs = 43210 }; StreamSynchronization* sync_; Time* send_time_; // The simulated clock at the sender. Time* receive_time_; // The simulated clock at the receiver. double audio_clock_drift_; double video_clock_drift_; }; TEST_F(StreamSynchronizationTest, NoDelay) { uint32_t current_audio_delay_ms = 0; int extra_audio_delay_ms = 0; int total_video_delay_ms = 0; EXPECT_FALSE(DelayedStreams(0, 0, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, extra_audio_delay_ms); EXPECT_EQ(0, total_video_delay_ms); } TEST_F(StreamSynchronizationTest, VideoDelay) { uint32_t current_audio_delay_ms = 0; int delay_ms = 200; int extra_audio_delay_ms = 0; int total_video_delay_ms = 0; EXPECT_TRUE(DelayedStreams(delay_ms, 0, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, extra_audio_delay_ms); // The video delay is not allowed to change more than this in 1 second. EXPECT_EQ(delay_ms / kSmoothingFilter, total_video_delay_ms); send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); // Simulate 0 minimum delay in the VCM. total_video_delay_ms = 0; EXPECT_TRUE(DelayedStreams(delay_ms, 0, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, extra_audio_delay_ms); // The video delay is not allowed to change more than this in 1 second. EXPECT_EQ(2 * delay_ms / kSmoothingFilter, total_video_delay_ms); send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); // Simulate 0 minimum delay in the VCM. total_video_delay_ms = 0; EXPECT_TRUE(DelayedStreams(delay_ms, 0, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, extra_audio_delay_ms); EXPECT_EQ(3 * delay_ms / kSmoothingFilter, total_video_delay_ms); } TEST_F(StreamSynchronizationTest, AudioDelay) { int current_audio_delay_ms = 0; int delay_ms = 200; int extra_audio_delay_ms = 0; int total_video_delay_ms = 0; EXPECT_TRUE(DelayedStreams(0, delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, total_video_delay_ms); // The audio delay is not allowed to change more than this in 1 second. EXPECT_EQ(delay_ms / kSmoothingFilter, extra_audio_delay_ms); current_audio_delay_ms = extra_audio_delay_ms; int current_extra_delay_ms = extra_audio_delay_ms; send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); EXPECT_TRUE(DelayedStreams(0, delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, total_video_delay_ms); // The audio delay is not allowed to change more than the half of the required // change in delay. EXPECT_EQ(current_extra_delay_ms + MaxAudioDelayIncrease(current_audio_delay_ms, delay_ms), extra_audio_delay_ms); current_audio_delay_ms = extra_audio_delay_ms; current_extra_delay_ms = extra_audio_delay_ms; send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); EXPECT_TRUE(DelayedStreams(0, delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, total_video_delay_ms); // The audio delay is not allowed to change more than the half of the required // change in delay. EXPECT_EQ(current_extra_delay_ms + MaxAudioDelayIncrease(current_audio_delay_ms, delay_ms), extra_audio_delay_ms); current_extra_delay_ms = extra_audio_delay_ms; // Simulate that NetEQ for some reason reduced the delay. current_audio_delay_ms = 10; send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); EXPECT_TRUE(DelayedStreams(0, delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, total_video_delay_ms); // Since we only can ask NetEQ for a certain amount of extra delay, and // we only measure the total NetEQ delay, we will ask for additional delay // here to try to EXPECT_EQ(current_extra_delay_ms + MaxAudioDelayIncrease(current_audio_delay_ms, delay_ms), extra_audio_delay_ms); current_extra_delay_ms = extra_audio_delay_ms; // Simulate that NetEQ for some reason significantly increased the delay. current_audio_delay_ms = 350; send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); EXPECT_TRUE(DelayedStreams(0, delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, total_video_delay_ms); // The audio delay is not allowed to change more than the half of the required // change in delay. EXPECT_EQ(current_extra_delay_ms + MaxAudioDelayDecrease(current_audio_delay_ms, delay_ms), extra_audio_delay_ms); } TEST_F(StreamSynchronizationTest, BothDelayedVideoLater) { BothDelayedVideoLaterTest(0); } TEST_F(StreamSynchronizationTest, BothDelayedVideoLaterAudioClockDrift) { audio_clock_drift_ = 1.05; BothDelayedVideoLaterTest(0); } TEST_F(StreamSynchronizationTest, BothDelayedVideoLaterVideoClockDrift) { video_clock_drift_ = 1.05; BothDelayedVideoLaterTest(0); } TEST_F(StreamSynchronizationTest, BothDelayedAudioLater) { BothDelayedAudioLaterTest(0); } TEST_F(StreamSynchronizationTest, BothDelayedAudioClockDrift) { audio_clock_drift_ = 1.05; BothDelayedAudioLaterTest(0); } TEST_F(StreamSynchronizationTest, BothDelayedVideoClockDrift) { video_clock_drift_ = 1.05; BothDelayedAudioLaterTest(0); } TEST_F(StreamSynchronizationTest, BaseDelay) { int base_target_delay_ms = 2000; int current_audio_delay_ms = 2000; int extra_audio_delay_ms = 0; int total_video_delay_ms = base_target_delay_ms; sync_->SetTargetBufferingDelay(base_target_delay_ms); // We are in sync don't change. EXPECT_FALSE(DelayedStreams(base_target_delay_ms, base_target_delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); // Triggering another call with the same values. Delay should not be modified. base_target_delay_ms = 2000; current_audio_delay_ms = base_target_delay_ms; total_video_delay_ms = base_target_delay_ms; sync_->SetTargetBufferingDelay(base_target_delay_ms); // We are in sync don't change. EXPECT_FALSE(DelayedStreams(base_target_delay_ms, base_target_delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); // Changing delay value - intended to test this module only. In practice it // would take VoE time to adapt. base_target_delay_ms = 5000; current_audio_delay_ms = base_target_delay_ms; total_video_delay_ms = base_target_delay_ms; sync_->SetTargetBufferingDelay(base_target_delay_ms); // We are in sync don't change. EXPECT_FALSE(DelayedStreams(base_target_delay_ms, base_target_delay_ms, current_audio_delay_ms, &extra_audio_delay_ms, &total_video_delay_ms)); } TEST_F(StreamSynchronizationTest, BothDelayedAudioLaterWithBaseDelay) { int base_target_delay_ms = 3000; sync_->SetTargetBufferingDelay(base_target_delay_ms); BothDelayedAudioLaterTest(base_target_delay_ms); } TEST_F(StreamSynchronizationTest, BothDelayedAudioClockDriftWithBaseDelay) { int base_target_delay_ms = 3000; sync_->SetTargetBufferingDelay(base_target_delay_ms); audio_clock_drift_ = 1.05; BothDelayedAudioLaterTest(base_target_delay_ms); } TEST_F(StreamSynchronizationTest, BothDelayedVideoClockDriftWithBaseDelay) { int base_target_delay_ms = 3000; sync_->SetTargetBufferingDelay(base_target_delay_ms); video_clock_drift_ = 1.05; BothDelayedAudioLaterTest(base_target_delay_ms); } TEST_F(StreamSynchronizationTest, BothDelayedVideoLaterWithBaseDelay) { int base_target_delay_ms = 2000; sync_->SetTargetBufferingDelay(base_target_delay_ms); BothDelayedVideoLaterTest(base_target_delay_ms); } TEST_F(StreamSynchronizationTest, BothDelayedVideoLaterAudioClockDriftWithBaseDelay) { int base_target_delay_ms = 2000; audio_clock_drift_ = 1.05; sync_->SetTargetBufferingDelay(base_target_delay_ms); BothDelayedVideoLaterTest(base_target_delay_ms); } TEST_F(StreamSynchronizationTest, BothDelayedVideoLaterVideoClockDriftWithBaseDelay) { int base_target_delay_ms = 2000; video_clock_drift_ = 1.05; sync_->SetTargetBufferingDelay(base_target_delay_ms); BothDelayedVideoLaterTest(base_target_delay_ms); } } // namespace webrtc