diff options
author | wu@webrtc.org <wu@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d> | 2014-06-05 20:34:08 +0000 |
---|---|---|
committer | wu@webrtc.org <wu@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d> | 2014-06-05 20:34:08 +0000 |
commit | 94454b71adc37e15fd3f5a5fc432063f05caabcb (patch) | |
tree | c86ef909184b482c08033fd1755b74a7c6e1a96a | |
parent | 130fa64d4c726765c66879e440e27e7bda86508f (diff) | |
download | webrtc-94454b71adc37e15fd3f5a5fc432063f05caabcb.tar.gz |
Fix the chain that propagates the audio frame's rtp and ntp timestamp including:
* In AudioCodingModuleImpl::PlayoutData10Ms, don't reset the timestamp obtained from GetAudio.
* When there're more than one participant, set AudioFrame's RTP timestamp to 0.
* Copy ntp_time_ms_ in AudioFrame::CopyFrom method.
* In RemixAndResample, pass src frame's timestamp_ and ntp_time_ms_ to the dst frame.
* Fix how |elapsed_time_ms| is computed in channel.cc by adding GetPlayoutFrequency.
Tweaks on ntp_time_ms_:
* Init ntp_time_ms_ to -1 in AudioFrame ctor.
* When there're more than one participant, set AudioFrame's ntp_time_ms_ to an invalid value. I.e. we don't support ntp_time_ms_ in multiple participants case before the mixing is moved to chrome.
Added elapsed_time_ms to AudioFrame and pass it to chrome, where we don't have the information about the rtp timestamp's sample rate, i.e. can't convert rtp timestamp to ms.
BUG=3111
R=henrik.lundin@webrtc.org, turaj@webrtc.org, xians@webrtc.org
TBR=andrew
andrew to take another look on audio_conference_mixer_impl.cc
Review URL: https://webrtc-codereview.appspot.com/14559004
git-svn-id: http://webrtc.googlecode.com/svn/trunk@6346 4adac7df-926f-26a2-2b94-8c16560cd09d
26 files changed, 168 insertions, 100 deletions
diff --git a/talk/app/webrtc/test/fakeaudiocapturemodule.cc b/talk/app/webrtc/test/fakeaudiocapturemodule.cc index 72d39c9714..c22ed6f5d4 100644 --- a/talk/app/webrtc/test/fakeaudiocapturemodule.cc +++ b/talk/app/webrtc/test/fakeaudiocapturemodule.cc @@ -729,12 +729,12 @@ void FakeAudioCaptureModule::ReceiveFrameP() { ResetRecBuffer(); uint32_t nSamplesOut = 0; #ifdef USE_WEBRTC_DEV_BRANCH - uint32_t rtp_timestamp = 0; + int64_t elapsed_time_ms = 0; int64_t ntp_time_ms = 0; if (audio_callback_->NeedMorePlayData(kNumberSamples, kNumberBytesPerSample, kNumberOfChannels, kSamplesPerSecond, rec_buffer_, nSamplesOut, - &rtp_timestamp, &ntp_time_ms) != 0) { + &elapsed_time_ms, &ntp_time_ms) != 0) { ASSERT(false); } #else diff --git a/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc b/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc index ea92f7b0d6..ab0db06231 100644 --- a/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc +++ b/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc @@ -86,7 +86,7 @@ class FakeAdmTest : public testing::Test, void* audioSamples, #ifdef USE_WEBRTC_DEV_BRANCH uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) { #else uint32_t& nSamplesOut) { @@ -98,7 +98,7 @@ class FakeAdmTest : public testing::Test, GenerateZeroBuffer(audioSamples, audio_buffer_size); nSamplesOut = bytes_out / nBytesPerSample; #ifdef USE_WEBRTC_DEV_BRANCH - *rtp_timestamp = 0; + *elapsed_time_ms = 0; *ntp_time_ms = 0; #endif return 0; diff --git a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc index 613491a052..052bd4fccb 100644 --- a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc +++ b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc @@ -475,10 +475,17 @@ int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) { call_stats_.DecodedByNetEq(audio_frame->speech_type_); // Computes the RTP timestamp of the first sample 
in |audio_frame| from - // |PlayoutTimestamp|, which is the timestamp of the last sample of + // |GetPlayoutTimestamp|, which is the timestamp of the last sample of // |audio_frame|. - audio_frame->timestamp_ = - PlayoutTimestamp() - audio_frame->samples_per_channel_; + uint32_t playout_timestamp = 0; + if (GetPlayoutTimestamp(&playout_timestamp)) { + audio_frame->timestamp_ = + playout_timestamp - audio_frame->samples_per_channel_; + } else { + // Remain 0 until we have a valid |playout_timestamp|. + audio_frame->timestamp_ = 0; + } + return 0; } @@ -596,13 +603,14 @@ void AcmReceiver::set_id(int id) { id_ = id; } -uint32_t AcmReceiver::PlayoutTimestamp() { +bool AcmReceiver::GetPlayoutTimestamp(uint32_t* timestamp) { if (av_sync_) { assert(initial_delay_manager_.get()); - if (initial_delay_manager_->buffering()) - return initial_delay_manager_->playout_timestamp(); + if (initial_delay_manager_->buffering()) { + return initial_delay_manager_->GetPlayoutTimestamp(timestamp); + } } - return neteq_->PlayoutTimestamp(); + return neteq_->GetPlayoutTimestamp(timestamp); } int AcmReceiver::last_audio_codec_id() const { diff --git a/webrtc/modules/audio_coding/main/acm2/acm_receiver.h b/webrtc/modules/audio_coding/main/acm2/acm_receiver.h index 7a238aec11..748744a4c1 100644 --- a/webrtc/modules/audio_coding/main/acm2/acm_receiver.h +++ b/webrtc/modules/audio_coding/main/acm2/acm_receiver.h @@ -242,9 +242,10 @@ class AcmReceiver { void set_id(int id); // TODO(turajs): can be inline. // - // Returns the RTP timestamp of the last sample delivered by GetAudio(). + // Gets the RTP timestamp of the last sample delivered by GetAudio(). + // Returns true if the RTP timestamp is valid, otherwise false. 
// - uint32_t PlayoutTimestamp(); + bool GetPlayoutTimestamp(uint32_t* timestamp); // // Return the index of the codec associated with the last non-CNG/non-DTMF diff --git a/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc b/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc index 7f6c84051e..a07e854334 100644 --- a/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc +++ b/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc @@ -1776,7 +1776,6 @@ int AudioCodingModuleImpl::PlayoutData10Ms(int desired_freq_hz, } audio_frame->id_ = id_; - audio_frame->timestamp_ = 0; return 0; } @@ -1917,8 +1916,7 @@ int AudioCodingModuleImpl::ConfigISACBandwidthEstimator( } int AudioCodingModuleImpl::PlayoutTimestamp(uint32_t* timestamp) { - *timestamp = receiver_.PlayoutTimestamp(); - return 0; + return receiver_.GetPlayoutTimestamp(timestamp) ? 0 : -1; } bool AudioCodingModuleImpl::HaveValidEncoder(const char* caller_name) const { diff --git a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.cc b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.cc index c2b218cb6c..786fb2e527 100644 --- a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.cc +++ b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.cc @@ -219,6 +219,14 @@ void InitialDelayManager::LatePackets( return; } +bool InitialDelayManager::GetPlayoutTimestamp(uint32_t* playout_timestamp) { + if (!buffering_) { + return false; + } + *playout_timestamp = playout_timestamp_; + return true; +} + void InitialDelayManager::DisableBuffering() { buffering_ = false; } diff --git a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h index 3c5ba3c013..6edc115084 100644 --- a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h +++ b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h @@ -65,8 +65,9 @@ class InitialDelayManager { // sequence of 
late (or perhaps missing) packets is computed. void LatePackets(uint32_t timestamp_now, SyncStream* sync_stream); - // Playout timestamp, valid when buffering. - uint32_t playout_timestamp() { return playout_timestamp_; } + // Get playout timestamp. + // Returns true if the timestamp is valid (when buffering), otherwise false. + bool GetPlayoutTimestamp(uint32_t* playout_timestamp); // True if buffered audio is less than the given initial delay (specified at // the constructor). Buffering might be disabled by the client of this class. diff --git a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager_unittest.cc b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager_unittest.cc index 15e88a5391..38b7cfc271 100644 --- a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager_unittest.cc +++ b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager_unittest.cc @@ -359,7 +359,9 @@ TEST_F(InitialDelayManagerTest, BufferingAudio) { EXPECT_TRUE(manager_->buffering()); const uint32_t expected_playout_timestamp = rtp_info_.header.timestamp - kInitDelayMs * kSamplingRateHz / 1000; - EXPECT_EQ(expected_playout_timestamp, manager_->playout_timestamp()); + uint32_t actual_playout_timestamp = 0; + EXPECT_TRUE(manager_->GetPlayoutTimestamp(&actual_playout_timestamp)); + EXPECT_EQ(expected_playout_timestamp, actual_playout_timestamp); NextRtpHeader(&rtp_info_, &rtp_receive_timestamp_); } diff --git a/webrtc/modules/audio_coding/neteq4/interface/neteq.h b/webrtc/modules/audio_coding/neteq4/interface/neteq.h index 763da31b82..79a5dfb0b1 100644 --- a/webrtc/modules/audio_coding/neteq4/interface/neteq.h +++ b/webrtc/modules/audio_coding/neteq4/interface/neteq.h @@ -228,8 +228,9 @@ class NetEq { // Disables post-decode VAD. virtual void DisableVad() = 0; - // Returns the RTP timestamp for the last sample delivered by GetAudio(). - virtual uint32_t PlayoutTimestamp() = 0; + // Gets the RTP timestamp for the last sample delivered by GetAudio(). 
+ // Returns true if the RTP timestamp is valid, otherwise false. + virtual bool GetPlayoutTimestamp(uint32_t* timestamp) = 0; // Not implemented. virtual int SetTargetNumberOfChannels() = 0; diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl.cc b/webrtc/modules/audio_coding/neteq4/neteq_impl.cc index 963a820610..f860766085 100644 --- a/webrtc/modules/audio_coding/neteq4/neteq_impl.cc +++ b/webrtc/modules/audio_coding/neteq4/neteq_impl.cc @@ -335,9 +335,15 @@ void NetEqImpl::DisableVad() { vad_->Disable(); } -uint32_t NetEqImpl::PlayoutTimestamp() { +bool NetEqImpl::GetPlayoutTimestamp(uint32_t* timestamp) { CriticalSectionScoped lock(crit_sect_.get()); - return timestamp_scaler_->ToExternal(playout_timestamp_); + if (first_packet_) { + // We don't have a valid RTP timestamp until we have decoded our first + // RTP packet. + return false; + } + *timestamp = timestamp_scaler_->ToExternal(playout_timestamp_); + return true; } int NetEqImpl::LastError() { diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl.h b/webrtc/modules/audio_coding/neteq4/neteq_impl.h index 751de66dd9..822a523a62 100644 --- a/webrtc/modules/audio_coding/neteq4/neteq_impl.h +++ b/webrtc/modules/audio_coding/neteq4/neteq_impl.h @@ -166,8 +166,7 @@ class NetEqImpl : public webrtc::NetEq { // Disables post-decode VAD. virtual void DisableVad(); - // Returns the RTP timestamp for the last sample delivered by GetAudio(). 
- virtual uint32_t PlayoutTimestamp(); + virtual bool GetPlayoutTimestamp(uint32_t* timestamp); virtual int SetTargetNumberOfChannels() { return kNotImplemented; } diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc b/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc index aedd8d5a1f..26279aa93a 100644 --- a/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc +++ b/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc @@ -477,8 +477,10 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) { // The value of the last of the output samples is the same as the number of // samples played from the decoded packet. Thus, this number + the RTP // timestamp should match the playout timestamp. + uint32_t timestamp = 0; + EXPECT_TRUE(neteq_->GetPlayoutTimestamp(×tamp)); EXPECT_EQ(rtp_header.header.timestamp + output[samples_per_channel - 1], - neteq_->PlayoutTimestamp()); + timestamp); // Check the timestamp for the last value in the sync buffer. This should // be one full frame length ahead of the RTP timestamp. diff --git a/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc b/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc index f66a3cfae5..c1a7e16526 100644 --- a/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc +++ b/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc @@ -228,6 +228,8 @@ class NetEqDecodingTest : public ::testing::Test { void DuplicateCng(); + uint32_t PlayoutTimestamp(); + NetEq* neteq_; FILE* rtp_fp_; unsigned int sim_clock_; @@ -736,7 +738,7 @@ void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor, } EXPECT_EQ(kOutputNormal, type); - int32_t delay_before = timestamp - neteq_->PlayoutTimestamp(); + int32_t delay_before = timestamp - PlayoutTimestamp(); // Insert CNG for 1 minute (= 60000 ms). const int kCngPeriodMs = 100; @@ -829,7 +831,7 @@ void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor, // Check that the speech starts again within reasonable time. 
double time_until_speech_returns_ms = t_ms - speech_restart_time_ms; EXPECT_LT(time_until_speech_returns_ms, max_time_to_speech_ms); - int32_t delay_after = timestamp - neteq_->PlayoutTimestamp(); + int32_t delay_after = timestamp - PlayoutTimestamp(); // Compare delay before and after, and make sure it differs less than 20 ms. EXPECT_LE(delay_after, delay_before + delay_tolerance_ms * 16); EXPECT_GE(delay_after, delay_before - delay_tolerance_ms * 16); @@ -1310,7 +1312,7 @@ void NetEqDecodingTest::WrapTest(uint16_t start_seq_no, ASSERT_EQ(1, num_channels); // Expect delay (in samples) to be less than 2 packets. - EXPECT_LE(timestamp - neteq_->PlayoutTimestamp(), + EXPECT_LE(timestamp - PlayoutTimestamp(), static_cast<uint32_t>(kSamples * 2)); } // Make sure we have actually tested wrap-around. @@ -1391,7 +1393,7 @@ void NetEqDecodingTest::DuplicateCng() { kMaxBlockSize, out_data_, &out_len, &num_channels, &type)); ASSERT_EQ(kBlockSize16kHz, out_len); EXPECT_EQ(kOutputCNG, type); - EXPECT_EQ(timestamp - algorithmic_delay_samples, neteq_->PlayoutTimestamp()); + EXPECT_EQ(timestamp - algorithmic_delay_samples, PlayoutTimestamp()); // Insert the same CNG packet again. Note that at this point it is old, since // we have already decoded the first copy of it. @@ -1406,7 +1408,7 @@ void NetEqDecodingTest::DuplicateCng() { ASSERT_EQ(kBlockSize16kHz, out_len); EXPECT_EQ(kOutputCNG, type); EXPECT_EQ(timestamp - algorithmic_delay_samples, - neteq_->PlayoutTimestamp()); + PlayoutTimestamp()); } // Insert speech again. 
@@ -1422,7 +1424,13 @@ void NetEqDecodingTest::DuplicateCng() { ASSERT_EQ(kBlockSize16kHz, out_len); EXPECT_EQ(kOutputNormal, type); EXPECT_EQ(timestamp + kSamples - algorithmic_delay_samples, - neteq_->PlayoutTimestamp()); + PlayoutTimestamp()); +} + +uint32_t NetEqDecodingTest::PlayoutTimestamp() { + uint32_t playout_timestamp = 0; + EXPECT_TRUE(neteq_->GetPlayoutTimestamp(&playout_timestamp)); + return playout_timestamp; } TEST_F(NetEqDecodingTest, DiscardDuplicateCng) { DuplicateCng(); } diff --git a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc index f3883c0b58..26ef3e881f 100644 --- a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc +++ b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc @@ -651,6 +651,11 @@ void AudioConferenceMixerImpl::UpdateToMix( _audioFramePool->PushMemory(audioFrame); continue; } + if (_participantList.size() != 1) { + // TODO(wu): Issue 3390, add support for multiple participants case. + audioFrame->ntp_time_ms_ = -1; + } + // TODO(henrike): this assert triggers in some test cases where SRTP is // used which prevents NetEQ from making a VAD. Temporarily disable this // assert until the problem is fixed on a higher level. @@ -950,6 +955,16 @@ int32_t AudioConferenceMixerImpl::MixFromList( return 0; } + if (audioFrameList->size() == 1) { + mixedAudio.timestamp_ = audioFrameList->front()->timestamp_; + mixedAudio.elapsed_time_ms_ = audioFrameList->front()->elapsed_time_ms_; + } else { + // TODO(wu): Issue 3390. + // Audio frame timestamp is only supported in one channel case. 
+ mixedAudio.timestamp_ = 0; + mixedAudio.elapsed_time_ms_ = -1; + } + for (AudioFrameList::const_iterator iter = audioFrameList->begin(); iter != audioFrameList->end(); ++iter) { diff --git a/webrtc/modules/audio_device/audio_device_buffer.cc b/webrtc/modules/audio_device/audio_device_buffer.cc index ed1bf2020b..42fdaad22c 100644 --- a/webrtc/modules/audio_device/audio_device_buffer.cc +++ b/webrtc/modules/audio_device/audio_device_buffer.cc @@ -548,15 +548,15 @@ int32_t AudioDeviceBuffer::RequestPlayoutData(uint32_t nSamples) if (_ptrCbAudioTransport) { uint32_t res(0); - uint32_t rtp_timestamp = 0; - int64_t ntp_time_ms = 0; + int64_t elapsed_time_ms = -1; + int64_t ntp_time_ms = -1; res = _ptrCbAudioTransport->NeedMorePlayData(_playSamples, playBytesPerSample, playChannels, playSampleRate, &_playBuffer[0], nSamplesOut, - &rtp_timestamp, + &elapsed_time_ms, &ntp_time_ms); if (res != 0) { diff --git a/webrtc/modules/audio_device/include/audio_device_defines.h b/webrtc/modules/audio_device/include/audio_device_defines.h index f65e3a8ec3..56a584ef9e 100644 --- a/webrtc/modules/audio_device/include/audio_device_defines.h +++ b/webrtc/modules/audio_device/include/audio_device_defines.h @@ -71,7 +71,7 @@ public: const uint32_t samplesPerSec, void* audioSamples, uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) = 0; // Method to pass captured data directly and unmixed to network channels. 
@@ -128,7 +128,7 @@ public: virtual void PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) {} protected: diff --git a/webrtc/modules/audio_device/test/audio_device_test_api.cc b/webrtc/modules/audio_device/test/audio_device_test_api.cc index b10accb753..011fc1033c 100644 --- a/webrtc/modules/audio_device/test/audio_device_test_api.cc +++ b/webrtc/modules/audio_device/test/audio_device_test_api.cc @@ -117,7 +117,7 @@ class AudioTransportAPI: public AudioTransport { const uint32_t sampleRate, void* audioSamples, uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) { play_count_++; if (play_count_ % 100 == 0) { @@ -152,7 +152,7 @@ class AudioTransportAPI: public AudioTransport { virtual void PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) {} private: uint32_t rec_count_; diff --git a/webrtc/modules/audio_device/test/func_test_manager.cc b/webrtc/modules/audio_device/test/func_test_manager.cc index a51ebfba2c..2a1928775c 100644 --- a/webrtc/modules/audio_device/test/func_test_manager.cc +++ b/webrtc/modules/audio_device/test/func_test_manager.cc @@ -293,7 +293,7 @@ int32_t AudioTransportImpl::NeedMorePlayData( const uint32_t samplesPerSec, void* audioSamples, uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) { if (_fullDuplex) @@ -554,7 +554,7 @@ void AudioTransportImpl::PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) {} FuncTestManager::FuncTestManager() : diff --git a/webrtc/modules/audio_device/test/func_test_manager.h 
b/webrtc/modules/audio_device/test/func_test_manager.h index 1a1c2a5a4f..5cb4f46102 100644 --- a/webrtc/modules/audio_device/test/func_test_manager.h +++ b/webrtc/modules/audio_device/test/func_test_manager.h @@ -119,7 +119,7 @@ public: const uint32_t samplesPerSec, void* audioSamples, uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms); virtual int OnDataAvailable(const int voe_channels[], @@ -141,7 +141,7 @@ public: virtual void PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms); AudioTransportImpl(AudioDeviceModule* audioDevice); diff --git a/webrtc/modules/interface/module_common_types.h b/webrtc/modules/interface/module_common_types.h index 9d00de773f..2c94707104 100644 --- a/webrtc/modules/interface/module_common_types.h +++ b/webrtc/modules/interface/module_common_types.h @@ -690,6 +690,9 @@ class AudioFrame { int id_; // RTP timestamp of the first sample in the AudioFrame. uint32_t timestamp_; + // Time since the first frame in milliseconds. + // -1 represents an uninitialized value. + int64_t elapsed_time_ms_; // NTP time of the estimated capture time in local timebase in milliseconds. // -1 represents an uninitialized value. int64_t ntp_time_ms_; @@ -720,6 +723,7 @@ inline void AudioFrame::Reset() { // TODO(wu): Zero is a valid value for |timestamp_|. We should initialize // to an invalid value, or add a new member to indicate invalidity. 
timestamp_ = 0; + elapsed_time_ms_ = -1; ntp_time_ms_ = -1; samples_per_channel_ = 0; sample_rate_hz_ = 0; @@ -759,6 +763,8 @@ inline void AudioFrame::CopyFrom(const AudioFrame& src) { id_ = src.id_; timestamp_ = src.timestamp_; + elapsed_time_ms_ = src.elapsed_time_ms_; + ntp_time_ms_ = src.ntp_time_ms_; samples_per_channel_ = src.samples_per_channel_; sample_rate_hz_ = src.sample_rate_hz_; speech_type_ = src.speech_type_; diff --git a/webrtc/test/fake_audio_device.cc b/webrtc/test/fake_audio_device.cc index d3421ebd64..989c12b7fd 100644 --- a/webrtc/test/fake_audio_device.cc +++ b/webrtc/test/fake_audio_device.cc @@ -121,8 +121,8 @@ void FakeAudioDevice::CaptureAudio() { samples_needed = std::min(kFrequencyHz / time_since_last_playout_ms, kBufferSizeBytes / 2); uint32_t samples_out = 0; - uint32_t rtp_timestamp = 0; - int64_t ntp_time_ms = 0; + int64_t elapsed_time_ms = -1; + int64_t ntp_time_ms = -1; EXPECT_EQ(0, audio_callback_->NeedMorePlayData(samples_needed, 2, @@ -130,7 +130,7 @@ void FakeAudioDevice::CaptureAudio() { kFrequencyHz, playout_buffer_, samples_out, - &rtp_timestamp, + &elapsed_time_ms, &ntp_time_ms)); } } diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc index 12e66ef3cb..f99e590ae4 100644 --- a/webrtc/voice_engine/channel.cc +++ b/webrtc/voice_engine/channel.cc @@ -10,6 +10,7 @@ #include "webrtc/voice_engine/channel.h" +#include "webrtc/base/timeutils.h" #include "webrtc/common.h" #include "webrtc/modules/audio_device/include/audio_device.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" @@ -683,21 +684,30 @@ int32_t Channel::GetAudioFrame(int32_t id, AudioFrame& audioFrame) // Measure audio level (0-9) _outputAudioLevel.ComputeLevel(audioFrame); - audioFrame.ntp_time_ms_ = ntp_estimator_->Estimate(audioFrame.timestamp_); - - if (!first_frame_arrived_) { - first_frame_arrived_ = true; + if (capture_start_rtp_time_stamp_ < 0 && audioFrame.timestamp_ != 0) { + // The first frame with a valid rtp 
timestamp. capture_start_rtp_time_stamp_ = audioFrame.timestamp_; - } else { + } + + if (capture_start_rtp_time_stamp_ >= 0) { + // audioFrame.timestamp_ should be valid from now on. + + // Compute elapsed time. + int64_t unwrap_timestamp = + rtp_ts_wraparound_handler_->Unwrap(audioFrame.timestamp_); + audioFrame.elapsed_time_ms_ = + (unwrap_timestamp - capture_start_rtp_time_stamp_) / + (GetPlayoutFrequency() / 1000); + + // Compute ntp time. + audioFrame.ntp_time_ms_ = ntp_estimator_->Estimate(audioFrame.timestamp_); // |ntp_time_ms_| won't be valid until at least 2 RTCP SRs are received. if (audioFrame.ntp_time_ms_ > 0) { // Compute |capture_start_ntp_time_ms_| so that - // |capture_start_ntp_time_ms_| + |elapsed_time_ms| == |ntp_time_ms_| + // |capture_start_ntp_time_ms_| + |elapsed_time_ms_| == |ntp_time_ms_| CriticalSectionScoped lock(ts_stats_lock_.get()); - uint32_t elapsed_time_ms = - (audioFrame.timestamp_ - capture_start_rtp_time_stamp_) / - (audioFrame.sample_rate_hz_ * 1000); - capture_start_ntp_time_ms_ = audioFrame.ntp_time_ms_ - elapsed_time_ms; + capture_start_ntp_time_ms_ = + audioFrame.ntp_time_ms_ - audioFrame.elapsed_time_ms_; } } @@ -875,8 +885,8 @@ Channel::Channel(int32_t channelId, _numberOfDiscardedPackets(0), send_sequence_number_(0), ts_stats_lock_(CriticalSectionWrapper::CreateCriticalSection()), - first_frame_arrived_(false), - capture_start_rtp_time_stamp_(0), + rtp_ts_wraparound_handler_(new rtc::TimestampWrapAroundHandler()), + capture_start_rtp_time_stamp_(-1), capture_start_ntp_time_ms_(-1), _engineStatisticsPtr(NULL), _outputMixerPtr(NULL), @@ -4045,20 +4055,10 @@ void Channel::UpdatePlayoutTimestamp(bool rtcp) { return; } - int32_t playout_frequency = audio_coding_->PlayoutFrequency(); - CodecInst current_recive_codec; - if (audio_coding_->ReceiveCodec(¤t_recive_codec) == 0) { - if (STR_CASE_CMP("G722", current_recive_codec.plname) == 0) { - playout_frequency = 8000; - } else if (STR_CASE_CMP("opus", current_recive_codec.plname) 
== 0) { - playout_frequency = 48000; - } - } - jitter_buffer_playout_timestamp_ = playout_timestamp; // Remove the playout delay. - playout_timestamp -= (delay_ms * (playout_frequency / 1000)); + playout_timestamp -= (delay_ms * (GetPlayoutFrequency() / 1000)); WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_instanceId,_channelId), "Channel::UpdatePlayoutTimestamp() => playoutTimestamp = %lu", @@ -4364,30 +4364,11 @@ void Channel::UpdatePacketDelay(uint32_t rtp_timestamp, rtp_timestamp, sequence_number); // Get frequency of last received payload - int rtp_receive_frequency = audio_coding_->ReceiveFrequency(); - - CodecInst current_receive_codec; - if (audio_coding_->ReceiveCodec(¤t_receive_codec) != 0) { - return; - } + int rtp_receive_frequency = GetPlayoutFrequency(); // Update the least required delay. least_required_delay_ms_ = audio_coding_->LeastRequiredDelayMs(); - if (STR_CASE_CMP("G722", current_receive_codec.plname) == 0) { - // Even though the actual sampling rate for G.722 audio is - // 16,000 Hz, the RTP clock rate for the G722 payload format is - // 8,000 Hz because that value was erroneously assigned in - // RFC 1890 and must remain unchanged for backward compatibility. - rtp_receive_frequency = 8000; - } else if (STR_CASE_CMP("opus", current_receive_codec.plname) == 0) { - // We are resampling Opus internally to 32,000 Hz until all our - // DSP routines can operate at 48,000 Hz, but the RTP clock - // rate for the Opus payload format is standardized to 48,000 Hz, - // because that is the maximum supported decoding sampling rate. - rtp_receive_frequency = 48000; - } - // |jitter_buffer_playout_timestamp_| updated in UpdatePlayoutTimestamp for // every incoming packet. 
uint32_t timestamp_diff_ms = (rtp_timestamp - @@ -4560,5 +4541,26 @@ int Channel::SetSendRtpHeaderExtension(bool enable, RTPExtensionType type, return error; } +int32_t Channel::GetPlayoutFrequency() { + int32_t playout_frequency = audio_coding_->PlayoutFrequency(); + CodecInst current_recive_codec; + if (audio_coding_->ReceiveCodec(¤t_recive_codec) == 0) { + if (STR_CASE_CMP("G722", current_recive_codec.plname) == 0) { + // Even though the actual sampling rate for G.722 audio is + // 16,000 Hz, the RTP clock rate for the G722 payload format is + // 8,000 Hz because that value was erroneously assigned in + // RFC 1890 and must remain unchanged for backward compatibility. + playout_frequency = 8000; + } else if (STR_CASE_CMP("opus", current_recive_codec.plname) == 0) { + // We are resampling Opus internally to 32,000 Hz until all our + // DSP routines can operate at 48,000 Hz, but the RTP clock + // rate for the Opus payload format is standardized to 48,000 Hz, + // because that is the maximum supported decoding sampling rate. 
+ playout_frequency = 48000; + } + } + return playout_frequency; +} + } // namespace voe } // namespace webrtc diff --git a/webrtc/voice_engine/channel.h b/webrtc/voice_engine/channel.h index 7c8a3e2e72..3c49d9bd29 100644 --- a/webrtc/voice_engine/channel.h +++ b/webrtc/voice_engine/channel.h @@ -35,6 +35,11 @@ #include "webrtc/voice_engine/include/voe_dtmf.h" #endif +namespace rtc { + +class TimestampWrapAroundHandler; +} + namespace webrtc { class AudioDeviceModule; @@ -500,6 +505,8 @@ private: int SetSendRtpHeaderExtension(bool enable, RTPExtensionType type, unsigned char id); + int32_t GetPlayoutFrequency(); + CriticalSectionWrapper& _fileCritSect; CriticalSectionWrapper& _callbackCritSect; CriticalSectionWrapper& volume_settings_critsect_; @@ -553,9 +560,9 @@ private: scoped_ptr<CriticalSectionWrapper> ts_stats_lock_; - bool first_frame_arrived_; + scoped_ptr<rtc::TimestampWrapAroundHandler> rtp_ts_wraparound_handler_; // The rtp timestamp of the first played out audio frame. - uint32_t capture_start_rtp_time_stamp_; + int64_t capture_start_rtp_time_stamp_; // The capture ntp time (in local timebase) of the first played out audio // frame. 
int64_t capture_start_ntp_time_ms_; diff --git a/webrtc/voice_engine/utility.cc b/webrtc/voice_engine/utility.cc index 04f1f2c1ff..561b4ef5e2 100644 --- a/webrtc/voice_engine/utility.cc +++ b/webrtc/voice_engine/utility.cc @@ -65,6 +65,10 @@ void RemixAndResample(const AudioFrame& src_frame, dst_frame->num_channels_ = 1; AudioFrameOperations::MonoToStereo(dst_frame); } + + dst_frame->timestamp_ = src_frame.timestamp_; + dst_frame->elapsed_time_ms_ = src_frame.elapsed_time_ms_; + dst_frame->ntp_time_ms_ = src_frame.ntp_time_ms_; } void DownConvertToCodecFormat(const int16_t* src_data, diff --git a/webrtc/voice_engine/voe_base_impl.cc b/webrtc/voice_engine/voe_base_impl.cc index cfedd40563..ad6314a7e5 100644 --- a/webrtc/voice_engine/voe_base_impl.cc +++ b/webrtc/voice_engine/voe_base_impl.cc @@ -149,7 +149,7 @@ int32_t VoEBaseImpl::NeedMorePlayData( uint32_t samplesPerSec, void* audioSamples, uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) { WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_shared->instance_id(), -1), @@ -160,7 +160,7 @@ int32_t VoEBaseImpl::NeedMorePlayData( GetPlayoutData(static_cast<int>(samplesPerSec), static_cast<int>(nChannels), static_cast<int>(nSamples), true, audioSamples, - rtp_timestamp, ntp_time_ms); + elapsed_time_ms, ntp_time_ms); nSamplesOut = _audioFrame.samples_per_channel_; @@ -237,13 +237,13 @@ void VoEBaseImpl::PushCaptureData(int voe_channel, const void* audio_data, void VoEBaseImpl::PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) { assert(bits_per_sample == 16); assert(number_of_frames == static_cast<int>(sample_rate / 100)); GetPlayoutData(sample_rate, number_of_channels, number_of_frames, false, - audio_data, rtp_timestamp, ntp_time_ms); + audio_data, elapsed_time_ms, ntp_time_ms); } int 
VoEBaseImpl::RegisterVoiceEngineObserver(VoiceEngineObserver& observer) @@ -1087,7 +1087,7 @@ int VoEBaseImpl::ProcessRecordedDataWithAPM( void VoEBaseImpl::GetPlayoutData(int sample_rate, int number_of_channels, int number_of_frames, bool feed_data_to_apm, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) { assert(_shared->output_mixer() != NULL); @@ -1110,7 +1110,7 @@ void VoEBaseImpl::GetPlayoutData(int sample_rate, int number_of_channels, memcpy(audio_data, _audioFrame.data_, sizeof(int16_t) * number_of_frames * number_of_channels); - *rtp_timestamp = _audioFrame.timestamp_; + *elapsed_time_ms = _audioFrame.elapsed_time_ms_; *ntp_time_ms = _audioFrame.ntp_time_ms_; } diff --git a/webrtc/voice_engine/voe_base_impl.h b/webrtc/voice_engine/voe_base_impl.h index fbcb4dd857..985ef5d838 100644 --- a/webrtc/voice_engine/voe_base_impl.h +++ b/webrtc/voice_engine/voe_base_impl.h @@ -80,7 +80,7 @@ public: uint32_t samplesPerSec, void* audioSamples, uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms); virtual int OnDataAvailable(const int voe_channels[], @@ -105,7 +105,7 @@ public: virtual void PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms); // AudioDeviceObserver @@ -143,7 +143,7 @@ private: void GetPlayoutData(int sample_rate, int number_of_channels, int number_of_frames, bool feed_data_to_apm, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms); int32_t AddBuildInfo(char* str) const; |