diff options
author | wu@webrtc.org <wu@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d> | 2014-06-05 20:34:08 +0000 |
---|---|---|
committer | wu@webrtc.org <wu@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d> | 2014-06-05 20:34:08 +0000 |
commit | 94454b71adc37e15fd3f5a5fc432063f05caabcb (patch) | |
tree | c86ef909184b482c08033fd1755b74a7c6e1a96a | |
parent | 130fa64d4c726765c66879e440e27e7bda86508f (diff) | |
download | webrtc-94454b71adc37e15fd3f5a5fc432063f05caabcb.tar.gz |
Fix the chain that propagates the audio frame's rtp and ntp timestamp including:
* In AudioCodingModuleImpl::PlayoutData10Ms, don't reset the timestamp obtained from GetAudio.
* When there're more than one participant, set AudioFrame's RTP timestamp to 0.
* Copy ntp_time_ms_ in AudioFrame::CopyFrom method.
* In RemixAndResample, pass src frame's timestamp_ and ntp_time_ms_ to the dst frame.
* Fix how |elapsed_time_ms| is computed in channel.cc by adding GetPlayoutFrequency.
Tweaks on ntp_time_ms_:
* Init ntp_time_ms_ to -1 in AudioFrame ctor.
* When there're more than one participant, set AudioFrame's ntp_time_ms_ to an invalid value. I.e. we don't support ntp_time_ms_ in multiple participants case before the mixing is moved to chrome.
Added elapsed_time_ms to AudioFrame and pass it to chrome, where we don't have the information about the rtp timestamp's sample rate, i.e. can't convert rtp timestamp to ms.
BUG=3111
R=henrik.lundin@webrtc.org, turaj@webrtc.org, xians@webrtc.org
TBR=andrew
andrew to take another look on audio_conference_mixer_impl.cc
Review URL: https://webrtc-codereview.appspot.com/14559004
git-svn-id: http://webrtc.googlecode.com/svn/trunk@6346 4adac7df-926f-26a2-2b94-8c16560cd09d
26 files changed, 168 insertions, 100 deletions
diff --git a/talk/app/webrtc/test/fakeaudiocapturemodule.cc b/talk/app/webrtc/test/fakeaudiocapturemodule.cc index 72d39c9714..c22ed6f5d4 100644 --- a/talk/app/webrtc/test/fakeaudiocapturemodule.cc +++ b/talk/app/webrtc/test/fakeaudiocapturemodule.cc @@ -729,12 +729,12 @@ void FakeAudioCaptureModule::ReceiveFrameP() { ResetRecBuffer(); uint32_t nSamplesOut = 0; #ifdef USE_WEBRTC_DEV_BRANCH - uint32_t rtp_timestamp = 0; + int64_t elapsed_time_ms = 0; int64_t ntp_time_ms = 0; if (audio_callback_->NeedMorePlayData(kNumberSamples, kNumberBytesPerSample, kNumberOfChannels, kSamplesPerSecond, rec_buffer_, nSamplesOut, - &rtp_timestamp, &ntp_time_ms) != 0) { + &elapsed_time_ms, &ntp_time_ms) != 0) { ASSERT(false); } #else diff --git a/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc b/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc index ea92f7b0d6..ab0db06231 100644 --- a/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc +++ b/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc @@ -86,7 +86,7 @@ class FakeAdmTest : public testing::Test, void* audioSamples, #ifdef USE_WEBRTC_DEV_BRANCH uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) { #else uint32_t& nSamplesOut) { @@ -98,7 +98,7 @@ class FakeAdmTest : public testing::Test, GenerateZeroBuffer(audioSamples, audio_buffer_size); nSamplesOut = bytes_out / nBytesPerSample; #ifdef USE_WEBRTC_DEV_BRANCH - *rtp_timestamp = 0; + *elapsed_time_ms = 0; *ntp_time_ms = 0; #endif return 0; diff --git a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc index 613491a052..052bd4fccb 100644 --- a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc +++ b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc @@ -475,10 +475,17 @@ int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) { call_stats_.DecodedByNetEq(audio_frame->speech_type_); // Computes the RTP timestamp of the first sample 
in |audio_frame| from - // |PlayoutTimestamp|, which is the timestamp of the last sample of + // |GetPlayoutTimestamp|, which is the timestamp of the last sample of // |audio_frame|. - audio_frame->timestamp_ = - PlayoutTimestamp() - audio_frame->samples_per_channel_; + uint32_t playout_timestamp = 0; + if (GetPlayoutTimestamp(&playout_timestamp)) { + audio_frame->timestamp_ = + playout_timestamp - audio_frame->samples_per_channel_; + } else { + // Remain 0 until we have a valid |playout_timestamp|. + audio_frame->timestamp_ = 0; + } + return 0; } @@ -596,13 +603,14 @@ void AcmReceiver::set_id(int id) { id_ = id; } -uint32_t AcmReceiver::PlayoutTimestamp() { +bool AcmReceiver::GetPlayoutTimestamp(uint32_t* timestamp) { if (av_sync_) { assert(initial_delay_manager_.get()); - if (initial_delay_manager_->buffering()) - return initial_delay_manager_->playout_timestamp(); + if (initial_delay_manager_->buffering()) { + return initial_delay_manager_->GetPlayoutTimestamp(timestamp); + } } - return neteq_->PlayoutTimestamp(); + return neteq_->GetPlayoutTimestamp(timestamp); } int AcmReceiver::last_audio_codec_id() const { diff --git a/webrtc/modules/audio_coding/main/acm2/acm_receiver.h b/webrtc/modules/audio_coding/main/acm2/acm_receiver.h index 7a238aec11..748744a4c1 100644 --- a/webrtc/modules/audio_coding/main/acm2/acm_receiver.h +++ b/webrtc/modules/audio_coding/main/acm2/acm_receiver.h @@ -242,9 +242,10 @@ class AcmReceiver { void set_id(int id); // TODO(turajs): can be inline. // - // Returns the RTP timestamp of the last sample delivered by GetAudio(). + // Gets the RTP timestamp of the last sample delivered by GetAudio(). + // Returns true if the RTP timestamp is valid, otherwise false. 
// - uint32_t PlayoutTimestamp(); + bool GetPlayoutTimestamp(uint32_t* timestamp); // // Return the index of the codec associated with the last non-CNG/non-DTMF diff --git a/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc b/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc index 7f6c84051e..a07e854334 100644 --- a/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc +++ b/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc @@ -1776,7 +1776,6 @@ int AudioCodingModuleImpl::PlayoutData10Ms(int desired_freq_hz, } audio_frame->id_ = id_; - audio_frame->timestamp_ = 0; return 0; } @@ -1917,8 +1916,7 @@ int AudioCodingModuleImpl::ConfigISACBandwidthEstimator( } int AudioCodingModuleImpl::PlayoutTimestamp(uint32_t* timestamp) { - *timestamp = receiver_.PlayoutTimestamp(); - return 0; + return receiver_.GetPlayoutTimestamp(timestamp) ? 0 : -1; } bool AudioCodingModuleImpl::HaveValidEncoder(const char* caller_name) const { diff --git a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.cc b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.cc index c2b218cb6c..786fb2e527 100644 --- a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.cc +++ b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.cc @@ -219,6 +219,14 @@ void InitialDelayManager::LatePackets( return; } +bool InitialDelayManager::GetPlayoutTimestamp(uint32_t* playout_timestamp) { + if (!buffering_) { + return false; + } + *playout_timestamp = playout_timestamp_; + return true; +} + void InitialDelayManager::DisableBuffering() { buffering_ = false; } diff --git a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h index 3c5ba3c013..6edc115084 100644 --- a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h +++ b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h @@ -65,8 +65,9 @@ class InitialDelayManager { // sequence of 
late (or perhaps missing) packets is computed. void LatePackets(uint32_t timestamp_now, SyncStream* sync_stream); - // Playout timestamp, valid when buffering. - uint32_t playout_timestamp() { return playout_timestamp_; } + // Get playout timestamp. + // Returns true if the timestamp is valid (when buffering), otherwise false. + bool GetPlayoutTimestamp(uint32_t* playout_timestamp); // True if buffered audio is less than the given initial delay (specified at // the constructor). Buffering might be disabled by the client of this class. diff --git a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager_unittest.cc b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager_unittest.cc index 15e88a5391..38b7cfc271 100644 --- a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager_unittest.cc +++ b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager_unittest.cc @@ -359,7 +359,9 @@ TEST_F(InitialDelayManagerTest, BufferingAudio) { EXPECT_TRUE(manager_->buffering()); const uint32_t expected_playout_timestamp = rtp_info_.header.timestamp - kInitDelayMs * kSamplingRateHz / 1000; - EXPECT_EQ(expected_playout_timestamp, manager_->playout_timestamp()); + uint32_t actual_playout_timestamp = 0; + EXPECT_TRUE(manager_->GetPlayoutTimestamp(&actual_playout_timestamp)); + EXPECT_EQ(expected_playout_timestamp, actual_playout_timestamp); NextRtpHeader(&rtp_info_, &rtp_receive_timestamp_); } diff --git a/webrtc/modules/audio_coding/neteq4/interface/neteq.h b/webrtc/modules/audio_coding/neteq4/interface/neteq.h index 763da31b82..79a5dfb0b1 100644 --- a/webrtc/modules/audio_coding/neteq4/interface/neteq.h +++ b/webrtc/modules/audio_coding/neteq4/interface/neteq.h @@ -228,8 +228,9 @@ class NetEq { // Disables post-decode VAD. virtual void DisableVad() = 0; - // Returns the RTP timestamp for the last sample delivered by GetAudio(). - virtual uint32_t PlayoutTimestamp() = 0; + // Gets the RTP timestamp for the last sample delivered by GetAudio(). 
+ // Returns true if the RTP timestamp is valid, otherwise false. + virtual bool GetPlayoutTimestamp(uint32_t* timestamp) = 0; // Not implemented. virtual int SetTargetNumberOfChannels() = 0; diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl.cc b/webrtc/modules/audio_coding/neteq4/neteq_impl.cc index 963a820610..f860766085 100644 --- a/webrtc/modules/audio_coding/neteq4/neteq_impl.cc +++ b/webrtc/modules/audio_coding/neteq4/neteq_impl.cc @@ -335,9 +335,15 @@ void NetEqImpl::DisableVad() { vad_->Disable(); } -uint32_t NetEqImpl::PlayoutTimestamp() { +bool NetEqImpl::GetPlayoutTimestamp(uint32_t* timestamp) { CriticalSectionScoped lock(crit_sect_.get()); - return timestamp_scaler_->ToExternal(playout_timestamp_); + if (first_packet_) { + // We don't have a valid RTP timestamp until we have decoded our first + // RTP packet. + return false; + } + *timestamp = timestamp_scaler_->ToExternal(playout_timestamp_); + return true; } int NetEqImpl::LastError() { diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl.h b/webrtc/modules/audio_coding/neteq4/neteq_impl.h index 751de66dd9..822a523a62 100644 --- a/webrtc/modules/audio_coding/neteq4/neteq_impl.h +++ b/webrtc/modules/audio_coding/neteq4/neteq_impl.h @@ -166,8 +166,7 @@ class NetEqImpl : public webrtc::NetEq { // Disables post-decode VAD. virtual void DisableVad(); - // Returns the RTP timestamp for the last sample delivered by GetAudio(). 
- virtual uint32_t PlayoutTimestamp(); + virtual bool GetPlayoutTimestamp(uint32_t* timestamp); virtual int SetTargetNumberOfChannels() { return kNotImplemented; } diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc b/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc index aedd8d5a1f..26279aa93a 100644 --- a/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc +++ b/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc @@ -477,8 +477,10 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) { // The value of the last of the output samples is the same as the number of // samples played from the decoded packet. Thus, this number + the RTP // timestamp should match the playout timestamp. + uint32_t timestamp = 0; + EXPECT_TRUE(neteq_->GetPlayoutTimestamp(×tamp)); EXPECT_EQ(rtp_header.header.timestamp + output[samples_per_channel - 1], - neteq_->PlayoutTimestamp()); + timestamp); // Check the timestamp for the last value in the sync buffer. This should // be one full frame length ahead of the RTP timestamp. diff --git a/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc b/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc index f66a3cfae5..c1a7e16526 100644 --- a/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc +++ b/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc @@ -228,6 +228,8 @@ class NetEqDecodingTest : public ::testing::Test { void DuplicateCng(); + uint32_t PlayoutTimestamp(); + NetEq* neteq_; FILE* rtp_fp_; unsigned int sim_clock_; @@ -736,7 +738,7 @@ void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor, } EXPECT_EQ(kOutputNormal, type); - int32_t delay_before = timestamp - neteq_->PlayoutTimestamp(); + int32_t delay_before = timestamp - PlayoutTimestamp(); // Insert CNG for 1 minute (= 60000 ms). const int kCngPeriodMs = 100; @@ -829,7 +831,7 @@ void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor, // Check that the speech starts again within reasonable time. 
double time_until_speech_returns_ms = t_ms - speech_restart_time_ms; EXPECT_LT(time_until_speech_returns_ms, max_time_to_speech_ms); - int32_t delay_after = timestamp - neteq_->PlayoutTimestamp(); + int32_t delay_after = timestamp - PlayoutTimestamp(); // Compare delay before and after, and make sure it differs less than 20 ms. EXPECT_LE(delay_after, delay_before + delay_tolerance_ms * 16); EXPECT_GE(delay_after, delay_before - delay_tolerance_ms * 16); @@ -1310,7 +1312,7 @@ void NetEqDecodingTest::WrapTest(uint16_t start_seq_no, ASSERT_EQ(1, num_channels); // Expect delay (in samples) to be less than 2 packets. - EXPECT_LE(timestamp - neteq_->PlayoutTimestamp(), + EXPECT_LE(timestamp - PlayoutTimestamp(), static_cast<uint32_t>(kSamples * 2)); } // Make sure we have actually tested wrap-around. @@ -1391,7 +1393,7 @@ void NetEqDecodingTest::DuplicateCng() { kMaxBlockSize, out_data_, &out_len, &num_channels, &type)); ASSERT_EQ(kBlockSize16kHz, out_len); EXPECT_EQ(kOutputCNG, type); - EXPECT_EQ(timestamp - algorithmic_delay_samples, neteq_->PlayoutTimestamp()); + EXPECT_EQ(timestamp - algorithmic_delay_samples, PlayoutTimestamp()); // Insert the same CNG packet again. Note that at this point it is old, since // we have already decoded the first copy of it. @@ -1406,7 +1408,7 @@ void NetEqDecodingTest::DuplicateCng() { ASSERT_EQ(kBlockSize16kHz, out_len); EXPECT_EQ(kOutputCNG, type); EXPECT_EQ(timestamp - algorithmic_delay_samples, - neteq_->PlayoutTimestamp()); + PlayoutTimestamp()); } // Insert speech again. 
@@ -1422,7 +1424,13 @@ void NetEqDecodingTest::DuplicateCng() { ASSERT_EQ(kBlockSize16kHz, out_len); EXPECT_EQ(kOutputNormal, type); EXPECT_EQ(timestamp + kSamples - algorithmic_delay_samples, - neteq_->PlayoutTimestamp()); + PlayoutTimestamp()); +} + +uint32_t NetEqDecodingTest::PlayoutTimestamp() { + uint32_t playout_timestamp = 0; + EXPECT_TRUE(neteq_->GetPlayoutTimestamp(&playout_timestamp)); + return playout_timestamp; } TEST_F(NetEqDecodingTest, DiscardDuplicateCng) { DuplicateCng(); } diff --git a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc index f3883c0b58..26ef3e881f 100644 --- a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc +++ b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc @@ -651,6 +651,11 @@ void AudioConferenceMixerImpl::UpdateToMix( _audioFramePool->PushMemory(audioFrame); continue; } + if (_participantList.size() != 1) { + // TODO(wu): Issue 3390, add support for multiple participants case. + audioFrame->ntp_time_ms_ = -1; + } + // TODO(henrike): this assert triggers in some test cases where SRTP is // used which prevents NetEQ from making a VAD. Temporarily disable this // assert until the problem is fixed on a higher level. @@ -950,6 +955,16 @@ int32_t AudioConferenceMixerImpl::MixFromList( return 0; } + if (audioFrameList->size() == 1) { + mixedAudio.timestamp_ = audioFrameList->front()->timestamp_; + mixedAudio.elapsed_time_ms_ = audioFrameList->front()->elapsed_time_ms_; + } else { + // TODO(wu): Issue 3390. + // Audio frame timestamp is only supported in one channel case. 
+ mixedAudio.timestamp_ = 0; + mixedAudio.elapsed_time_ms_ = -1; + } + for (AudioFrameList::const_iterator iter = audioFrameList->begin(); iter != audioFrameList->end(); ++iter) { diff --git a/webrtc/modules/audio_device/audio_device_buffer.cc b/webrtc/modules/audio_device/audio_device_buffer.cc index ed1bf2020b..42fdaad22c 100644 --- a/webrtc/modules/audio_device/audio_device_buffer.cc +++ b/webrtc/modules/audio_device/audio_device_buffer.cc @@ -548,15 +548,15 @@ int32_t AudioDeviceBuffer::RequestPlayoutData(uint32_t nSamples) if (_ptrCbAudioTransport) { uint32_t res(0); - uint32_t rtp_timestamp = 0; - int64_t ntp_time_ms = 0; + int64_t elapsed_time_ms = -1; + int64_t ntp_time_ms = -1; res = _ptrCbAudioTransport->NeedMorePlayData(_playSamples, playBytesPerSample, playChannels, playSampleRate, &_playBuffer[0], nSamplesOut, - &rtp_timestamp, + &elapsed_time_ms, &ntp_time_ms); if (res != 0) { diff --git a/webrtc/modules/audio_device/include/audio_device_defines.h b/webrtc/modules/audio_device/include/audio_device_defines.h index f65e3a8ec3..56a584ef9e 100644 --- a/webrtc/modules/audio_device/include/audio_device_defines.h +++ b/webrtc/modules/audio_device/include/audio_device_defines.h @@ -71,7 +71,7 @@ public: const uint32_t samplesPerSec, void* audioSamples, uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) = 0; // Method to pass captured data directly and unmixed to network channels. 
@@ -128,7 +128,7 @@ public: virtual void PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) {} protected: diff --git a/webrtc/modules/audio_device/test/audio_device_test_api.cc b/webrtc/modules/audio_device/test/audio_device_test_api.cc index b10accb753..011fc1033c 100644 --- a/webrtc/modules/audio_device/test/audio_device_test_api.cc +++ b/webrtc/modules/audio_device/test/audio_device_test_api.cc @@ -117,7 +117,7 @@ class AudioTransportAPI: public AudioTransport { const uint32_t sampleRate, void* audioSamples, uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) { play_count_++; if (play_count_ % 100 == 0) { @@ -152,7 +152,7 @@ class AudioTransportAPI: public AudioTransport { virtual void PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) {} private: uint32_t rec_count_; diff --git a/webrtc/modules/audio_device/test/func_test_manager.cc b/webrtc/modules/audio_device/test/func_test_manager.cc index a51ebfba2c..2a1928775c 100644 --- a/webrtc/modules/audio_device/test/func_test_manager.cc +++ b/webrtc/modules/audio_device/test/func_test_manager.cc @@ -293,7 +293,7 @@ int32_t AudioTransportImpl::NeedMorePlayData( const uint32_t samplesPerSec, void* audioSamples, uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) { if (_fullDuplex) @@ -554,7 +554,7 @@ void AudioTransportImpl::PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) {} FuncTestManager::FuncTestManager() : diff --git a/webrtc/modules/audio_device/test/func_test_manager.h 
b/webrtc/modules/audio_device/test/func_test_manager.h index 1a1c2a5a4f..5cb4f46102 100644 --- a/webrtc/modules/audio_device/test/func_test_manager.h +++ b/webrtc/modules/audio_device/test/func_test_manager.h @@ -119,7 +119,7 @@ public: const uint32_t samplesPerSec, void* audioSamples, uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms); virtual int OnDataAvailable(const int voe_channels[], @@ -141,7 +141,7 @@ public: virtual void PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms); AudioTransportImpl(AudioDeviceModule* audioDevice); diff --git a/webrtc/modules/interface/module_common_types.h b/webrtc/modules/interface/module_common_types.h index 9d00de773f..2c94707104 100644 --- a/webrtc/modules/interface/module_common_types.h +++ b/webrtc/modules/interface/module_common_types.h @@ -690,6 +690,9 @@ class AudioFrame { int id_; // RTP timestamp of the first sample in the AudioFrame. uint32_t timestamp_; + // Time since the first frame in milliseconds. + // -1 represents an uninitialized value. + int64_t elapsed_time_ms_; // NTP time of the estimated capture time in local timebase in milliseconds. // -1 represents an uninitialized value. int64_t ntp_time_ms_; @@ -720,6 +723,7 @@ inline void AudioFrame::Reset() { // TODO(wu): Zero is a valid value for |timestamp_|. We should initialize // to an invalid value, or add a new member to indicate invalidity. 
timestamp_ = 0; + elapsed_time_ms_ = -1; ntp_time_ms_ = -1; samples_per_channel_ = 0; sample_rate_hz_ = 0; @@ -759,6 +763,8 @@ inline void AudioFrame::CopyFrom(const AudioFrame& src) { id_ = src.id_; timestamp_ = src.timestamp_; + elapsed_time_ms_ = src.elapsed_time_ms_; + ntp_time_ms_ = src.ntp_time_ms_; samples_per_channel_ = src.samples_per_channel_; sample_rate_hz_ = src.sample_rate_hz_; speech_type_ = src.speech_type_; diff --git a/webrtc/test/fake_audio_device.cc b/webrtc/test/fake_audio_device.cc index d3421ebd64..989c12b7fd 100644 --- a/webrtc/test/fake_audio_device.cc +++ b/webrtc/test/fake_audio_device.cc @@ -121,8 +121,8 @@ void FakeAudioDevice::CaptureAudio() { samples_needed = std::min(kFrequencyHz / time_since_last_playout_ms, kBufferSizeBytes / 2); uint32_t samples_out = 0; - uint32_t rtp_timestamp = 0; - int64_t ntp_time_ms = 0; + int64_t elapsed_time_ms = -1; + int64_t ntp_time_ms = -1; EXPECT_EQ(0, audio_callback_->NeedMorePlayData(samples_needed, 2, @@ -130,7 +130,7 @@ void FakeAudioDevice::CaptureAudio() { kFrequencyHz, playout_buffer_, samples_out, - &rtp_timestamp, + &elapsed_time_ms, &ntp_time_ms)); } } diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc index 12e66ef3cb..f99e590ae4 100644 --- a/webrtc/voice_engine/channel.cc +++ b/webrtc/voice_engine/channel.cc @@ -10,6 +10,7 @@ #include "webrtc/voice_engine/channel.h" +#include "webrtc/base/timeutils.h" #include "webrtc/common.h" #include "webrtc/modules/audio_device/include/audio_device.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" @@ -683,21 +684,30 @@ int32_t Channel::GetAudioFrame(int32_t id, AudioFrame& audioFrame) // Measure audio level (0-9) _outputAudioLevel.ComputeLevel(audioFrame); - audioFrame.ntp_time_ms_ = ntp_estimator_->Estimate(audioFrame.timestamp_); - - if (!first_frame_arrived_) { - first_frame_arrived_ = true; + if (capture_start_rtp_time_stamp_ < 0 && audioFrame.timestamp_ != 0) { + // The first frame with a valid rtp 
timestamp. capture_start_rtp_time_stamp_ = audioFrame.timestamp_; - } else { + } + + if (capture_start_rtp_time_stamp_ >= 0) { + // audioFrame.timestamp_ should be valid from now on. + + // Compute elapsed time. + int64_t unwrap_timestamp = + rtp_ts_wraparound_handler_->Unwrap(audioFrame.timestamp_); + audioFrame.elapsed_time_ms_ = + (unwrap_timestamp - capture_start_rtp_time_stamp_) / + (GetPlayoutFrequency() / 1000); + + // Compute ntp time. + audioFrame.ntp_time_ms_ = ntp_estimator_->Estimate(audioFrame.timestamp_); // |ntp_time_ms_| won't be valid until at least 2 RTCP SRs are received. if (audioFrame.ntp_time_ms_ > 0) { // Compute |capture_start_ntp_time_ms_| so that - // |capture_start_ntp_time_ms_| + |elapsed_time_ms| == |ntp_time_ms_| + // |capture_start_ntp_time_ms_| + |elapsed_time_ms_| == |ntp_time_ms_| CriticalSectionScoped lock(ts_stats_lock_.get()); - uint32_t elapsed_time_ms = - (audioFrame.timestamp_ - capture_start_rtp_time_stamp_) / - (audioFrame.sample_rate_hz_ * 1000); - capture_start_ntp_time_ms_ = audioFrame.ntp_time_ms_ - elapsed_time_ms; + capture_start_ntp_time_ms_ = + audioFrame.ntp_time_ms_ - audioFrame.elapsed_time_ms_; } } @@ -875,8 +885,8 @@ Channel::Channel(int32_t channelId, _numberOfDiscardedPackets(0), send_sequence_number_(0), ts_stats_lock_(CriticalSectionWrapper::CreateCriticalSection()), - first_frame_arrived_(false), - capture_start_rtp_time_stamp_(0), + rtp_ts_wraparound_handler_(new rtc::TimestampWrapAroundHandler()), + capture_start_rtp_time_stamp_(-1), capture_start_ntp_time_ms_(-1), _engineStatisticsPtr(NULL), _outputMixerPtr(NULL), @@ -4045,20 +4055,10 @@ void Channel::UpdatePlayoutTimestamp(bool rtcp) { return; } - int32_t playout_frequency = audio_coding_->PlayoutFrequency(); - CodecInst current_recive_codec; - if (audio_coding_->ReceiveCodec(¤t_recive_codec) == 0) { - if (STR_CASE_CMP("G722", current_recive_codec.plname) == 0) { - playout_frequency = 8000; - } else if (STR_CASE_CMP("opus", current_recive_codec.plname) 
== 0) { - playout_frequency = 48000; - } - } - jitter_buffer_playout_timestamp_ = playout_timestamp; // Remove the playout delay. - playout_timestamp -= (delay_ms * (playout_frequency / 1000)); + playout_timestamp -= (delay_ms * (GetPlayoutFrequency() / 1000)); WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_instanceId,_channelId), "Channel::UpdatePlayoutTimestamp() => playoutTimestamp = %lu", @@ -4364,30 +4364,11 @@ void Channel::UpdatePacketDelay(uint32_t rtp_timestamp, rtp_timestamp, sequence_number); // Get frequency of last received payload - int rtp_receive_frequency = audio_coding_->ReceiveFrequency(); - - CodecInst current_receive_codec; - if (audio_coding_->ReceiveCodec(¤t_receive_codec) != 0) { - return; - } + int rtp_receive_frequency = GetPlayoutFrequency(); // Update the least required delay. least_required_delay_ms_ = audio_coding_->LeastRequiredDelayMs(); - if (STR_CASE_CMP("G722", current_receive_codec.plname) == 0) { - // Even though the actual sampling rate for G.722 audio is - // 16,000 Hz, the RTP clock rate for the G722 payload format is - // 8,000 Hz because that value was erroneously assigned in - // RFC 1890 and must remain unchanged for backward compatibility. - rtp_receive_frequency = 8000; - } else if (STR_CASE_CMP("opus", current_receive_codec.plname) == 0) { - // We are resampling Opus internally to 32,000 Hz until all our - // DSP routines can operate at 48,000 Hz, but the RTP clock - // rate for the Opus payload format is standardized to 48,000 Hz, - // because that is the maximum supported decoding sampling rate. - rtp_receive_frequency = 48000; - } - // |jitter_buffer_playout_timestamp_| updated in UpdatePlayoutTimestamp for // every incoming packet. 
uint32_t timestamp_diff_ms = (rtp_timestamp - @@ -4560,5 +4541,26 @@ int Channel::SetSendRtpHeaderExtension(bool enable, RTPExtensionType type, return error; } +int32_t Channel::GetPlayoutFrequency() { + int32_t playout_frequency = audio_coding_->PlayoutFrequency(); + CodecInst current_recive_codec; + if (audio_coding_->ReceiveCodec(¤t_recive_codec) == 0) { + if (STR_CASE_CMP("G722", current_recive_codec.plname) == 0) { + // Even though the actual sampling rate for G.722 audio is + // 16,000 Hz, the RTP clock rate for the G722 payload format is + // 8,000 Hz because that value was erroneously assigned in + // RFC 1890 and must remain unchanged for backward compatibility. + playout_frequency = 8000; + } else if (STR_CASE_CMP("opus", current_recive_codec.plname) == 0) { + // We are resampling Opus internally to 32,000 Hz until all our + // DSP routines can operate at 48,000 Hz, but the RTP clock + // rate for the Opus payload format is standardized to 48,000 Hz, + // because that is the maximum supported decoding sampling rate. 
+ playout_frequency = 48000; + } + } + return playout_frequency; +} + } // namespace voe } // namespace webrtc diff --git a/webrtc/voice_engine/channel.h b/webrtc/voice_engine/channel.h index 7c8a3e2e72..3c49d9bd29 100644 --- a/webrtc/voice_engine/channel.h +++ b/webrtc/voice_engine/channel.h @@ -35,6 +35,11 @@ #include "webrtc/voice_engine/include/voe_dtmf.h" #endif +namespace rtc { + +class TimestampWrapAroundHandler; +} + namespace webrtc { class AudioDeviceModule; @@ -500,6 +505,8 @@ private: int SetSendRtpHeaderExtension(bool enable, RTPExtensionType type, unsigned char id); + int32_t GetPlayoutFrequency(); + CriticalSectionWrapper& _fileCritSect; CriticalSectionWrapper& _callbackCritSect; CriticalSectionWrapper& volume_settings_critsect_; @@ -553,9 +560,9 @@ private: scoped_ptr<CriticalSectionWrapper> ts_stats_lock_; - bool first_frame_arrived_; + scoped_ptr<rtc::TimestampWrapAroundHandler> rtp_ts_wraparound_handler_; // The rtp timestamp of the first played out audio frame. - uint32_t capture_start_rtp_time_stamp_; + int64_t capture_start_rtp_time_stamp_; // The capture ntp time (in local timebase) of the first played out audio // frame. 
int64_t capture_start_ntp_time_ms_; diff --git a/webrtc/voice_engine/utility.cc b/webrtc/voice_engine/utility.cc index 04f1f2c1ff..561b4ef5e2 100644 --- a/webrtc/voice_engine/utility.cc +++ b/webrtc/voice_engine/utility.cc @@ -65,6 +65,10 @@ void RemixAndResample(const AudioFrame& src_frame, dst_frame->num_channels_ = 1; AudioFrameOperations::MonoToStereo(dst_frame); } + + dst_frame->timestamp_ = src_frame.timestamp_; + dst_frame->elapsed_time_ms_ = src_frame.elapsed_time_ms_; + dst_frame->ntp_time_ms_ = src_frame.ntp_time_ms_; } void DownConvertToCodecFormat(const int16_t* src_data, diff --git a/webrtc/voice_engine/voe_base_impl.cc b/webrtc/voice_engine/voe_base_impl.cc index cfedd40563..ad6314a7e5 100644 --- a/webrtc/voice_engine/voe_base_impl.cc +++ b/webrtc/voice_engine/voe_base_impl.cc @@ -149,7 +149,7 @@ int32_t VoEBaseImpl::NeedMorePlayData( uint32_t samplesPerSec, void* audioSamples, uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) { WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_shared->instance_id(), -1), @@ -160,7 +160,7 @@ int32_t VoEBaseImpl::NeedMorePlayData( GetPlayoutData(static_cast<int>(samplesPerSec), static_cast<int>(nChannels), static_cast<int>(nSamples), true, audioSamples, - rtp_timestamp, ntp_time_ms); + elapsed_time_ms, ntp_time_ms); nSamplesOut = _audioFrame.samples_per_channel_; @@ -237,13 +237,13 @@ void VoEBaseImpl::PushCaptureData(int voe_channel, const void* audio_data, void VoEBaseImpl::PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) { assert(bits_per_sample == 16); assert(number_of_frames == static_cast<int>(sample_rate / 100)); GetPlayoutData(sample_rate, number_of_channels, number_of_frames, false, - audio_data, rtp_timestamp, ntp_time_ms); + audio_data, elapsed_time_ms, ntp_time_ms); } int 
VoEBaseImpl::RegisterVoiceEngineObserver(VoiceEngineObserver& observer) @@ -1087,7 +1087,7 @@ int VoEBaseImpl::ProcessRecordedDataWithAPM( void VoEBaseImpl::GetPlayoutData(int sample_rate, int number_of_channels, int number_of_frames, bool feed_data_to_apm, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) { assert(_shared->output_mixer() != NULL); @@ -1110,7 +1110,7 @@ void VoEBaseImpl::GetPlayoutData(int sample_rate, int number_of_channels, memcpy(audio_data, _audioFrame.data_, sizeof(int16_t) * number_of_frames * number_of_channels); - *rtp_timestamp = _audioFrame.timestamp_; + *elapsed_time_ms = _audioFrame.elapsed_time_ms_; *ntp_time_ms = _audioFrame.ntp_time_ms_; } diff --git a/webrtc/voice_engine/voe_base_impl.h b/webrtc/voice_engine/voe_base_impl.h index fbcb4dd857..985ef5d838 100644 --- a/webrtc/voice_engine/voe_base_impl.h +++ b/webrtc/voice_engine/voe_base_impl.h @@ -80,7 +80,7 @@ public: uint32_t samplesPerSec, void* audioSamples, uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms); virtual int OnDataAvailable(const int voe_channels[], @@ -105,7 +105,7 @@ public: virtual void PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms); // AudioDeviceObserver @@ -143,7 +143,7 @@ private: void GetPlayoutData(int sample_rate, int number_of_channels, int number_of_frames, bool feed_data_to_apm, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms); int32_t AddBuildInfo(char* str) const; |