Adding a delay line to NetEq's output

This change adds an optional delay to NetEq's output. Note that this is not equivalent to increasing the jitter buffer by the same extra length.

Bug: b/156734419
Change-Id: I8b70b6b3bffcfd3da296ccf29853864baa03d6bb
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/175110
Commit-Queue: Henrik Lundin <henrik.lundin@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Ivo Creusen <ivoc@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31343}
author: Henrik Lundin <henrik.lundin@webrtc.org> 2020-05-25 11:26:15 +0200
committer: Commit Bot <commit-bot@chromium.org> 2020-05-25 12:03:39 +0000
commit: c49e9c253f53d7c01ce727ab84b4b321ae745669 (patch)
tree: 2b868917f1600442c5b22ff085ed4d7e6e4c1469 /modules/audio_coding
parent: 848ea9f0d3678118cb8926a2898454e5a4df58ae (diff)
download: webrtc-c49e9c253f53d7c01ce727ab84b4b321ae745669.tar.gz
7 files changed, 222 insertions, 13 deletions
diff --git a/modules/audio_coding/neteq/neteq_impl.cc b/modules/audio_coding/neteq/neteq_impl.cc
index 0b7510d341..163a287a5e 100644
--- a/modules/audio_coding/neteq/neteq_impl.cc
+++ b/modules/audio_coding/neteq/neteq_impl.cc
@@ -140,7 +140,10 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config,
                                 10,  // Report once every 10 s.
                                 tick_timer_.get()),
       no_time_stretching_(config.for_test_no_time_stretching),
-      enable_rtx_handling_(config.enable_rtx_handling) {
+      enable_rtx_handling_(config.enable_rtx_handling),
+      output_delay_chain_(
+          rtc::CheckedDivExact(config.extra_output_delay_ms, 10)),
+      output_delay_chain_ms_(config.extra_output_delay_ms) {
   RTC_LOG(LS_INFO) << "NetEq config: " << config.ToString();
   int fs = config.sample_rate_hz;
   if (fs != 8000 && fs != 16000 && fs != 32000 && fs != 48000) {
@@ -255,6 +258,25 @@ int NetEqImpl::GetAudio(AudioFrame* audio_frame,
              last_output_sample_rate_hz_ == 32000 ||
              last_output_sample_rate_hz_ == 48000)
       << "Unexpected sample rate " << last_output_sample_rate_hz_;
+
+  if (!output_delay_chain_.empty()) {
+    if (output_delay_chain_empty_) {
+      for (auto& f : output_delay_chain_) {
+        f.CopyFrom(*audio_frame);
+      }
+      output_delay_chain_empty_ = false;
+      delayed_last_output_sample_rate_hz_ = last_output_sample_rate_hz_;
+    } else {
+      RTC_DCHECK_GE(output_delay_chain_ix_, 0);
+      RTC_DCHECK_LT(output_delay_chain_ix_, output_delay_chain_.size());
+      swap(output_delay_chain_[output_delay_chain_ix_], *audio_frame);
+      *muted = audio_frame->muted();
+      output_delay_chain_ix_ =
+          (output_delay_chain_ix_ + 1) % output_delay_chain_.size();
+      delayed_last_output_sample_rate_hz_ = audio_frame->sample_rate_hz();
+    }
+  }
+
   return kOK;
 }
 
@@ -297,7 +319,8 @@ bool NetEqImpl::SetMinimumDelay(int delay_ms) {
   rtc::CritScope lock(&crit_sect_);
   if (delay_ms >= 0 && delay_ms <= 10000) {
     assert(controller_.get());
-    return controller_->SetMinimumDelay(delay_ms);
+    return controller_->SetMinimumDelay(
+        std::max(delay_ms - output_delay_chain_ms_, 0));
   }
   return false;
 }
@@ -306,7 +329,8 @@ bool NetEqImpl::SetMaximumDelay(int delay_ms) {
   rtc::CritScope lock(&crit_sect_);
   if (delay_ms >= 0 && delay_ms <= 10000) {
     assert(controller_.get());
-    return controller_->SetMaximumDelay(delay_ms);
+    return controller_->SetMaximumDelay(
+        std::max(delay_ms - output_delay_chain_ms_, 0));
   }
   return false;
 }
@@ -327,7 +351,7 @@ int NetEqImpl::GetBaseMinimumDelayMs() const {
 int NetEqImpl::TargetDelayMs() const {
   rtc::CritScope lock(&crit_sect_);
   RTC_DCHECK(controller_.get());
-  return controller_->TargetLevelMs();
+  return controller_->TargetLevelMs() + output_delay_chain_ms_;
 }
 
 int NetEqImpl::FilteredCurrentDelayMs() const {
@@ -337,7 +361,8 @@ int NetEqImpl::FilteredCurrentDelayMs() const {
   const int delay_samples =
       controller_->GetFilteredBufferLevel() + sync_buffer_->FutureLength();
   // The division below will truncate. The return value is in ms.
-  return delay_samples / rtc::CheckedDivExact(fs_hz_, 1000);
+  return delay_samples / rtc::CheckedDivExact(fs_hz_, 1000) +
+         output_delay_chain_ms_;
 }
 
 int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) {
@@ -351,6 +376,13 @@ int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) {
   stats->jitter_peaks_found = controller_->PeakFound();
   stats_->GetNetworkStatistics(fs_hz_, total_samples_in_buffers,
                                decoder_frame_length_, stats);
+  // Compensate for output delay chain.
+  stats->current_buffer_size_ms += output_delay_chain_ms_;
+  stats->preferred_buffer_size_ms += output_delay_chain_ms_;
+  stats->mean_waiting_time_ms += output_delay_chain_ms_;
+  stats->median_waiting_time_ms += output_delay_chain_ms_;
+  stats->min_waiting_time_ms += output_delay_chain_ms_;
+  stats->max_waiting_time_ms += output_delay_chain_ms_;
   return 0;
 }
 
@@ -394,12 +426,19 @@ absl::optional<uint32_t> NetEqImpl::GetPlayoutTimestamp() const {
     // which is indicated by returning an empty value.
     return absl::nullopt;
   }
-  return timestamp_scaler_->ToExternal(playout_timestamp_);
+  size_t sum_samples_in_output_delay_chain = 0;
+  for (const auto& audio_frame : output_delay_chain_) {
+    sum_samples_in_output_delay_chain += audio_frame.samples_per_channel();
+  }
+  return timestamp_scaler_->ToExternal(
+      playout_timestamp_ -
+      static_cast<uint32_t>(sum_samples_in_output_delay_chain));
 }
 
 int NetEqImpl::last_output_sample_rate_hz() const {
   rtc::CritScope lock(&crit_sect_);
-  return last_output_sample_rate_hz_;
+  return delayed_last_output_sample_rate_hz_.value_or(
+      last_output_sample_rate_hz_);
 }
 
 absl::optional<NetEq::DecoderFormat> NetEqImpl::GetDecoderFormat(
@@ -1988,8 +2027,9 @@ int NetEqImpl::ExtractPackets(size_t required_samples,
     extracted_samples = packet->timestamp - first_timestamp + packet_duration;
 
     RTC_DCHECK(controller_);
-    stats_->JitterBufferDelay(packet_duration, waiting_time_ms,
-                              controller_->TargetLevelMs());
+    stats_->JitterBufferDelay(
+        packet_duration, waiting_time_ms + output_delay_chain_ms_,
+        controller_->TargetLevelMs() + output_delay_chain_ms_);
 
     packet_list->push_back(std::move(*packet));  // Store packet in list.
     packet = absl::nullopt;  // Ensure it's never used after the move.
diff --git a/modules/audio_coding/neteq/neteq_impl.h b/modules/audio_coding/neteq/neteq_impl.h
index 956cb6ef17..7d5ebabb4c 100644
--- a/modules/audio_coding/neteq/neteq_impl.h
+++ b/modules/audio_coding/neteq/neteq_impl.h
@@ -402,6 +402,22 @@ class NetEqImpl : public webrtc::NetEq {
   bool no_time_stretching_ RTC_GUARDED_BY(crit_sect_);  // Only used for test.
   rtc::BufferT<int16_t> concealment_audio_ RTC_GUARDED_BY(crit_sect_);
   const bool enable_rtx_handling_ RTC_GUARDED_BY(crit_sect_);
+  // Data members used for adding extra delay to the output of NetEq.
+  // Vector of AudioFrames which contains the delayed audio. Accessed as a
+  // circular buffer.
+  std::vector<AudioFrame> output_delay_chain_ RTC_GUARDED_BY(crit_sect_);
+  // Index into output_delay_chain_.
+  size_t output_delay_chain_ix_ RTC_GUARDED_BY(crit_sect_) = 0;
+  // The delay in ms (which is 10 times the number of elements in
+  // output_delay_chain_).
+  const int output_delay_chain_ms_ RTC_GUARDED_BY(crit_sect_);
+  // Did output_delay_chain_ get populated yet?
+  bool output_delay_chain_empty_ RTC_GUARDED_BY(crit_sect_) = true;
+  // Contains the sample rate of the AudioFrame last emitted from the delay
+  // chain. If the extra output delay chain is not used, or if no audio has been
+  // emitted yet, the variable is empty.
+  absl::optional<int> delayed_last_output_sample_rate_hz_
+      RTC_GUARDED_BY(crit_sect_);
 
  private:
   RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);
diff --git a/modules/audio_coding/neteq/neteq_unittest.cc b/modules/audio_coding/neteq/neteq_unittest.cc
index d78e2c6488..f92ed1b2ac 100644
--- a/modules/audio_coding/neteq/neteq_unittest.cc
+++ b/modules/audio_coding/neteq/neteq_unittest.cc
@@ -1102,5 +1102,156 @@ TEST(NetEqNoTimeStretchingMode, RunTest) {
   EXPECT_EQ(0, stats.preemptive_rate);
 }
 
+namespace {
+// Helper classes and data types and functions for NetEqOutputDelayTest.
+
+class VectorAudioSink : public AudioSink {
+ public:
+  // Does not take ownership of the vector.
+  VectorAudioSink(std::vector<int16_t>* output_vector) : v_(output_vector) {}
+
+  virtual ~VectorAudioSink() = default;
+
+  bool WriteArray(const int16_t* audio, size_t num_samples) override {
+    v_->reserve(v_->size() + num_samples);
+    for (size_t i = 0; i < num_samples; ++i) {
+      v_->push_back(audio[i]);
+    }
+    return true;
+  }
+
+ private:
+  std::vector<int16_t>* const v_;
+};
+
+struct TestResult {
+  NetEqLifetimeStatistics lifetime_stats;
+  NetEqNetworkStatistics network_stats;
+  absl::optional<uint32_t> playout_timestamp;
+  int target_delay_ms;
+  int filtered_current_delay_ms;
+  int sample_rate_hz;
+};
+
+// This class is used as callback object to NetEqTest to collect some stats
+// at the end of the simulation.
+class SimEndStatsCollector : public NetEqSimulationEndedCallback {
+ public:
+  SimEndStatsCollector(TestResult& result) : result_(result) {}
+
+  void SimulationEnded(int64_t /*simulation_time_ms*/, NetEq* neteq) override {
+    result_.playout_timestamp = neteq->GetPlayoutTimestamp();
+    result_.target_delay_ms = neteq->TargetDelayMs();
+    result_.filtered_current_delay_ms = neteq->FilteredCurrentDelayMs();
+    result_.sample_rate_hz = neteq->last_output_sample_rate_hz();
+  }
+
+ private:
+  TestResult& result_;
+};
+
+TestResult DelayLineNetEqTest(int delay_ms,
+                              std::vector<int16_t>* output_vector) {
+  NetEq::Config config;
+  config.for_test_no_time_stretching = true;
+  config.extra_output_delay_ms = delay_ms;
+  auto codecs = NetEqTest::StandardDecoderMap();
+  NetEqPacketSourceInput::RtpHeaderExtensionMap rtp_ext_map = {
+      {1, kRtpExtensionAudioLevel},
+      {3, kRtpExtensionAbsoluteSendTime},
+      {5, kRtpExtensionTransportSequenceNumber},
+      {7, kRtpExtensionVideoContentType},
+      {8, kRtpExtensionVideoTiming}};
+  std::unique_ptr<NetEqInput> input = std::make_unique<NetEqRtpDumpInput>(
+      webrtc::test::ResourcePath("audio_coding/neteq_universal_new", "rtp"),
+      rtp_ext_map, absl::nullopt /*No SSRC filter*/);
+  std::unique_ptr<TimeLimitedNetEqInput> input_time_limit(
+      new TimeLimitedNetEqInput(std::move(input), 10000));
+  std::unique_ptr<AudioSink> output =
+      std::make_unique<VectorAudioSink>(output_vector);
+
+  TestResult result;
+  SimEndStatsCollector stats_collector(result);
+  NetEqTest::Callbacks callbacks;
+  callbacks.simulation_ended_callback = &stats_collector;
+
+  NetEqTest test(config, CreateBuiltinAudioDecoderFactory(), codecs,
+                 /*text_log=*/nullptr, /*neteq_factory=*/nullptr,
+                 /*input=*/std::move(input_time_limit), std::move(output),
+                 callbacks);
+  test.Run();
+  result.lifetime_stats = test.LifetimeStats();
+  result.network_stats = test.SimulationStats();
+  return result;
+}
+}  // namespace
+
+// Tests the extra output delay functionality of NetEq.
+TEST(NetEqOutputDelayTest, RunTest) {
+  std::vector<int16_t> output;
+  const auto result_no_delay = DelayLineNetEqTest(0, &output);
+  std::vector<int16_t> output_delayed;
+  constexpr int kDelayMs = 100;
+  const auto result_delay = DelayLineNetEqTest(kDelayMs, &output_delayed);
+
+  // Verify that the loss concealment remains unchanged. The point of the delay
+  // is to not affect the jitter buffering behavior.
+  // First verify that there are concealments in the test.
+  EXPECT_GT(result_no_delay.lifetime_stats.concealed_samples, 0u);
+  // And that not all of the output is concealment.
+  EXPECT_GT(result_no_delay.lifetime_stats.total_samples_received,
+            result_no_delay.lifetime_stats.concealed_samples);
+  // Now verify that they remain unchanged by the delay.
+  EXPECT_EQ(result_no_delay.lifetime_stats.concealed_samples,
+            result_delay.lifetime_stats.concealed_samples);
+  // Accelerate and pre-emptive expand should also be unchanged.
+  EXPECT_EQ(result_no_delay.lifetime_stats.inserted_samples_for_deceleration,
+            result_delay.lifetime_stats.inserted_samples_for_deceleration);
+  EXPECT_EQ(result_no_delay.lifetime_stats.removed_samples_for_acceleration,
+            result_delay.lifetime_stats.removed_samples_for_acceleration);
+  // Verify that delay stats are increased with the delay chain.
+  EXPECT_EQ(
+      result_no_delay.lifetime_stats.jitter_buffer_delay_ms +
+          kDelayMs * result_no_delay.lifetime_stats.jitter_buffer_emitted_count,
+      result_delay.lifetime_stats.jitter_buffer_delay_ms);
+  EXPECT_EQ(
+      result_no_delay.lifetime_stats.jitter_buffer_target_delay_ms +
+          kDelayMs * result_no_delay.lifetime_stats.jitter_buffer_emitted_count,
+      result_delay.lifetime_stats.jitter_buffer_target_delay_ms);
+  EXPECT_EQ(result_no_delay.network_stats.current_buffer_size_ms + kDelayMs,
+            result_delay.network_stats.current_buffer_size_ms);
+  EXPECT_EQ(result_no_delay.network_stats.preferred_buffer_size_ms + kDelayMs,
+            result_delay.network_stats.preferred_buffer_size_ms);
+  EXPECT_EQ(result_no_delay.network_stats.mean_waiting_time_ms + kDelayMs,
+            result_delay.network_stats.mean_waiting_time_ms);
+  EXPECT_EQ(result_no_delay.network_stats.median_waiting_time_ms + kDelayMs,
+            result_delay.network_stats.median_waiting_time_ms);
+  EXPECT_EQ(result_no_delay.network_stats.min_waiting_time_ms + kDelayMs,
+            result_delay.network_stats.min_waiting_time_ms);
+  EXPECT_EQ(result_no_delay.network_stats.max_waiting_time_ms + kDelayMs,
+            result_delay.network_stats.max_waiting_time_ms);
+
+  ASSERT_TRUE(result_no_delay.playout_timestamp);
+  ASSERT_TRUE(result_delay.playout_timestamp);
+  EXPECT_EQ(*result_no_delay.playout_timestamp -
+                static_cast<uint32_t>(
+                    kDelayMs *
+                    rtc::CheckedDivExact(result_no_delay.sample_rate_hz, 1000)),
+            *result_delay.playout_timestamp);
+  EXPECT_EQ(result_no_delay.target_delay_ms + kDelayMs,
+            result_delay.target_delay_ms);
+  EXPECT_EQ(result_no_delay.filtered_current_delay_ms + kDelayMs,
+            result_delay.filtered_current_delay_ms);
+
+  // Verify expected delay in decoded signal. The test vector uses 8 kHz sample
+  // rate, so the delay will be 8 times the delay in ms.
+  constexpr size_t kExpectedDelaySamples = kDelayMs * 8;
+  for (size_t i = 0;
+       i < output.size() && i + kExpectedDelaySamples < output_delayed.size();
+       ++i) {
+    EXPECT_EQ(output[i], output_delayed[i + kExpectedDelaySamples]);
+  }
+}
+
 }  // namespace test
 }  // namespace webrtc
diff --git a/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc b/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc
index 3f06b1cfc4..337f54ed6e 100644
--- a/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc
+++ b/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc
@@ -33,7 +33,8 @@ NetEqStatsPlotter::NetEqStatsPlotter(bool make_matlab_plot,
   stats_getter_.reset(new NetEqStatsGetter(std::move(delay_analyzer)));
 }
 
-void NetEqStatsPlotter::SimulationEnded(int64_t simulation_time_ms) {
+void NetEqStatsPlotter::SimulationEnded(int64_t simulation_time_ms,
+                                        NetEq* /*neteq*/) {
   if (make_matlab_plot_) {
     auto matlab_script_name = base_file_name_;
     std::replace(matlab_script_name.begin(), matlab_script_name.end(), '.',
diff --git a/modules/audio_coding/neteq/tools/neteq_stats_plotter.h b/modules/audio_coding/neteq/tools/neteq_stats_plotter.h
index c4df24e073..d6918670fd 100644
--- a/modules/audio_coding/neteq/tools/neteq_stats_plotter.h
+++ b/modules/audio_coding/neteq/tools/neteq_stats_plotter.h
@@ -28,7 +28,7 @@ class NetEqStatsPlotter : public NetEqSimulationEndedCallback {
                     bool show_concealment_events,
                     std::string base_file_name);
 
-  void SimulationEnded(int64_t simulation_time_ms) override;
+  void SimulationEnded(int64_t simulation_time_ms, NetEq* neteq) override;
 
   NetEqStatsGetter* stats_getter() { return stats_getter_.get(); }
 
diff --git a/modules/audio_coding/neteq/tools/neteq_test.cc b/modules/audio_coding/neteq/tools/neteq_test.cc
index f8b6161a98..a263a73721 100644
--- a/modules/audio_coding/neteq/tools/neteq_test.cc
+++ b/modules/audio_coding/neteq/tools/neteq_test.cc
@@ -91,7 +91,8 @@ int64_t NetEqTest::Run() {
     simulation_time += step_result.simulation_step_ms;
   } while (!step_result.is_simulation_finished);
   if (callbacks_.simulation_ended_callback) {
-    callbacks_.simulation_ended_callback->SimulationEnded(simulation_time);
+    callbacks_.simulation_ended_callback->SimulationEnded(simulation_time,
+                                                          neteq_.get());
   }
   return simulation_time;
 }
diff --git a/modules/audio_coding/neteq/tools/neteq_test.h b/modules/audio_coding/neteq/tools/neteq_test.h
index 0a6c24f3d6..3b787a6cfb 100644
--- a/modules/audio_coding/neteq/tools/neteq_test.h
+++ b/modules/audio_coding/neteq/tools/neteq_test.h
@@ -61,7 +61,7 @@ class NetEqGetAudioCallback {
 class NetEqSimulationEndedCallback {
  public:
   virtual ~NetEqSimulationEndedCallback() = default;
-  virtual void SimulationEnded(int64_t simulation_time_ms) = 0;
+  virtual void SimulationEnded(int64_t simulation_time_ms, NetEq* neteq) = 0;
 };
 
 // Class that provides an input--output test for NetEq. The input (both packets
author	Henrik Lundin <henrik.lundin@webrtc.org>	2020-05-25 11:26:15 +0200
committer	Commit Bot <commit-bot@chromium.org>	2020-05-25 12:03:39 +0000
commit	c49e9c253f53d7c01ce727ab84b4b321ae745669 (patch)
tree	2b868917f1600442c5b22ff085ed4d7e6e4c1469 /modules/audio_coding
parent	848ea9f0d3678118cb8926a2898454e5a4df58ae (diff)
download	webrtc-c49e9c253f53d7c01ce727ab84b4b321ae745669.tar.gz