diff options
author | Henrik Lundin <henrik.lundin@webrtc.org> | 2020-05-25 11:26:15 +0200 |
---|---|---|
committer | Commit Bot <commit-bot@chromium.org> | 2020-05-25 12:03:39 +0000 |
commit | c49e9c253f53d7c01ce727ab84b4b321ae745669 (patch) | |
tree | 2b868917f1600442c5b22ff085ed4d7e6e4c1469 /modules/audio_coding | |
parent | 848ea9f0d3678118cb8926a2898454e5a4df58ae (diff) | |
download | webrtc-c49e9c253f53d7c01ce727ab84b4b321ae745669.tar.gz |
Adding a delay line to NetEq's output
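This change adds an optional delay to NetEq's output. Note that this is not
equivalent to increasing the jitter buffer by the same extra length: the
delay is applied to the audio frames that GetAudio has already produced, so
the jitter buffering behavior (and hence loss concealment and time
stretching) is left unchanged.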
Bug: b/156734419
Change-Id: I8b70b6b3bffcfd3da296ccf29853864baa03d6bb
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/175110
Commit-Queue: Henrik Lundin <henrik.lundin@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Ivo Creusen <ivoc@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31343}
Diffstat (limited to 'modules/audio_coding')
-rw-r--r-- | modules/audio_coding/neteq/neteq_impl.cc | 58 | ||||
-rw-r--r-- | modules/audio_coding/neteq/neteq_impl.h | 16 | ||||
-rw-r--r-- | modules/audio_coding/neteq/neteq_unittest.cc | 151 | ||||
-rw-r--r-- | modules/audio_coding/neteq/tools/neteq_stats_plotter.cc | 3 | ||||
-rw-r--r-- | modules/audio_coding/neteq/tools/neteq_stats_plotter.h | 2 | ||||
-rw-r--r-- | modules/audio_coding/neteq/tools/neteq_test.cc | 3 | ||||
-rw-r--r-- | modules/audio_coding/neteq/tools/neteq_test.h | 2 |
7 files changed, 222 insertions, 13 deletions
diff --git a/modules/audio_coding/neteq/neteq_impl.cc b/modules/audio_coding/neteq/neteq_impl.cc index 0b7510d341..163a287a5e 100644 --- a/modules/audio_coding/neteq/neteq_impl.cc +++ b/modules/audio_coding/neteq/neteq_impl.cc @@ -140,7 +140,10 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config, 10, // Report once every 10 s. tick_timer_.get()), no_time_stretching_(config.for_test_no_time_stretching), - enable_rtx_handling_(config.enable_rtx_handling) { + enable_rtx_handling_(config.enable_rtx_handling), + output_delay_chain_( + rtc::CheckedDivExact(config.extra_output_delay_ms, 10)), + output_delay_chain_ms_(config.extra_output_delay_ms) { RTC_LOG(LS_INFO) << "NetEq config: " << config.ToString(); int fs = config.sample_rate_hz; if (fs != 8000 && fs != 16000 && fs != 32000 && fs != 48000) { @@ -255,6 +258,25 @@ int NetEqImpl::GetAudio(AudioFrame* audio_frame, last_output_sample_rate_hz_ == 32000 || last_output_sample_rate_hz_ == 48000) << "Unexpected sample rate " << last_output_sample_rate_hz_; + + if (!output_delay_chain_.empty()) { + if (output_delay_chain_empty_) { + for (auto& f : output_delay_chain_) { + f.CopyFrom(*audio_frame); + } + output_delay_chain_empty_ = false; + delayed_last_output_sample_rate_hz_ = last_output_sample_rate_hz_; + } else { + RTC_DCHECK_GE(output_delay_chain_ix_, 0); + RTC_DCHECK_LT(output_delay_chain_ix_, output_delay_chain_.size()); + swap(output_delay_chain_[output_delay_chain_ix_], *audio_frame); + *muted = audio_frame->muted(); + output_delay_chain_ix_ = + (output_delay_chain_ix_ + 1) % output_delay_chain_.size(); + delayed_last_output_sample_rate_hz_ = audio_frame->sample_rate_hz(); + } + } + return kOK; } @@ -297,7 +319,8 @@ bool NetEqImpl::SetMinimumDelay(int delay_ms) { rtc::CritScope lock(&crit_sect_); if (delay_ms >= 0 && delay_ms <= 10000) { assert(controller_.get()); - return controller_->SetMinimumDelay(delay_ms); + return controller_->SetMinimumDelay( + std::max(delay_ms - output_delay_chain_ms_, 0)); } return false; 
} @@ -306,7 +329,8 @@ bool NetEqImpl::SetMaximumDelay(int delay_ms) { rtc::CritScope lock(&crit_sect_); if (delay_ms >= 0 && delay_ms <= 10000) { assert(controller_.get()); - return controller_->SetMaximumDelay(delay_ms); + return controller_->SetMaximumDelay( + std::max(delay_ms - output_delay_chain_ms_, 0)); } return false; } @@ -327,7 +351,7 @@ int NetEqImpl::GetBaseMinimumDelayMs() const { int NetEqImpl::TargetDelayMs() const { rtc::CritScope lock(&crit_sect_); RTC_DCHECK(controller_.get()); - return controller_->TargetLevelMs(); + return controller_->TargetLevelMs() + output_delay_chain_ms_; } int NetEqImpl::FilteredCurrentDelayMs() const { @@ -337,7 +361,8 @@ int NetEqImpl::FilteredCurrentDelayMs() const { const int delay_samples = controller_->GetFilteredBufferLevel() + sync_buffer_->FutureLength(); // The division below will truncate. The return value is in ms. - return delay_samples / rtc::CheckedDivExact(fs_hz_, 1000); + return delay_samples / rtc::CheckedDivExact(fs_hz_, 1000) + + output_delay_chain_ms_; } int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) { @@ -351,6 +376,13 @@ int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) { stats->jitter_peaks_found = controller_->PeakFound(); stats_->GetNetworkStatistics(fs_hz_, total_samples_in_buffers, decoder_frame_length_, stats); + // Compensate for output delay chain. + stats->current_buffer_size_ms += output_delay_chain_ms_; + stats->preferred_buffer_size_ms += output_delay_chain_ms_; + stats->mean_waiting_time_ms += output_delay_chain_ms_; + stats->median_waiting_time_ms += output_delay_chain_ms_; + stats->min_waiting_time_ms += output_delay_chain_ms_; + stats->max_waiting_time_ms += output_delay_chain_ms_; return 0; } @@ -394,12 +426,19 @@ absl::optional<uint32_t> NetEqImpl::GetPlayoutTimestamp() const { // which is indicated by returning an empty value. 
return absl::nullopt; } - return timestamp_scaler_->ToExternal(playout_timestamp_); + size_t sum_samples_in_output_delay_chain = 0; + for (const auto& audio_frame : output_delay_chain_) { + sum_samples_in_output_delay_chain += audio_frame.samples_per_channel(); + } + return timestamp_scaler_->ToExternal( + playout_timestamp_ - + static_cast<uint32_t>(sum_samples_in_output_delay_chain)); } int NetEqImpl::last_output_sample_rate_hz() const { rtc::CritScope lock(&crit_sect_); - return last_output_sample_rate_hz_; + return delayed_last_output_sample_rate_hz_.value_or( + last_output_sample_rate_hz_); } absl::optional<NetEq::DecoderFormat> NetEqImpl::GetDecoderFormat( @@ -1988,8 +2027,9 @@ int NetEqImpl::ExtractPackets(size_t required_samples, extracted_samples = packet->timestamp - first_timestamp + packet_duration; RTC_DCHECK(controller_); - stats_->JitterBufferDelay(packet_duration, waiting_time_ms, - controller_->TargetLevelMs()); + stats_->JitterBufferDelay( + packet_duration, waiting_time_ms + output_delay_chain_ms_, + controller_->TargetLevelMs() + output_delay_chain_ms_); packet_list->push_back(std::move(*packet)); // Store packet in list. packet = absl::nullopt; // Ensure it's never used after the move. diff --git a/modules/audio_coding/neteq/neteq_impl.h b/modules/audio_coding/neteq/neteq_impl.h index 956cb6ef17..7d5ebabb4c 100644 --- a/modules/audio_coding/neteq/neteq_impl.h +++ b/modules/audio_coding/neteq/neteq_impl.h @@ -402,6 +402,22 @@ class NetEqImpl : public webrtc::NetEq { bool no_time_stretching_ RTC_GUARDED_BY(crit_sect_); // Only used for test. rtc::BufferT<int16_t> concealment_audio_ RTC_GUARDED_BY(crit_sect_); const bool enable_rtx_handling_ RTC_GUARDED_BY(crit_sect_); + // Data members used for adding extra delay to the output of NetEq. + // Vector of AudioFrames which contains the delayed audio. Accessed as a + // circular buffer. + std::vector<AudioFrame> output_delay_chain_ RTC_GUARDED_BY(crit_sect_); + // Index into output_delay_chain_. 
+ size_t output_delay_chain_ix_ RTC_GUARDED_BY(crit_sect_) = 0; + // The delay in ms (which is 10 times the number of elements in + // output_delay_chain_). + const int output_delay_chain_ms_ RTC_GUARDED_BY(crit_sect_); + // Did output_delay_chain_ get populated yet? + bool output_delay_chain_empty_ RTC_GUARDED_BY(crit_sect_) = true; + // Contains the sample rate of the AudioFrame last emitted from the delay + // chain. If the extra output delay chain is not used, or if no audio has been + // emitted yet, the variable is empty. + absl::optional<int> delayed_last_output_sample_rate_hz_ + RTC_GUARDED_BY(crit_sect_); private: RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl); diff --git a/modules/audio_coding/neteq/neteq_unittest.cc b/modules/audio_coding/neteq/neteq_unittest.cc index d78e2c6488..f92ed1b2ac 100644 --- a/modules/audio_coding/neteq/neteq_unittest.cc +++ b/modules/audio_coding/neteq/neteq_unittest.cc @@ -1102,5 +1102,156 @@ TEST(NetEqNoTimeStretchingMode, RunTest) { EXPECT_EQ(0, stats.preemptive_rate); } +namespace { +// Helper classes and data types and functions for NetEqOutputDelayTest. + +class VectorAudioSink : public AudioSink { + public: + // Does not take ownership of the vector. + VectorAudioSink(std::vector<int16_t>* output_vector) : v_(output_vector) {} + + virtual ~VectorAudioSink() = default; + + bool WriteArray(const int16_t* audio, size_t num_samples) override { + v_->reserve(v_->size() + num_samples); + for (size_t i = 0; i < num_samples; ++i) { + v_->push_back(audio[i]); + } + return true; + } + + private: + std::vector<int16_t>* const v_; +}; + +struct TestResult { + NetEqLifetimeStatistics lifetime_stats; + NetEqNetworkStatistics network_stats; + absl::optional<uint32_t> playout_timestamp; + int target_delay_ms; + int filtered_current_delay_ms; + int sample_rate_hz; +}; + +// This class is used as callback object to NetEqTest to collect some stats +// at the end of the simulation. 
+class SimEndStatsCollector : public NetEqSimulationEndedCallback { + public: + SimEndStatsCollector(TestResult& result) : result_(result) {} + + void SimulationEnded(int64_t /*simulation_time_ms*/, NetEq* neteq) override { + result_.playout_timestamp = neteq->GetPlayoutTimestamp(); + result_.target_delay_ms = neteq->TargetDelayMs(); + result_.filtered_current_delay_ms = neteq->FilteredCurrentDelayMs(); + result_.sample_rate_hz = neteq->last_output_sample_rate_hz(); + } + + private: + TestResult& result_; +}; + +TestResult DelayLineNetEqTest(int delay_ms, + std::vector<int16_t>* output_vector) { + NetEq::Config config; + config.for_test_no_time_stretching = true; + config.extra_output_delay_ms = delay_ms; + auto codecs = NetEqTest::StandardDecoderMap(); + NetEqPacketSourceInput::RtpHeaderExtensionMap rtp_ext_map = { + {1, kRtpExtensionAudioLevel}, + {3, kRtpExtensionAbsoluteSendTime}, + {5, kRtpExtensionTransportSequenceNumber}, + {7, kRtpExtensionVideoContentType}, + {8, kRtpExtensionVideoTiming}}; + std::unique_ptr<NetEqInput> input = std::make_unique<NetEqRtpDumpInput>( + webrtc::test::ResourcePath("audio_coding/neteq_universal_new", "rtp"), + rtp_ext_map, absl::nullopt /*No SSRC filter*/); + std::unique_ptr<TimeLimitedNetEqInput> input_time_limit( + new TimeLimitedNetEqInput(std::move(input), 10000)); + std::unique_ptr<AudioSink> output = + std::make_unique<VectorAudioSink>(output_vector); + + TestResult result; + SimEndStatsCollector stats_collector(result); + NetEqTest::Callbacks callbacks; + callbacks.simulation_ended_callback = &stats_collector; + + NetEqTest test(config, CreateBuiltinAudioDecoderFactory(), codecs, + /*text_log=*/nullptr, /*neteq_factory=*/nullptr, + /*input=*/std::move(input_time_limit), std::move(output), + callbacks); + test.Run(); + result.lifetime_stats = test.LifetimeStats(); + result.network_stats = test.SimulationStats(); + return result; +} +} // namespace + +// Tests the extra output delay functionality of NetEq. 
+TEST(NetEqOutputDelayTest, RunTest) { + std::vector<int16_t> output; + const auto result_no_delay = DelayLineNetEqTest(0, &output); + std::vector<int16_t> output_delayed; + constexpr int kDelayMs = 100; + const auto result_delay = DelayLineNetEqTest(kDelayMs, &output_delayed); + + // Verify that the loss concealment remains unchanged. The point of the delay + // is to not affect the jitter buffering behavior. + // First verify that there are concealments in the test. + EXPECT_GT(result_no_delay.lifetime_stats.concealed_samples, 0u); + // And that not all of the output is concealment. + EXPECT_GT(result_no_delay.lifetime_stats.total_samples_received, + result_no_delay.lifetime_stats.concealed_samples); + // Now verify that they remain unchanged by the delay. + EXPECT_EQ(result_no_delay.lifetime_stats.concealed_samples, + result_delay.lifetime_stats.concealed_samples); + // Accelerate and pre-emptive expand should also be unchanged. + EXPECT_EQ(result_no_delay.lifetime_stats.inserted_samples_for_deceleration, + result_delay.lifetime_stats.inserted_samples_for_deceleration); + EXPECT_EQ(result_no_delay.lifetime_stats.removed_samples_for_acceleration, + result_delay.lifetime_stats.removed_samples_for_acceleration); + // Verify that delay stats are increased with the delay chain. 
+ EXPECT_EQ( + result_no_delay.lifetime_stats.jitter_buffer_delay_ms + + kDelayMs * result_no_delay.lifetime_stats.jitter_buffer_emitted_count, + result_delay.lifetime_stats.jitter_buffer_delay_ms); + EXPECT_EQ( + result_no_delay.lifetime_stats.jitter_buffer_target_delay_ms + + kDelayMs * result_no_delay.lifetime_stats.jitter_buffer_emitted_count, + result_delay.lifetime_stats.jitter_buffer_target_delay_ms); + EXPECT_EQ(result_no_delay.network_stats.current_buffer_size_ms + kDelayMs, + result_delay.network_stats.current_buffer_size_ms); + EXPECT_EQ(result_no_delay.network_stats.preferred_buffer_size_ms + kDelayMs, + result_delay.network_stats.preferred_buffer_size_ms); + EXPECT_EQ(result_no_delay.network_stats.mean_waiting_time_ms + kDelayMs, + result_delay.network_stats.mean_waiting_time_ms); + EXPECT_EQ(result_no_delay.network_stats.median_waiting_time_ms + kDelayMs, + result_delay.network_stats.median_waiting_time_ms); + EXPECT_EQ(result_no_delay.network_stats.min_waiting_time_ms + kDelayMs, + result_delay.network_stats.min_waiting_time_ms); + EXPECT_EQ(result_no_delay.network_stats.max_waiting_time_ms + kDelayMs, + result_delay.network_stats.max_waiting_time_ms); + + ASSERT_TRUE(result_no_delay.playout_timestamp); + ASSERT_TRUE(result_delay.playout_timestamp); + EXPECT_EQ(*result_no_delay.playout_timestamp - + static_cast<uint32_t>( + kDelayMs * + rtc::CheckedDivExact(result_no_delay.sample_rate_hz, 1000)), + *result_delay.playout_timestamp); + EXPECT_EQ(result_no_delay.target_delay_ms + kDelayMs, + result_delay.target_delay_ms); + EXPECT_EQ(result_no_delay.filtered_current_delay_ms + kDelayMs, + result_delay.filtered_current_delay_ms); + + // Verify expected delay in decoded signal. The test vector uses 8 kHz sample + // rate, so the delay will be 8 times the delay in ms. 
+ constexpr size_t kExpectedDelaySamples = kDelayMs * 8; + for (size_t i = 0; + i < output.size() && i + kExpectedDelaySamples < output_delayed.size(); + ++i) { + EXPECT_EQ(output[i], output_delayed[i + kExpectedDelaySamples]); + } +} + } // namespace test } // namespace webrtc diff --git a/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc b/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc index 3f06b1cfc4..337f54ed6e 100644 --- a/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc +++ b/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc @@ -33,7 +33,8 @@ NetEqStatsPlotter::NetEqStatsPlotter(bool make_matlab_plot, stats_getter_.reset(new NetEqStatsGetter(std::move(delay_analyzer))); } -void NetEqStatsPlotter::SimulationEnded(int64_t simulation_time_ms) { +void NetEqStatsPlotter::SimulationEnded(int64_t simulation_time_ms, + NetEq* /*neteq*/) { if (make_matlab_plot_) { auto matlab_script_name = base_file_name_; std::replace(matlab_script_name.begin(), matlab_script_name.end(), '.', diff --git a/modules/audio_coding/neteq/tools/neteq_stats_plotter.h b/modules/audio_coding/neteq/tools/neteq_stats_plotter.h index c4df24e073..d6918670fd 100644 --- a/modules/audio_coding/neteq/tools/neteq_stats_plotter.h +++ b/modules/audio_coding/neteq/tools/neteq_stats_plotter.h @@ -28,7 +28,7 @@ class NetEqStatsPlotter : public NetEqSimulationEndedCallback { bool show_concealment_events, std::string base_file_name); - void SimulationEnded(int64_t simulation_time_ms) override; + void SimulationEnded(int64_t simulation_time_ms, NetEq* neteq) override; NetEqStatsGetter* stats_getter() { return stats_getter_.get(); } diff --git a/modules/audio_coding/neteq/tools/neteq_test.cc b/modules/audio_coding/neteq/tools/neteq_test.cc index f8b6161a98..a263a73721 100644 --- a/modules/audio_coding/neteq/tools/neteq_test.cc +++ b/modules/audio_coding/neteq/tools/neteq_test.cc @@ -91,7 +91,8 @@ int64_t NetEqTest::Run() { simulation_time += step_result.simulation_step_ms; } while 
(!step_result.is_simulation_finished); if (callbacks_.simulation_ended_callback) { - callbacks_.simulation_ended_callback->SimulationEnded(simulation_time); + callbacks_.simulation_ended_callback->SimulationEnded(simulation_time, + neteq_.get()); } return simulation_time; } diff --git a/modules/audio_coding/neteq/tools/neteq_test.h b/modules/audio_coding/neteq/tools/neteq_test.h index 0a6c24f3d6..3b787a6cfb 100644 --- a/modules/audio_coding/neteq/tools/neteq_test.h +++ b/modules/audio_coding/neteq/tools/neteq_test.h @@ -61,7 +61,7 @@ class NetEqGetAudioCallback { class NetEqSimulationEndedCallback { public: virtual ~NetEqSimulationEndedCallback() = default; - virtual void SimulationEnded(int64_t simulation_time_ms) = 0; + virtual void SimulationEnded(int64_t simulation_time_ms, NetEq* neteq) = 0; }; // Class that provides an input--output test for NetEq. The input (both packets |