Diffstat (limited to 'modules/audio_processing')
58 files changed, 3273 insertions, 420 deletions
diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h index 125ae83a2b..e2f70a4c68 100644 --- a/modules/audio_processing/aec3/aec_state.h +++ b/modules/audio_processing/aec3/aec_state.h @@ -75,6 +75,12 @@ class AecState { return erle_estimator_.Erle(onset_compensated); } + // Returns the non-capped ERLE. + rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleUnbounded() + const { + return erle_estimator_.ErleUnbounded(); + } + // Returns the fullband ERLE estimate in log2 units. float FullBandErleLog2() const { return erle_estimator_.FullbandErleLog2(); } diff --git a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc index 35a2cff7ea..181b649f6d 100644 --- a/modules/audio_processing/aec3/echo_canceller3.cc +++ b/modules/audio_processing/aec3/echo_canceller3.cc @@ -49,7 +49,11 @@ void RetrieveFieldTrialValue(const char* trial_name, ParseFieldTrial({&field_trial_param}, field_trial_str); float field_trial_value = static_cast<float>(field_trial_param.Get()); - if (field_trial_value >= min && field_trial_value <= max) { + if (field_trial_value >= min && field_trial_value <= max && + field_trial_value != *value_to_update) { + RTC_LOG(LS_INFO) << "Key " << trial_name + << " changing AEC3 parameter value from " + << *value_to_update << " to " << field_trial_value; *value_to_update = field_trial_value; } } @@ -65,7 +69,11 @@ void RetrieveFieldTrialValue(const char* trial_name, ParseFieldTrial({&field_trial_param}, field_trial_str); float field_trial_value = field_trial_param.Get(); - if (field_trial_value >= min && field_trial_value <= max) { + if (field_trial_value >= min && field_trial_value <= max && + field_trial_value != *value_to_update) { + RTC_LOG(LS_INFO) << "Key " << trial_name + << " changing AEC3 parameter value from " + << *value_to_update << " to " << field_trial_value; *value_to_update = field_trial_value; } } @@ -572,6 +580,12 @@ EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) { RetrieveFieldTrialValue("WebRTC-Aec3SuppressorEpStrengthDefaultLenOverride", -1.f, 1.f, &adjusted_cfg.ep_strength.default_len); + // Field trial-based overrides of individual delay estimator parameters. + RetrieveFieldTrialValue("WebRTC-Aec3DelayEstimateSmoothingOverride", 0.f, 1.f, + &adjusted_cfg.delay.delay_estimate_smoothing); + RetrieveFieldTrialValue( + "WebRTC-Aec3DelayEstimateSmoothingDelayFoundOverride", 0.f, 1.f, + &adjusted_cfg.delay.delay_estimate_smoothing_delay_found); return adjusted_cfg; } @@ -731,6 +745,10 @@ EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config, std::vector<std::vector<rtc::ArrayView<float>>>( 1, std::vector<rtc::ArrayView<float>>(num_capture_channels_)); } + + RTC_LOG(LS_INFO) << "AEC3 created with sample rate: " << sample_rate_hz_ + << " Hz, num render channels: " << num_render_channels_ + << ", num capture channels: " << num_capture_channels_; } EchoCanceller3::~EchoCanceller3() = default; diff --git a/modules/audio_processing/aec3/echo_path_delay_estimator.cc b/modules/audio_processing/aec3/echo_path_delay_estimator.cc index 2c987f9341..8a78834143 100644 --- a/modules/audio_processing/aec3/echo_path_delay_estimator.cc +++ b/modules/audio_processing/aec3/echo_path_delay_estimator.cc @@ -42,6 +42,7 @@ EchoPathDelayEstimator::EchoPathDelayEstimator( ? 
config.render_levels.poor_excitation_render_limit_ds8 : config.render_levels.poor_excitation_render_limit, config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, config.delay.delay_candidate_detection_threshold), matched_filter_lag_aggregator_(data_dumper_, matched_filter_.GetMaxFilterLag(), @@ -71,7 +72,8 @@ absl::optional<DelayEstimate> EchoPathDelayEstimator::EstimateDelay( data_dumper_->DumpWav("aec3_capture_decimator_output", downsampled_capture.size(), downsampled_capture.data(), 16000 / down_sampling_factor_, 1); - matched_filter_.Update(render_buffer, downsampled_capture); + matched_filter_.Update(render_buffer, downsampled_capture, + matched_filter_lag_aggregator_.ReliableDelayFound()); absl::optional<DelayEstimate> aggregated_matched_filter_lag = matched_filter_lag_aggregator_.Aggregate( diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc index 6c177c9a10..2bfaa951d8 100644 --- a/modules/audio_processing/aec3/echo_remover.cc +++ b/modules/audio_processing/aec3/echo_remover.cc @@ -172,6 +172,7 @@ class EchoRemoverImpl final : public EchoRemover { std::vector<std::array<float, kFftLengthBy2Plus1>> Y2_heap_; std::vector<std::array<float, kFftLengthBy2Plus1>> E2_heap_; std::vector<std::array<float, kFftLengthBy2Plus1>> R2_heap_; + std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded_heap_; std::vector<std::array<float, kFftLengthBy2Plus1>> S2_linear_heap_; std::vector<FftData> Y_heap_; std::vector<FftData> E_heap_; @@ -218,6 +219,7 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config, Y2_heap_(NumChannelsOnHeap(num_capture_channels_)), E2_heap_(NumChannelsOnHeap(num_capture_channels_)), R2_heap_(NumChannelsOnHeap(num_capture_channels_)), + R2_unbounded_heap_(NumChannelsOnHeap(num_capture_channels_)), S2_linear_heap_(NumChannelsOnHeap(num_capture_channels_)), Y_heap_(NumChannelsOnHeap(num_capture_channels_)), E_heap_(NumChannelsOnHeap(num_capture_channels_)), @@ -265,6 +267,8 @@ void EchoRemoverImpl::ProcessCapture( std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack> R2_stack; std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack> + R2_unbounded_stack; + std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack> S2_linear_stack; std::array<FftData, kMaxNumChannelsOnStack> Y_stack; std::array<FftData, kMaxNumChannelsOnStack> E_stack; @@ -280,6 +284,8 @@ void EchoRemoverImpl::ProcessCapture( E2_stack.data(), num_capture_channels_); rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2( R2_stack.data(), num_capture_channels_); + rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded( + R2_unbounded_stack.data(), num_capture_channels_); rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> S2_linear( S2_linear_stack.data(), num_capture_channels_); rtc::ArrayView<FftData> Y(Y_stack.data(), num_capture_channels_); @@ -301,6 +307,8 @@ void EchoRemoverImpl::ProcessCapture( E2_heap_.data(), num_capture_channels_); R2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>( R2_heap_.data(), num_capture_channels_); + R2_unbounded = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>( + R2_unbounded_heap_.data(), num_capture_channels_); S2_linear = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>( S2_linear_heap_.data(), num_capture_channels_); Y = rtc::ArrayView<FftData>(Y_heap_.data(), num_capture_channels_); @@ -406,8 +414,8 @@ void EchoRemoverImpl::ProcessCapture( if (capture_output_used_) { // 
Estimate the residual echo power. residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2, - suppression_gain_.IsDominantNearend(), - R2); + suppression_gain_.IsDominantNearend(), R2, + R2_unbounded); // Suppressor nearend estimate. if (aec_state_.UsableLinearEstimate()) { @@ -430,7 +438,7 @@ void EchoRemoverImpl::ProcessCapture( // Compute preferred gains. float high_bands_gain; - suppression_gain_.GetGain(nearend_spectrum, echo_spectrum, R2, + suppression_gain_.GetGain(nearend_spectrum, echo_spectrum, R2, R2_unbounded, cng_.NoiseSpectrum(), render_signal_analyzer_, aec_state_, x, clock_drift, &high_bands_gain, &G); diff --git a/modules/audio_processing/aec3/erle_estimator.h b/modules/audio_processing/aec3/erle_estimator.h index cae896e82c..55797592a9 100644 --- a/modules/audio_processing/aec3/erle_estimator.h +++ b/modules/audio_processing/aec3/erle_estimator.h @@ -62,6 +62,18 @@ class ErleEstimator { : subband_erle_estimator_.Erle(onset_compensated); } + // Returns the non-capped subband ERLE. + rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleUnbounded() + const { + // Unbounded ERLE is only used with the subband erle estimator where the + // ERLE is often capped at low values. When the signal dependent ERLE + // estimator is used the capped ERLE is returned. + return !signal_dependent_erle_estimator_ + ? subband_erle_estimator_.ErleUnbounded() + : signal_dependent_erle_estimator_->Erle( + /*onset_compensated=*/false); + } + // Returns the subband ERLE that are estimated during onsets (only used for // testing). rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleDuringOnsets() diff --git a/modules/audio_processing/aec3/erle_estimator_unittest.cc b/modules/audio_processing/aec3/erle_estimator_unittest.cc index 6df71424bc..e38f2386f7 100644 --- a/modules/audio_processing/aec3/erle_estimator_unittest.cc +++ b/modules/audio_processing/aec3/erle_estimator_unittest.cc @@ -50,6 +50,16 @@ void VerifyErle( EXPECT_NEAR(kTrueErle, erle_time_domain, 0.5); } +void VerifyErleGreaterOrEqual( + rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle1, + rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle2) { + for (size_t ch = 0; ch < erle1.size(); ++ch) { + for (size_t i = 0; i < kFftLengthBy2Plus1; ++i) { + EXPECT_GE(erle1[ch][i], erle2[ch][i]); + } + } +} + void FormFarendTimeFrame(std::vector<std::vector<std::vector<float>>>* x) { const std::array<float, kBlockSize> frame = { 7459.88, 17209.6, 17383, 20768.9, 16816.7, 18386.3, 4492.83, 9675.85, @@ -156,9 +166,10 @@ TEST_P(ErleEstimatorMultiChannel, VerifyErleIncreaseAndHold) { kNumBands, std::vector<std::vector<float>>( num_render_channels, std::vector<float>(kBlockSize, 0.f))); std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>> - filter_frequency_response( - config.filter.refined.length_blocks, - std::vector<std::array<float, kFftLengthBy2Plus1>>(num_capture_channels)); + filter_frequency_response( + config.filter.refined.length_blocks, + std::vector<std::array<float, kFftLengthBy2Plus1>>( + num_capture_channels)); std::unique_ptr<RenderDelayBuffer> render_delay_buffer( RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels)); @@ -181,6 +192,10 @@ TEST_P(ErleEstimatorMultiChannel, VerifyErleIncreaseAndHold) { VerifyErle(estimator.Erle(/*onset_compensated=*/true), std::pow(2.f, estimator.FullbandErleLog2()), config.erle.max_l, config.erle.max_h); + VerifyErleGreaterOrEqual(estimator.Erle(/*onset_compensated=*/false), + 
estimator.Erle(/*onset_compensated=*/true)); + VerifyErleGreaterOrEqual(estimator.ErleUnbounded(), + estimator.Erle(/*onset_compensated=*/false)); FormNearendFrame(&x, &X2, E2, Y2); // Verifies that the ERLE is not immediately decreased during nearend @@ -194,6 +209,10 @@ TEST_P(ErleEstimatorMultiChannel, VerifyErleIncreaseAndHold) { VerifyErle(estimator.Erle(/*onset_compensated=*/true), std::pow(2.f, estimator.FullbandErleLog2()), config.erle.max_l, config.erle.max_h); + VerifyErleGreaterOrEqual(estimator.Erle(/*onset_compensated=*/false), + estimator.Erle(/*onset_compensated=*/true)); + VerifyErleGreaterOrEqual(estimator.ErleUnbounded(), + estimator.Erle(/*onset_compensated=*/false)); } TEST_P(ErleEstimatorMultiChannel, VerifyErleTrackingOnOnsets) { @@ -212,9 +231,10 @@ TEST_P(ErleEstimatorMultiChannel, VerifyErleTrackingOnOnsets) { kNumBands, std::vector<std::vector<float>>( num_render_channels, std::vector<float>(kBlockSize, 0.f))); std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>> - filter_frequency_response( - config.filter.refined.length_blocks, - std::vector<std::array<float, kFftLengthBy2Plus1>>(num_capture_channels)); + filter_frequency_response( + config.filter.refined.length_blocks, + std::vector<std::array<float, kFftLengthBy2Plus1>>( + num_capture_channels)); std::unique_ptr<RenderDelayBuffer> render_delay_buffer( RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels)); diff --git a/modules/audio_processing/aec3/matched_filter.cc b/modules/audio_processing/aec3/matched_filter.cc index 64b2d4e697..1721e9c983 100644 --- a/modules/audio_processing/aec3/matched_filter.cc +++ b/modules/audio_processing/aec3/matched_filter.cc @@ -307,7 +307,8 @@ MatchedFilter::MatchedFilter(ApmDataDumper* data_dumper, int num_matched_filters, size_t alignment_shift_sub_blocks, float excitation_limit, - float smoothing, + float smoothing_fast, + float smoothing_slow, float matching_filter_threshold) : data_dumper_(data_dumper), optimization_(optimization), @@ -319,7 +320,8 @@ MatchedFilter::MatchedFilter(ApmDataDumper* data_dumper, lag_estimates_(num_matched_filters), filters_offsets_(num_matched_filters, 0), excitation_limit_(excitation_limit), - smoothing_(smoothing), + smoothing_fast_(smoothing_fast), + smoothing_slow_(smoothing_slow), matching_filter_threshold_(matching_filter_threshold) { RTC_DCHECK(data_dumper); RTC_DCHECK_LT(0, window_size_sub_blocks); @@ -340,10 +342,14 @@ void MatchedFilter::Reset() { } void MatchedFilter::Update(const DownsampledRenderBuffer& render_buffer, - rtc::ArrayView<const float> capture) { + rtc::ArrayView<const float> capture, + bool use_slow_smoothing) { RTC_DCHECK_EQ(sub_block_size_, capture.size()); auto& y = capture; + const float smoothing = + use_slow_smoothing ? 
smoothing_slow_ : smoothing_fast_; + const float x2_sum_threshold = filters_[0].size() * excitation_limit_ * excitation_limit_; @@ -360,25 +366,25 @@ void MatchedFilter::Update(const DownsampledRenderBuffer& render_buffer, switch (optimization_) { #if defined(WEBRTC_ARCH_X86_FAMILY) case Aec3Optimization::kSse2: - aec3::MatchedFilterCore_SSE2(x_start_index, x2_sum_threshold, - smoothing_, render_buffer.buffer, y, - filters_[n], &filters_updated, &error_sum); + aec3::MatchedFilterCore_SSE2(x_start_index, x2_sum_threshold, smoothing, + render_buffer.buffer, y, filters_[n], + &filters_updated, &error_sum); break; case Aec3Optimization::kAvx2: - aec3::MatchedFilterCore_AVX2(x_start_index, x2_sum_threshold, - smoothing_, render_buffer.buffer, y, - filters_[n], &filters_updated, &error_sum); + aec3::MatchedFilterCore_AVX2(x_start_index, x2_sum_threshold, smoothing, + render_buffer.buffer, y, filters_[n], + &filters_updated, &error_sum); break; #endif #if defined(WEBRTC_HAS_NEON) case Aec3Optimization::kNeon: - aec3::MatchedFilterCore_NEON(x_start_index, x2_sum_threshold, - smoothing_, render_buffer.buffer, y, - filters_[n], &filters_updated, &error_sum); + aec3::MatchedFilterCore_NEON(x_start_index, x2_sum_threshold, smoothing, + render_buffer.buffer, y, filters_[n], + &filters_updated, &error_sum); break; #endif default: - aec3::MatchedFilterCore(x_start_index, x2_sum_threshold, smoothing_, + aec3::MatchedFilterCore(x_start_index, x2_sum_threshold, smoothing, render_buffer.buffer, y, filters_[n], &filters_updated, &error_sum); } diff --git a/modules/audio_processing/aec3/matched_filter.h b/modules/audio_processing/aec3/matched_filter.h index fa44eb27fd..c6410ab4ee 100644 --- a/modules/audio_processing/aec3/matched_filter.h +++ b/modules/audio_processing/aec3/matched_filter.h @@ -100,7 +100,8 @@ class MatchedFilter { int num_matched_filters, size_t alignment_shift_sub_blocks, float excitation_limit, - float smoothing, + float smoothing_fast, + float smoothing_slow, float matching_filter_threshold); MatchedFilter() = delete; @@ -111,7 +112,8 @@ class MatchedFilter { // Updates the correlation with the values in the capture buffer. void Update(const DownsampledRenderBuffer& render_buffer, - rtc::ArrayView<const float> capture); + rtc::ArrayView<const float> capture, + bool use_slow_smoothing); // Resets the matched filter. void Reset(); @@ -140,7 +142,8 @@ class MatchedFilter { std::vector<LagEstimate> lag_estimates_; std::vector<size_t> filters_offsets_; const float excitation_limit_; - const float smoothing_; + const float smoothing_fast_; + const float smoothing_slow_; const float matching_filter_threshold_; }; diff --git a/modules/audio_processing/aec3/matched_filter_lag_aggregator.h b/modules/audio_processing/aec3/matched_filter_lag_aggregator.h index d48011e477..612bd5d942 100644 --- a/modules/audio_processing/aec3/matched_filter_lag_aggregator.h +++ b/modules/audio_processing/aec3/matched_filter_lag_aggregator.h @@ -45,6 +45,9 @@ class MatchedFilterLagAggregator { absl::optional<DelayEstimate> Aggregate( rtc::ArrayView<const MatchedFilter::LagEstimate> lag_estimates); + // Returns whether a reliable delay estimate has been found. 
+ bool ReliableDelayFound() const { return significant_candidate_found_; } + private: ApmDataDumper* const data_dumper_; std::vector<int> histogram_; diff --git a/modules/audio_processing/aec3/matched_filter_unittest.cc b/modules/audio_processing/aec3/matched_filter_unittest.cc index 137275fd74..37b51fa624 100644 --- a/modules/audio_processing/aec3/matched_filter_unittest.cc +++ b/modules/audio_processing/aec3/matched_filter_unittest.cc @@ -206,6 +206,7 @@ TEST(MatchedFilter, LagEstimation) { kWindowSizeSubBlocks, kNumMatchedFilters, kAlignmentShiftSubBlocks, 150, config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, config.delay.delay_candidate_detection_threshold); std::unique_ptr<RenderDelayBuffer> render_delay_buffer( @@ -231,7 +232,7 @@ TEST(MatchedFilter, LagEstimation) { downsampled_capture_data.data(), sub_block_size); capture_decimator.Decimate(capture[0], downsampled_capture); filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(), - downsampled_capture); + downsampled_capture, false); } // Obtain the lag estimates. @@ -318,6 +319,7 @@ TEST(MatchedFilter, LagNotReliableForUncorrelatedRenderAndCapture) { kWindowSizeSubBlocks, kNumMatchedFilters, kAlignmentShiftSubBlocks, 150, config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, config.delay.delay_candidate_detection_threshold); // Analyze the correlation between render and capture. @@ -325,7 +327,8 @@ TEST(MatchedFilter, LagNotReliableForUncorrelatedRenderAndCapture) { RandomizeSampleVector(&random_generator, render[0][0]); RandomizeSampleVector(&random_generator, capture); render_delay_buffer->Insert(render); - filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(), capture); + filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(), capture, + false); } // Obtain the lag estimates. @@ -361,6 +364,7 @@ TEST(MatchedFilter, LagNotUpdatedForLowLevelRender) { kWindowSizeSubBlocks, kNumMatchedFilters, kAlignmentShiftSubBlocks, 150, config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, config.delay.delay_candidate_detection_threshold); std::unique_ptr<RenderDelayBuffer> render_delay_buffer( RenderDelayBuffer::Create(EchoCanceller3Config(), kSampleRateHz, @@ -379,7 +383,7 @@ TEST(MatchedFilter, LagNotUpdatedForLowLevelRender) { sub_block_size); capture_decimator.Decimate(capture[0], downsampled_capture); filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(), - downsampled_capture); + downsampled_capture, false); } // Obtain the lag estimates. 
@@ -407,6 +411,7 @@ TEST(MatchedFilter, NumberOfLagEstimates) { MatchedFilter filter(&data_dumper, DetectOptimization(), sub_block_size, 32, num_matched_filters, 1, 150, config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, config.delay.delay_candidate_detection_threshold); EXPECT_EQ(num_matched_filters, filter.GetLagEstimates().size()); } @@ -421,6 +426,7 @@ TEST(MatchedFilterDeathTest, ZeroWindowSize) { EchoCanceller3Config config; EXPECT_DEATH(MatchedFilter(&data_dumper, DetectOptimization(), 16, 0, 1, 1, 150, config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, config.delay.delay_candidate_detection_threshold), ""); } @@ -430,6 +436,7 @@ TEST(MatchedFilterDeathTest, NullDataDumper) { EchoCanceller3Config config; EXPECT_DEATH(MatchedFilter(nullptr, DetectOptimization(), 16, 1, 1, 1, 150, config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, config.delay.delay_candidate_detection_threshold), ""); } @@ -441,6 +448,7 @@ TEST(MatchedFilterDeathTest, DISABLED_BlockSizeMultipleOf4) { EchoCanceller3Config config; EXPECT_DEATH(MatchedFilter(&data_dumper, DetectOptimization(), 15, 1, 1, 1, 150, config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, config.delay.delay_candidate_detection_threshold), ""); } @@ -453,6 +461,7 @@ TEST(MatchedFilterDeathTest, DISABLED_SubBlockSizeAddsUpToBlockSize) { EchoCanceller3Config config; EXPECT_DEATH(MatchedFilter(&data_dumper, DetectOptimization(), 12, 1, 1, 1, 150, config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, config.delay.delay_candidate_detection_threshold), ""); } diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc index 0688429d47..15bebecb5f 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator.cc +++ b/modules/audio_processing/aec3/residual_echo_estimator.cc @@ -177,7 +177,8 @@ void ResidualEchoEstimator::Estimate( rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear, rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2, bool dominant_nearend, - rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) { + rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2, + rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded) { RTC_DCHECK_EQ(R2.size(), Y2.size()); RTC_DCHECK_EQ(R2.size(), S2_linear.size()); @@ -193,14 +194,18 @@ void ResidualEchoEstimator::Estimate( if (aec_state.SaturatedEcho()) { for (size_t ch = 0; ch < num_capture_channels; ++ch) { std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin()); + std::copy(Y2[ch].begin(), Y2[ch].end(), R2_unbounded[ch].begin()); } } else { const bool onset_compensated = erle_onset_compensation_in_dominant_nearend_ || !dominant_nearend; LinearEstimate(S2_linear, aec_state.Erle(onset_compensated), R2); + LinearEstimate(S2_linear, aec_state.ErleUnbounded(), R2_unbounded); } - AddReverb(ReverbType::kLinear, aec_state, render_buffer, R2); + UpdateReverb(ReverbType::kLinear, aec_state, render_buffer); + AddReverb(R2); + AddReverb(R2_unbounded); } else { const float echo_path_gain = GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/true); @@ -210,6 +215,7 @@ void ResidualEchoEstimator::Estimate( if (aec_state.SaturatedEcho()) { for (size_t ch = 0; ch < num_capture_channels; ++ch) { std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin()); + std::copy(Y2[ch].begin(), Y2[ch].end(), 
R2_unbounded[ch].begin()); } } else { // Estimate the echo generating signal power. @@ -229,11 +235,14 @@ void ResidualEchoEstimator::Estimate( } NonLinearEstimate(echo_path_gain, X2, R2); + NonLinearEstimate(echo_path_gain, X2, R2_unbounded); } if (config_.echo_model.model_reverb_in_nonlinear_mode && !aec_state.TransparentModeActive()) { - AddReverb(ReverbType::kNonLinear, aec_state, render_buffer, R2); + UpdateReverb(ReverbType::kNonLinear, aec_state, render_buffer); + AddReverb(R2); + AddReverb(R2_unbounded); } } @@ -244,6 +253,7 @@ void ResidualEchoEstimator::Estimate( for (size_t ch = 0; ch < num_capture_channels; ++ch) { for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { R2[ch][k] *= residual_scaling[k]; + R2_unbounded[ch][k] *= residual_scaling[k]; } } } @@ -292,14 +302,10 @@ void ResidualEchoEstimator::UpdateRenderNoisePower( } } -// Adds the estimated power of the reverb to the residual echo power. -void ResidualEchoEstimator::AddReverb( - ReverbType reverb_type, - const AecState& aec_state, - const RenderBuffer& render_buffer, - rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) { - const size_t num_capture_channels = R2.size(); - +// Updates the reverb estimation. +void ResidualEchoEstimator::UpdateReverb(ReverbType reverb_type, + const AecState& aec_state, + const RenderBuffer& render_buffer) { // Choose reverb partition based on what type of echo power model is used. const size_t first_reverb_partition = reverb_type == ReverbType::kLinear @@ -334,6 +340,11 @@ void ResidualEchoEstimator::AddReverb( echo_reverb_.UpdateReverbNoFreqShaping(render_power, echo_path_gain, aec_state.ReverbDecay()); } +} +// Adds the estimated power of the reverb to the residual echo power. +void ResidualEchoEstimator::AddReverb( + rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) const { + const size_t num_capture_channels = R2.size(); // Add the reverb power. rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb_power = diff --git a/modules/audio_processing/aec3/residual_echo_estimator.h b/modules/audio_processing/aec3/residual_echo_estimator.h index 9e977766cb..c071854c4a 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator.h +++ b/modules/audio_processing/aec3/residual_echo_estimator.h @@ -40,7 +40,8 @@ class ResidualEchoEstimator { rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear, rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2, bool dominant_nearend, - rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2); + rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2, + rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded); private: enum class ReverbType { kLinear, kNonLinear }; @@ -52,12 +53,15 @@ class ResidualEchoEstimator { // render signal. void UpdateRenderNoisePower(const RenderBuffer& render_buffer); + // Updates the reverb estimation. + void UpdateReverb(ReverbType reverb_type, + const AecState& aec_state, + const RenderBuffer& render_buffer); + // Adds the estimated unmodelled echo power to the residual echo power // estimate. - void AddReverb(ReverbType reverb_type, - const AecState& aec_state, - const RenderBuffer& render_buffer, - rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2); + void AddReverb( + rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) const; // Gets the echo path gain to apply. 
float GetEchoPathGain(const AecState& aec_state, diff --git a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc index e80838b5f6..3d760b7dda 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc +++ b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc @@ -48,6 +48,8 @@ TEST_P(ResidualEchoEstimatorMultiChannel, BasicTest) { num_capture_channels); std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(num_capture_channels); std::vector<std::array<float, kFftLengthBy2Plus1>> R2(num_capture_channels); + std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded( + num_capture_channels); std::vector<std::vector<std::vector<float>>> x( kNumBands, std::vector<std::vector<float>>( num_render_channels, std::vector<float>(kBlockSize, 0.f))); @@ -100,7 +102,8 @@ TEST_P(ResidualEchoEstimatorMultiChannel, BasicTest) { output); estimator.Estimate(aec_state, *render_delay_buffer->GetRenderBuffer(), - S2_linear, Y2, /*dominant_nearend=*/false, R2); + S2_linear, Y2, /*dominant_nearend=*/false, R2, + R2_unbounded); } } diff --git a/modules/audio_processing/aec3/reverb_model_estimator.cc b/modules/audio_processing/aec3/reverb_model_estimator.cc index 717431103f..00ae466409 100644 --- a/modules/audio_processing/aec3/reverb_model_estimator.cc +++ b/modules/audio_processing/aec3/reverb_model_estimator.cc @@ -9,6 +9,7 @@ */ #include "modules/audio_processing/aec3/reverb_model_estimator.h" +#include <memory> namespace webrtc { diff --git a/modules/audio_processing/aec3/reverb_model_estimator.h b/modules/audio_processing/aec3/reverb_model_estimator.h index 3b9971abae..e4e9540673 100644 --- a/modules/audio_processing/aec3/reverb_model_estimator.h +++ b/modules/audio_processing/aec3/reverb_model_estimator.h @@ -12,6 +12,7 @@ #define MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_ESTIMATOR_H_ #include <array> +#include <memory> #include <vector> #include "absl/types/optional.h" diff --git a/modules/audio_processing/aec3/subband_erle_estimator.cc b/modules/audio_processing/aec3/subband_erle_estimator.cc index 1e957f23ac..dc7f92fd99 100644 --- a/modules/audio_processing/aec3/subband_erle_estimator.cc +++ b/modules/audio_processing/aec3/subband_erle_estimator.cc @@ -49,6 +49,7 @@ SubbandErleEstimator::SubbandErleEstimator(const EchoCanceller3Config& config, accum_spectra_(num_capture_channels), erle_(num_capture_channels), erle_onset_compensated_(num_capture_channels), + erle_unbounded_(num_capture_channels), erle_during_onsets_(num_capture_channels), coming_onset_(num_capture_channels), hold_counters_(num_capture_channels) { @@ -62,6 +63,7 @@ void SubbandErleEstimator::Reset() { for (size_t ch = 0; ch < num_capture_channels; ++ch) { erle_[ch].fill(min_erle_); erle_onset_compensated_[ch].fill(min_erle_); + erle_unbounded_[ch].fill(min_erle_); erle_during_onsets_[ch].fill(min_erle_); coming_onset_[ch].fill(true); hold_counters_[ch].fill(0); @@ -90,6 +92,10 @@ void SubbandErleEstimator::Update( auto& erle_oc = erle_onset_compensated_[ch]; erle_oc[0] = erle_oc[1]; erle_oc[kFftLengthBy2] = erle_oc[kFftLengthBy2 - 1]; + + auto& erle_u = erle_unbounded_[ch]; + erle_u[0] = erle_u[1]; + erle_u[kFftLengthBy2] = erle_u[kFftLengthBy2 - 1]; } } @@ -163,6 +169,11 @@ void SubbandErleEstimator::UpdateBands( update_erle_band(erle_onset_compensated_[ch][k], new_erle[k], low_render_energy, min_erle_, max_erle_[k]); } + + // Virtually unbounded ERLE. 
+ constexpr float kUnboundedErleMax = 100000.0f; + update_erle_band(erle_unbounded_[ch][k], new_erle[k], low_render_energy, + min_erle_, kUnboundedErleMax); } } } diff --git a/modules/audio_processing/aec3/subband_erle_estimator.h b/modules/audio_processing/aec3/subband_erle_estimator.h index ffed6a57a5..8bf9c4d645 100644 --- a/modules/audio_processing/aec3/subband_erle_estimator.h +++ b/modules/audio_processing/aec3/subband_erle_estimator.h @@ -47,6 +47,12 @@ class SubbandErleEstimator { : erle_; } + // Returns the non-capped ERLE estimate. + rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleUnbounded() + const { + return erle_unbounded_; + } + // Returns the ERLE estimate at onsets (only used for testing). rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleDuringOnsets() const { @@ -88,6 +94,7 @@ class SubbandErleEstimator { std::vector<std::array<float, kFftLengthBy2Plus1>> erle_; // ERLE lowered during render onsets. std::vector<std::array<float, kFftLengthBy2Plus1>> erle_onset_compensated_; + std::vector<std::array<float, kFftLengthBy2Plus1>> erle_unbounded_; // Estimation of ERLE during render onsets. std::vector<std::array<float, kFftLengthBy2Plus1>> erle_during_onsets_; std::vector<std::array<bool, kFftLengthBy2Plus1>> coming_onset_; diff --git a/modules/audio_processing/aec3/subtractor.cc b/modules/audio_processing/aec3/subtractor.cc index d10e4ffc52..2eae686752 100644 --- a/modules/audio_processing/aec3/subtractor.cc +++ b/modules/audio_processing/aec3/subtractor.cc @@ -91,7 +91,20 @@ Subtractor::Subtractor(const EchoCanceller3Config& config, std::vector<float>(GetTimeDomainLength(std::max( config_.filter.refined_initial.length_blocks, config_.filter.refined.length_blocks)), - 0.f)) { + 0.f)), + coarse_impulse_responses_(0) { + // Set up the storing of coarse impulse responses if data dumping is + // available. 
+ if (ApmDataDumper::IsAvailable()) { + coarse_impulse_responses_.resize(num_capture_channels_); + const size_t filter_size = GetTimeDomainLength( + std::max(config_.filter.coarse_initial.length_blocks, + config_.filter.coarse.length_blocks)); + for (std::vector<float>& impulse_response : coarse_impulse_responses_) { + impulse_response.resize(filter_size, 0.f); + } + } + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { refined_filters_[ch] = std::make_unique<AdaptiveFirFilter>( config_.filter.refined.length_blocks, @@ -285,7 +298,14 @@ void Subtractor::Process(const RenderBuffer& render_buffer, config_.filter.coarse_reset_hangover_blocks; } - coarse_filter_[ch]->Adapt(render_buffer, G); + if (ApmDataDumper::IsAvailable()) { + RTC_DCHECK_LT(ch, coarse_impulse_responses_.size()); + coarse_filter_[ch]->Adapt(render_buffer, G, + &coarse_impulse_responses_[ch]); + } else { + coarse_filter_[ch]->Adapt(render_buffer, G); + } + if (ch == 0) { data_dumper_->DumpRaw("aec3_subtractor_G_coarse", G.re); data_dumper_->DumpRaw("aec3_subtractor_G_coarse", G.im); diff --git a/modules/audio_processing/aec3/subtractor.h b/modules/audio_processing/aec3/subtractor.h index 560f6568eb..767e4aad46 100644 --- a/modules/audio_processing/aec3/subtractor.h +++ b/modules/audio_processing/aec3/subtractor.h @@ -78,6 +78,15 @@ class Subtractor { refined_impulse_responses_[0].data(), GetTimeDomainLength( refined_filters_[0]->max_filter_size_partitions()))); + if (ApmDataDumper::IsAvailable()) { + RTC_DCHECK_GT(coarse_impulse_responses_.size(), 0); + data_dumper_->DumpRaw( + "aec3_subtractor_h_coarse", + rtc::ArrayView<const float>( + coarse_impulse_responses_[0].data(), + GetTimeDomainLength( + coarse_filter_[0]->max_filter_size_partitions()))); + } refined_filters_[0]->DumpFilter("aec3_subtractor_H_refined"); coarse_filter_[0]->DumpFilter("aec3_subtractor_H_coarse"); @@ -132,6 +141,7 @@ class Subtractor { std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>> refined_frequency_responses_; std::vector<std::vector<float>> refined_impulse_responses_; + std::vector<std::vector<float>> coarse_impulse_responses_; }; } // namespace webrtc diff --git a/modules/audio_processing/aec3/suppression_gain.cc b/modules/audio_processing/aec3/suppression_gain.cc index 5b01c52908..6405d71c2d 100644 --- a/modules/audio_processing/aec3/suppression_gain.cc +++ b/modules/audio_processing/aec3/suppression_gain.cc @@ -23,10 +23,15 @@ #include "modules/audio_processing/logging/apm_data_dumper.h" #include "rtc_base/atomic_ops.h" #include "rtc_base/checks.h" +#include "system_wrappers/include/field_trial.h" namespace webrtc { namespace { +bool UseUnboundedEchoSpectrum() { + return field_trial::IsEnabled("WebRTC-Aec3UseUnboundedEchoSpectrum"); +} + void LimitLowFrequencyGains(std::array<float, kFftLengthBy2Plus1>* gain) { // Limit the low frequency gains to avoid the impact of the high-pass filter // on the lower-frequency gain influencing the overall achieved gain. @@ -230,16 +235,20 @@ void SuppressionGain::GetMinGain( min_gain[k] = std::min(min_gain[k], 1.f); } - const bool is_nearend_state = dominant_nearend_detector_->IsNearendState(); - for (size_t k = 0; k < 6; ++k) { - const auto& dec = is_nearend_state ? nearend_params_.max_dec_factor_lf - : normal_params_.max_dec_factor_lf; - - // Make sure the gains of the low frequencies do not decrease too - // quickly after strong nearend. 
- if (last_nearend[k] > last_echo[k]) { - min_gain[k] = std::max(min_gain[k], last_gain_[k] * dec); - min_gain[k] = std::min(min_gain[k], 1.f); + if (!initial_state_ || + config_.suppressor.lf_smoothing_during_initial_phase) { + const float& dec = dominant_nearend_detector_->IsNearendState() + ? nearend_params_.max_dec_factor_lf + : normal_params_.max_dec_factor_lf; + + for (int k = 0; k <= config_.suppressor.last_lf_smoothing_band; ++k) { + // Make sure the gains of the low frequencies do not decrease too + // quickly after strong nearend. + if (last_nearend[k] > last_echo[k] || + k <= config_.suppressor.last_permanent_lf_smoothing_band) { + min_gain[k] = std::max(min_gain[k], last_gain_[k] * dec); + min_gain[k] = std::min(min_gain[k], 1.f); + } } } } else { @@ -333,8 +342,13 @@ SuppressionGain::SuppressionGain(const EchoCanceller3Config& config, num_capture_channels_, aec3::MovingAverage(kFftLengthBy2Plus1, config.suppressor.nearend_average_blocks)), - nearend_params_(config_.suppressor.nearend_tuning), - normal_params_(config_.suppressor.normal_tuning) { + nearend_params_(config_.suppressor.last_lf_band, + config_.suppressor.first_hf_band, + config_.suppressor.nearend_tuning), + normal_params_(config_.suppressor.last_lf_band, + config_.suppressor.first_hf_band, + config_.suppressor.normal_tuning), + use_unbounded_echo_spectrum_(UseUnboundedEchoSpectrum()) { RTC_DCHECK_LT(0, state_change_duration_blocks_); last_gain_.fill(1.f); if (config_.suppressor.use_subband_nearend_detection) { @@ -356,6 +370,8 @@ void SuppressionGain::GetGain( rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> residual_echo_spectrum, rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> + residual_echo_spectrum_unbounded, + rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> comfort_noise_spectrum, const RenderSignalAnalyzer& render_signal_analyzer, const AecState& aec_state, @@ -366,8 +382,13 @@ void SuppressionGain::GetGain( RTC_DCHECK(high_bands_gain); RTC_DCHECK(low_band_gain); + // Choose residual echo spectrum for the dominant nearend detector. + const auto echo = use_unbounded_echo_spectrum_ + ? residual_echo_spectrum_unbounded + : residual_echo_spectrum; + // Update the nearend state selection. - dominant_nearend_detector_->Update(nearend_spectrum, residual_echo_spectrum, + dominant_nearend_detector_->Update(nearend_spectrum, echo, comfort_noise_spectrum, initial_state_); // Compute gain for the lower band. @@ -383,6 +404,9 @@ void SuppressionGain::GetGain( *high_bands_gain = UpperBandsGain(echo_spectrum, comfort_noise_spectrum, narrow_peak_band, aec_state.SaturatedEcho(), render, *low_band_gain); + + data_dumper_->DumpRaw("aec3_dominant_nearend", + dominant_nearend_detector_->IsNearendState()); } void SuppressionGain::SetInitialState(bool state) { @@ -419,23 +443,23 @@ bool SuppressionGain::LowNoiseRenderDetector::Detect( } SuppressionGain::GainParameters::GainParameters( + int last_lf_band, + int first_hf_band, const EchoCanceller3Config::Suppressor::Tuning& tuning) : max_inc_factor(tuning.max_inc_factor), max_dec_factor_lf(tuning.max_dec_factor_lf) { // Compute per-band masking thresholds. 
- constexpr size_t kLastLfBand = 5; - constexpr size_t kFirstHfBand = 8; - RTC_DCHECK_LT(kLastLfBand, kFirstHfBand); + RTC_DCHECK_LT(last_lf_band, first_hf_band); auto& lf = tuning.mask_lf; auto& hf = tuning.mask_hf; RTC_DCHECK_LT(lf.enr_transparent, lf.enr_suppress); RTC_DCHECK_LT(hf.enr_transparent, hf.enr_suppress); - for (size_t k = 0; k < kFftLengthBy2Plus1; k++) { + for (int k = 0; k < static_cast<int>(kFftLengthBy2Plus1); k++) { float a; - if (k <= kLastLfBand) { + if (k <= last_lf_band) { a = 0.f; - } else if (k < kFirstHfBand) { - a = (k - kLastLfBand) / static_cast<float>(kFirstHfBand - kLastLfBand); + } else if (k < first_hf_band) { + a = (k - last_lf_band) / static_cast<float>(first_hf_band - last_lf_band); } else { a = 1.f; } diff --git a/modules/audio_processing/aec3/suppression_gain.h b/modules/audio_processing/aec3/suppression_gain.h index d049baeaaf..7c4a1c9f7d 100644 --- a/modules/audio_processing/aec3/suppression_gain.h +++ b/modules/audio_processing/aec3/suppression_gain.h @@ -43,6 +43,8 @@ class SuppressionGain { rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> residual_echo_spectrum, rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> + residual_echo_spectrum_unbounded, + rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> comfort_noise_spectrum, const RenderSignalAnalyzer& render_signal_analyzer, const AecState& aec_state, @@ -103,6 +105,8 @@ class SuppressionGain { struct GainParameters { explicit GainParameters( + int last_lf_band, + int first_hf_band, const EchoCanceller3Config::Suppressor::Tuning& tuning); const float max_inc_factor; const float max_dec_factor_lf; @@ -126,6 +130,9 @@ class SuppressionGain { std::vector<aec3::MovingAverage> nearend_smoothers_; const GainParameters nearend_params_; const GainParameters normal_params_; + // Determines if the dominant nearend detector uses the unbounded residual + // echo spectrum. + const bool use_unbounded_echo_spectrum_; std::unique_ptr<NearendDetector> dominant_nearend_detector_; RTC_DISALLOW_COPY_AND_ASSIGN(SuppressionGain); diff --git a/modules/audio_processing/aec3/suppression_gain_unittest.cc b/modules/audio_processing/aec3/suppression_gain_unittest.cc index 26bfc24ebb..999b0f27ab 100644 --- a/modules/audio_processing/aec3/suppression_gain_unittest.cc +++ b/modules/audio_processing/aec3/suppression_gain_unittest.cc @@ -26,29 +26,30 @@ namespace aec3 { // Verifies that the check for non-null output gains works. 
TEST(SuppressionGainDeathTest, NullOutputGains) { - std::vector<std::array<float, kFftLengthBy2Plus1>> E2(1, {0.f}); - std::vector<std::array<float, kFftLengthBy2Plus1>> R2(1, {0.f}); + std::vector<std::array<float, kFftLengthBy2Plus1>> E2(1, {0.0f}); + std::vector<std::array<float, kFftLengthBy2Plus1>> R2(1, {0.0f}); + std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded(1, {0.0f}); std::vector<std::array<float, kFftLengthBy2Plus1>> S2(1); - std::vector<std::array<float, kFftLengthBy2Plus1>> N2(1, {0.f}); + std::vector<std::array<float, kFftLengthBy2Plus1>> N2(1, {0.0f}); for (auto& S2_k : S2) { - S2_k.fill(.1f); + S2_k.fill(0.1f); } FftData E; FftData Y; - E.re.fill(0.f); - E.im.fill(0.f); - Y.re.fill(0.f); - Y.im.fill(0.f); + E.re.fill(0.0f); + E.im.fill(0.0f); + Y.re.fill(0.0f); + Y.im.fill(0.0f); float high_bands_gain; AecState aec_state(EchoCanceller3Config{}, 1); EXPECT_DEATH( SuppressionGain(EchoCanceller3Config{}, DetectOptimization(), 16000, 1) - .GetGain(E2, S2, R2, N2, + .GetGain(E2, S2, R2, R2_unbounded, N2, RenderSignalAnalyzer((EchoCanceller3Config{})), aec_state, std::vector<std::vector<std::vector<float>>>( 3, std::vector<std::vector<float>>( - 1, std::vector<float>(kBlockSize, 0.f))), + 1, std::vector<float>(kBlockSize, 0.0f))), false, &high_bands_gain, nullptr), ""); } @@ -67,15 +68,17 @@ TEST(SuppressionGain, BasicGainComputation) { float high_bands_gain; std::vector<std::array<float, kFftLengthBy2Plus1>> E2(kNumCaptureChannels); std::vector<std::array<float, kFftLengthBy2Plus1>> S2(kNumCaptureChannels, - {0.f}); + {0.0f}); std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(kNumCaptureChannels); std::vector<std::array<float, kFftLengthBy2Plus1>> R2(kNumCaptureChannels); + std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded( + kNumCaptureChannels); std::vector<std::array<float, kFftLengthBy2Plus1>> N2(kNumCaptureChannels); std::array<float, kFftLengthBy2Plus1> g; std::vector<SubtractorOutput> output(kNumCaptureChannels); std::vector<std::vector<std::vector<float>>> x( kNumBands, std::vector<std::vector<float>>( - kNumRenderChannels, std::vector<float>(kBlockSize, 0.f))); + kNumRenderChannels, std::vector<float>(kBlockSize, 0.0f))); EchoCanceller3Config config; AecState aec_state(config, kNumCaptureChannels); ApmDataDumper data_dumper(42); @@ -89,8 +92,9 @@ TEST(SuppressionGain, BasicGainComputation) { for (size_t ch = 0; ch < kNumCaptureChannels; ++ch) { E2[ch].fill(10.f); Y2[ch].fill(10.f); - R2[ch].fill(.1f); - N2[ch].fill(100.f); + R2[ch].fill(0.1f); + R2_unbounded[ch].fill(0.1f); + N2[ch].fill(100.0f); } for (auto& subtractor_output : output) { subtractor_output.Reset(); @@ -107,17 +111,18 @@ TEST(SuppressionGain, BasicGainComputation) { aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponses(), subtractor.FilterImpulseResponses(), *render_delay_buffer->GetRenderBuffer(), E2, Y2, output); - suppression_gain.GetGain(E2, S2, R2, N2, analyzer, aec_state, x, false, - &high_bands_gain, &g); + suppression_gain.GetGain(E2, S2, R2, R2_unbounded, N2, analyzer, aec_state, + x, false, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(), - [](float a) { EXPECT_NEAR(1.f, a, 0.001); }); + [](float a) { EXPECT_NEAR(1.0f, a, 0.001f); }); // Ensure that a strong nearend is detected to mask any echoes. 
for (size_t ch = 0; ch < kNumCaptureChannels; ++ch) { E2[ch].fill(100.f); Y2[ch].fill(100.f); R2[ch].fill(0.1f); + R2_unbounded[ch].fill(0.1f); S2[ch].fill(0.1f); N2[ch].fill(0.f); } @@ -126,22 +131,23 @@ TEST(SuppressionGain, BasicGainComputation) { aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponses(), subtractor.FilterImpulseResponses(), *render_delay_buffer->GetRenderBuffer(), E2, Y2, output); - suppression_gain.GetGain(E2, S2, R2, N2, analyzer, aec_state, x, false, - &high_bands_gain, &g); + suppression_gain.GetGain(E2, S2, R2, R2_unbounded, N2, analyzer, aec_state, + x, false, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(), - [](float a) { EXPECT_NEAR(1.f, a, 0.001); }); + [](float a) { EXPECT_NEAR(1.0f, a, 0.001f); }); // Add a strong echo to one of the channels and ensure that it is suppressed. - E2[1].fill(1000000000.f); - R2[1].fill(10000000000000.f); + E2[1].fill(1000000000.0f); + R2[1].fill(10000000000000.0f); + R2_unbounded[1].fill(10000000000000.0f); for (int k = 0; k < 10; ++k) { - suppression_gain.GetGain(E2, S2, R2, N2, analyzer, aec_state, x, false, - &high_bands_gain, &g); + suppression_gain.GetGain(E2, S2, R2, R2_unbounded, N2, analyzer, aec_state, + x, false, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(), - [](float a) { EXPECT_NEAR(0.f, a, 0.001); }); + [](float a) { EXPECT_NEAR(0.0f, a, 0.001f); }); } } // namespace aec3 diff --git a/modules/audio_processing/aec3/transparent_mode.cc b/modules/audio_processing/aec3/transparent_mode.cc index 7cfa3e8eae..489f53f4f1 100644 --- a/modules/audio_processing/aec3/transparent_mode.cc +++ b/modules/audio_processing/aec3/transparent_mode.cc @@ -11,6 +11,7 @@ #include "modules/audio_processing/aec3/transparent_mode.h" #include "rtc_base/checks.h" +#include "rtc_base/logging.h" #include "system_wrappers/include/field_trial.h" namespace webrtc { @@ -228,11 +229,14 @@ class LegacyTransparentModeImpl : public TransparentMode { std::unique_ptr<TransparentMode> TransparentMode::Create( const EchoCanceller3Config& config) { if (config.ep_strength.bounded_erl || DeactivateTransparentMode()) { + RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: Disabled"; return nullptr; } if (ActivateTransparentModeHmm()) { + RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: HMM"; return std::make_unique<TransparentModeImpl>(); } + RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: Legacy"; return std::make_unique<LegacyTransparentModeImpl>(config); } diff --git a/modules/audio_processing/agc/BUILD.gn b/modules/audio_processing/agc/BUILD.gn index 5ad6644323..4bb8c5494b 100644 --- a/modules/audio_processing/agc/BUILD.gn +++ b/modules/audio_processing/agc/BUILD.gn @@ -19,11 +19,14 @@ rtc_library("agc") { ] configs += [ "..:apm_debug_dump" ] deps = [ + ":clipping_predictor", + ":clipping_predictor_evaluator", ":gain_control_interface", ":gain_map", ":level_estimation", "..:apm_logging", "..:audio_buffer", + "..:audio_frame_view", "../../../common_audio", "../../../common_audio:common_audio_c", "../../../rtc_base:checks", @@ -38,6 +41,49 @@ rtc_library("agc") { absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] } +rtc_library("clipping_predictor") { + sources = [ + "clipping_predictor.cc", + "clipping_predictor.h", + ] + deps = [ + ":clipping_predictor_level_buffer", + ":gain_map", + "..:api", + "..:audio_frame_view", + "../../../common_audio", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:safe_minmax", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + 
+rtc_library("clipping_predictor_evaluator") { + sources = [ + "clipping_predictor_evaluator.cc", + "clipping_predictor_evaluator.h", + ] + deps = [ + "../../../rtc_base:checks", + "../../../rtc_base:logging", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("clipping_predictor_level_buffer") { + sources = [ + "clipping_predictor_level_buffer.cc", + "clipping_predictor_level_buffer.h", + ] + deps = [ + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:rtc_base_approved", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + rtc_library("level_estimation") { sources = [ "agc.cc", @@ -96,6 +142,9 @@ if (rtc_include_tests) { testonly = true sources = [ "agc_manager_direct_unittest.cc", + "clipping_predictor_evaluator_unittest.cc", + "clipping_predictor_level_buffer_unittest.cc", + "clipping_predictor_unittest.cc", "loudness_histogram_unittest.cc", "mock_agc.h", ] @@ -103,13 +152,20 @@ if (rtc_include_tests) { deps = [ ":agc", + ":clipping_predictor", + ":clipping_predictor_evaluator", + ":clipping_predictor_level_buffer", ":gain_control_interface", ":level_estimation", "..:mocks", + "../../../rtc_base:checks", + "../../../rtc_base:rtc_base_approved", + "../../../rtc_base:safe_conversions", "../../../test:field_trial", "../../../test:fileutils", "../../../test:test_support", "//testing/gtest", ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] } } diff --git a/modules/audio_processing/agc/agc_manager_direct.cc b/modules/audio_processing/agc/agc_manager_direct.cc index 2454d1bbb1..817678801e 100644 --- a/modules/audio_processing/agc/agc_manager_direct.cc +++ b/modules/audio_processing/agc/agc_manager_direct.cc @@ -16,6 +16,7 @@ #include "common_audio/include/audio_util.h" #include "modules/audio_processing/agc/gain_control.h" #include "modules/audio_processing/agc/gain_map_internal.h" +#include "modules/audio_processing/include/audio_frame_view.h" #include "rtc_base/atomic_ops.h" #include "rtc_base/checks.h" #include "rtc_base/logging.h" @@ -27,33 +28,33 @@ namespace webrtc { namespace { -// Amount the microphone level is lowered with every clipping event. -const int kClippedLevelStep = 15; -// Proportion of clipped samples required to declare a clipping event. -const float kClippedRatioThreshold = 0.1f; -// Time in frames to wait after a clipping event before checking again. -const int kClippedWaitFrames = 300; - // Amount of error we tolerate in the microphone level (presumably due to OS // quantization) before we assume the user has manually adjusted the microphone. -const int kLevelQuantizationSlack = 25; +constexpr int kLevelQuantizationSlack = 25; -const int kDefaultCompressionGain = 7; -const int kMaxCompressionGain = 12; -const int kMinCompressionGain = 2; +constexpr int kDefaultCompressionGain = 7; +constexpr int kMaxCompressionGain = 12; +constexpr int kMinCompressionGain = 2; // Controls the rate of compression changes towards the target. -const float kCompressionGainStep = 0.05f; +constexpr float kCompressionGainStep = 0.05f; -const int kMaxMicLevel = 255; +constexpr int kMaxMicLevel = 255; static_assert(kGainMapSize > kMaxMicLevel, "gain map too small"); -const int kMinMicLevel = 12; +constexpr int kMinMicLevel = 12; // Prevent very large microphone level changes. -const int kMaxResidualGainChange = 15; +constexpr int kMaxResidualGainChange = 15; // Maximum additional gain allowed to compensate for microphone level // restrictions from clipping events. 
-const int kSurplusCompressionGain = 6; +constexpr int kSurplusCompressionGain = 6; + +// History size for the clipping predictor evaluator (unit: number of 10 ms +// frames). +constexpr int kClippingPredictorEvaluatorHistorySize = 32; + +using ClippingPredictorConfig = AudioProcessing::Config::GainController1:: + AnalogGainController::ClippingPredictor; // Returns whether a fall-back solution to choose the maximum level should be // chosen. @@ -132,6 +133,33 @@ float ComputeClippedRatio(const float* const* audio, return static_cast<float>(num_clipped) / (samples_per_channel); } +void LogClippingPredictorMetrics(const ClippingPredictorEvaluator& evaluator) { + RTC_LOG(LS_INFO) << "Clipping predictor metrics: TP " + << evaluator.true_positives() << " TN " + << evaluator.true_negatives() << " FP " + << evaluator.false_positives() << " FN " + << evaluator.false_negatives(); + const float precision_denominator = + evaluator.true_positives() + evaluator.false_positives(); + const float recall_denominator = + evaluator.true_positives() + evaluator.false_negatives(); + if (precision_denominator > 0 && recall_denominator > 0) { + const float precision = evaluator.true_positives() / precision_denominator; + const float recall = evaluator.true_positives() / recall_denominator; + RTC_LOG(LS_INFO) << "Clipping predictor metrics: P " << precision << " R " + << recall; + const float f1_score_denominator = precision + recall; + if (f1_score_denominator > 0.0f) { + const float f1_score = 2 * precision * recall / f1_score_denominator; + RTC_LOG(LS_INFO) << "Clipping predictor metrics: F1 " << f1_score; + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc.ClippingPredictor.F1Score", + std::round(f1_score * 100.0f), /*min=*/0, + /*max=*/100, + /*bucket_count=*/50); + } + } +} + } // namespace MonoAgc::MonoAgc(ApmDataDumper* data_dumper, @@ -182,19 +210,19 @@ void MonoAgc::Process(const int16_t* audio, } } -void MonoAgc::HandleClipping() { +void MonoAgc::HandleClipping(int clipped_level_step) { // Always decrease the maximum level, even if the current level is below // threshold. - SetMaxLevel(std::max(clipped_level_min_, max_level_ - kClippedLevelStep)); + SetMaxLevel(std::max(clipped_level_min_, max_level_ - clipped_level_step)); if (log_to_histograms_) { RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.AgcClippingAdjustmentAllowed", - level_ - kClippedLevelStep >= clipped_level_min_); + level_ - clipped_level_step >= clipped_level_min_); } if (level_ > clipped_level_min_) { // Don't try to adjust the level if we're already below the limit. As // a consequence, if the user has brought the level above the limit, we // will still not react until the postproc updates the level. - SetLevel(std::max(clipped_level_min_, level_ - kClippedLevelStep)); + SetLevel(std::max(clipped_level_min_, level_ - clipped_level_step)); // Reset the AGCs for all channels since the level has changed. 
agc_->Reset(); } @@ -401,35 +429,58 @@ void MonoAgc::UpdateCompressor() { int AgcManagerDirect::instance_counter_ = 0; -AgcManagerDirect::AgcManagerDirect(Agc* agc, - int startup_min_level, - int clipped_level_min, - int sample_rate_hz) +AgcManagerDirect::AgcManagerDirect( + Agc* agc, + int startup_min_level, + int clipped_level_min, + int sample_rate_hz, + int clipped_level_step, + float clipped_ratio_threshold, + int clipped_wait_frames, + const ClippingPredictorConfig& clipping_config) : AgcManagerDirect(/*num_capture_channels*/ 1, startup_min_level, clipped_level_min, /*disable_digital_adaptive*/ false, - sample_rate_hz) { + sample_rate_hz, + clipped_level_step, + clipped_ratio_threshold, + clipped_wait_frames, + clipping_config) { RTC_DCHECK(channel_agcs_[0]); RTC_DCHECK(agc); channel_agcs_[0]->set_agc(agc); } -AgcManagerDirect::AgcManagerDirect(int num_capture_channels, - int startup_min_level, - int clipped_level_min, - bool disable_digital_adaptive, - int sample_rate_hz) +AgcManagerDirect::AgcManagerDirect( + int num_capture_channels, + int startup_min_level, + int clipped_level_min, + bool disable_digital_adaptive, + int sample_rate_hz, + int clipped_level_step, + float clipped_ratio_threshold, + int clipped_wait_frames, + const ClippingPredictorConfig& clipping_config) : data_dumper_( new ApmDataDumper(rtc::AtomicOps::Increment(&instance_counter_))), use_min_channel_level_(!UseMaxAnalogChannelLevel()), sample_rate_hz_(sample_rate_hz), num_capture_channels_(num_capture_channels), disable_digital_adaptive_(disable_digital_adaptive), - frames_since_clipped_(kClippedWaitFrames), + frames_since_clipped_(clipped_wait_frames), capture_output_used_(true), + clipped_level_step_(clipped_level_step), + clipped_ratio_threshold_(clipped_ratio_threshold), + clipped_wait_frames_(clipped_wait_frames), channel_agcs_(num_capture_channels), - new_compressions_to_set_(num_capture_channels) { + new_compressions_to_set_(num_capture_channels), + clipping_predictor_( + CreateClippingPredictor(num_capture_channels, clipping_config)), + use_clipping_predictor_step_(!!clipping_predictor_ && + clipping_config.use_predicted_step), + clipping_predictor_evaluator_(kClippingPredictorEvaluatorHistorySize), + clipping_predictor_log_counter_(0) { const int min_mic_level = GetMinMicLevel(); for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) { ApmDataDumper* data_dumper_ch = ch == 0 ? 
data_dumper_.get() : nullptr; @@ -438,7 +489,12 @@ AgcManagerDirect::AgcManagerDirect(int num_capture_channels, data_dumper_ch, startup_min_level, clipped_level_min, disable_digital_adaptive_, min_mic_level); } - RTC_DCHECK_LT(0, channel_agcs_.size()); + RTC_DCHECK(!channel_agcs_.empty()); + RTC_DCHECK_GT(clipped_level_step, 0); + RTC_DCHECK_LE(clipped_level_step, 255); + RTC_DCHECK_GT(clipped_ratio_threshold, 0.f); + RTC_DCHECK_LT(clipped_ratio_threshold, 1.f); + RTC_DCHECK_GT(clipped_wait_frames, 0); channel_agcs_[0]->ActivateLogging(); } @@ -453,6 +509,8 @@ void AgcManagerDirect::Initialize() { capture_output_used_ = true; AggregateChannelLevels(); + clipping_predictor_evaluator_.Reset(); + clipping_predictor_log_counter_ = 0; } void AgcManagerDirect::SetupDigitalGainControl( @@ -489,7 +547,13 @@ void AgcManagerDirect::AnalyzePreProcess(const float* const* audio, return; } - if (frames_since_clipped_ < kClippedWaitFrames) { + if (!!clipping_predictor_) { + AudioFrameView<const float> frame = AudioFrameView<const float>( + audio, num_capture_channels_, static_cast<int>(samples_per_channel)); + clipping_predictor_->Analyze(frame); + } + + if (frames_since_clipped_ < clipped_wait_frames_) { ++frames_since_clipped_; return; } @@ -505,14 +569,54 @@ void AgcManagerDirect::AnalyzePreProcess(const float* const* audio, // gain is increased, through SetMaxLevel(). float clipped_ratio = ComputeClippedRatio(audio, num_capture_channels_, samples_per_channel); - - if (clipped_ratio > kClippedRatioThreshold) { - RTC_DLOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio=" - << clipped_ratio; + const bool clipping_detected = clipped_ratio > clipped_ratio_threshold_; + bool clipping_predicted = false; + int predicted_step = 0; + if (!!clipping_predictor_) { + for (int channel = 0; channel < num_capture_channels_; ++channel) { + const auto step = clipping_predictor_->EstimateClippedLevelStep( + channel, stream_analog_level_, clipped_level_step_, + channel_agcs_[channel]->min_mic_level(), kMaxMicLevel); + if (use_clipping_predictor_step_ && step.has_value()) { + predicted_step = std::max(predicted_step, step.value()); + clipping_predicted = true; + } + } + // Clipping prediction evaluation. + absl::optional<int> prediction_interval = + clipping_predictor_evaluator_.Observe(clipping_detected, + clipping_predicted); + if (prediction_interval.has_value()) { + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.Agc.ClippingPredictor.PredictionInterval", + prediction_interval.value(), /*min=*/0, + /*max=*/49, /*bucket_count=*/50); + } + constexpr int kNumFramesIn30Seconds = 3000; + clipping_predictor_log_counter_++; + if (clipping_predictor_log_counter_ == kNumFramesIn30Seconds) { + LogClippingPredictorMetrics(clipping_predictor_evaluator_); + clipping_predictor_log_counter_ = 0; + } + } + if (clipping_detected || clipping_predicted) { + int step = clipped_level_step_; + if (clipping_detected) { + RTC_DLOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio=" + << clipped_ratio; + } + if (clipping_predicted) { + step = std::max(predicted_step, clipped_level_step_); + RTC_DLOG(LS_INFO) << "[agc] Clipping predicted. 
step=" << step; + } for (auto& state_ch : channel_agcs_) { - state_ch->HandleClipping(); + state_ch->HandleClipping(step); } frames_since_clipped_ = 0; + if (!!clipping_predictor_) { + clipping_predictor_->Reset(); + clipping_predictor_evaluator_.Reset(); + } } AggregateChannelLevels(); } diff --git a/modules/audio_processing/agc/agc_manager_direct.h b/modules/audio_processing/agc/agc_manager_direct.h index f9417cffff..7ac96a661c 100644 --- a/modules/audio_processing/agc/agc_manager_direct.h +++ b/modules/audio_processing/agc/agc_manager_direct.h @@ -15,6 +15,8 @@ #include "absl/types/optional.h" #include "modules/audio_processing/agc/agc.h" +#include "modules/audio_processing/agc/clipping_predictor.h" +#include "modules/audio_processing/agc/clipping_predictor_evaluator.h" #include "modules/audio_processing/audio_buffer.h" #include "modules/audio_processing/logging/apm_data_dumper.h" #include "rtc_base/gtest_prod_util.h" @@ -34,12 +36,23 @@ class AgcManagerDirect final { // AgcManagerDirect will configure GainControl internally. The user is // responsible for processing the audio using it after the call to Process. // The operating range of startup_min_level is [12, 255] and any input value - // outside that range will be clamped. - AgcManagerDirect(int num_capture_channels, - int startup_min_level, - int clipped_level_min, - bool disable_digital_adaptive, - int sample_rate_hz); + // outside that range will be clamped. `clipped_level_step` is the amount + // the microphone level is lowered with every clipping event, limited to + // (0, 255]. `clipped_ratio_threshold` is the proportion of clipped + // samples required to declare a clipping event, limited to (0.f, 1.f). + // `clipped_wait_frames` is the time in frames to wait after a clipping event + // before checking again, limited to values higher than 0. + AgcManagerDirect( + int num_capture_channels, + int startup_min_level, + int clipped_level_min, + bool disable_digital_adaptive, + int sample_rate_hz, + int clipped_level_step, + float clipped_ratio_threshold, + int clipped_wait_frames, + const AudioProcessing::Config::GainController1::AnalogGainController:: + ClippingPredictor& clipping_config); ~AgcManagerDirect(); AgcManagerDirect(const AgcManagerDirect&) = delete; @@ -64,6 +77,14 @@ class AgcManagerDirect final { // If available, returns a new compression gain for the digital gain control. absl::optional<int> GetDigitalComressionGain(); + // Returns true if clipping prediction is enabled. + bool clipping_predictor_enabled() const { return !!clipping_predictor_; } + + // Returns true if clipping prediction is used to adjust the analog gain. + bool use_clipping_predictor_step() const { + return use_clipping_predictor_step_; + } + private: friend class AgcManagerDirectTest; @@ -81,13 +102,28 @@ class AgcManagerDirect final { AgcMinMicLevelExperimentEnabled50); FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectStandaloneTest, AgcMinMicLevelExperimentEnabledAboveStartupLevel); + FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectStandaloneTest, + ClippingParametersVerified); + FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectStandaloneTest, + DisableClippingPredictorDoesNotLowerVolume); + FRIEND_TEST_ALL_PREFIXES( + AgcManagerDirectStandaloneTest, + EnableClippingPredictorWithUnusedPredictedStepDoesNotLowerVolume); + FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectStandaloneTest, + EnableClippingPredictorLowersVolume); // Dependency injection for testing. Don't delete |agc| as the memory is owned // by the manager. 
- AgcManagerDirect(Agc* agc, - int startup_min_level, - int clipped_level_min, - int sample_rate_hz); + AgcManagerDirect( + Agc* agc, + int startup_min_level, + int clipped_level_min, + int sample_rate_hz, + int clipped_level_step, + float clipped_ratio_threshold, + int clipped_wait_frames, + const AudioProcessing::Config::GainController1::AnalogGainController:: + ClippingPredictor& clipping_config); void AnalyzePreProcess(const float* const* audio, size_t samples_per_channel); @@ -105,8 +141,17 @@ class AgcManagerDirect final { bool capture_output_used_; int channel_controlling_gain_ = 0; + const int clipped_level_step_; + const float clipped_ratio_threshold_; + const int clipped_wait_frames_; + std::vector<std::unique_ptr<MonoAgc>> channel_agcs_; std::vector<absl::optional<int>> new_compressions_to_set_; + + const std::unique_ptr<ClippingPredictor> clipping_predictor_; + const bool use_clipping_predictor_step_; + ClippingPredictorEvaluator clipping_predictor_evaluator_; + int clipping_predictor_log_counter_; }; class MonoAgc { @@ -123,7 +168,7 @@ class MonoAgc { void Initialize(); void HandleCaptureOutputUsedChange(bool capture_output_used); - void HandleClipping(); + void HandleClipping(int clipped_level_step); void Process(const int16_t* audio, size_t samples_per_channel, diff --git a/modules/audio_processing/agc/agc_manager_direct_unittest.cc b/modules/audio_processing/agc/agc_manager_direct_unittest.cc index 1954ed4b21..bb284f9abc 100644 --- a/modules/audio_processing/agc/agc_manager_direct_unittest.cc +++ b/modules/audio_processing/agc/agc_manager_direct_unittest.cc @@ -26,13 +26,19 @@ using ::testing::SetArgPointee; namespace webrtc { namespace { -const int kSampleRateHz = 32000; -const int kNumChannels = 1; -const int kSamplesPerChannel = kSampleRateHz / 100; -const int kInitialVolume = 128; +constexpr int kSampleRateHz = 32000; +constexpr int kNumChannels = 1; +constexpr int kSamplesPerChannel = kSampleRateHz / 100; +constexpr int kInitialVolume = 128; constexpr int kClippedMin = 165; // Arbitrary, but different from the default. 
-const float kAboveClippedThreshold = 0.2f; -const int kMinMicLevel = 12; +constexpr float kAboveClippedThreshold = 0.2f; +constexpr int kMinMicLevel = 12; +constexpr int kClippedLevelStep = 15; +constexpr float kClippedRatioThreshold = 0.1f; +constexpr int kClippedWaitFrames = 300; + +using ClippingPredictorConfig = AudioProcessing::Config::GainController1:: + AnalogGainController::ClippingPredictor; class MockGainControl : public GainControl { public: @@ -57,10 +63,53 @@ class MockGainControl : public GainControl { }; std::unique_ptr<AgcManagerDirect> CreateAgcManagerDirect( - int startup_min_level) { + int startup_min_level, + int clipped_level_step, + float clipped_ratio_threshold, + int clipped_wait_frames) { return std::make_unique<AgcManagerDirect>( /*num_capture_channels=*/1, startup_min_level, kClippedMin, - /*disable_digital_adaptive=*/true, kSampleRateHz); + /*disable_digital_adaptive=*/true, kSampleRateHz, clipped_level_step, + clipped_ratio_threshold, clipped_wait_frames, ClippingPredictorConfig()); +} + +std::unique_ptr<AgcManagerDirect> CreateAgcManagerDirect( + int startup_min_level, + int clipped_level_step, + float clipped_ratio_threshold, + int clipped_wait_frames, + const ClippingPredictorConfig& clipping_cfg) { + return std::make_unique<AgcManagerDirect>( + /*num_capture_channels=*/1, startup_min_level, kClippedMin, + /*disable_digital_adaptive=*/true, kSampleRateHz, clipped_level_step, + clipped_ratio_threshold, clipped_wait_frames, clipping_cfg); +} + +void CallPreProcessAudioBuffer(int num_calls, + float peak_ratio, + AgcManagerDirect& manager) { + RTC_DCHECK_GE(1.f, peak_ratio); + AudioBuffer audio_buffer(kSampleRateHz, 1, kSampleRateHz, 1, kSampleRateHz, + 1); + const int num_channels = audio_buffer.num_channels(); + const int num_frames = audio_buffer.num_frames(); + for (int ch = 0; ch < num_channels; ++ch) { + for (int i = 0; i < num_frames; i += 2) { + audio_buffer.channels()[ch][i] = peak_ratio * 32767.f; + audio_buffer.channels()[ch][i + 1] = 0.0f; + } + } + for (int n = 0; n < num_calls / 2; ++n) { + manager.AnalyzePreProcess(&audio_buffer); + } + for (int ch = 0; ch < num_channels; ++ch) { + for (int i = 0; i < num_frames; ++i) { + audio_buffer.channels()[ch][i] = peak_ratio * 32767.f; + } + } + for (int n = 0; n < num_calls - num_calls / 2; ++n) { + manager.AnalyzePreProcess(&audio_buffer); + } } } // namespace @@ -69,7 +118,14 @@ class AgcManagerDirectTest : public ::testing::Test { protected: AgcManagerDirectTest() : agc_(new MockAgc), - manager_(agc_, kInitialVolume, kClippedMin, kSampleRateHz), + manager_(agc_, + kInitialVolume, + kClippedMin, + kSampleRateHz, + kClippedLevelStep, + kClippedRatioThreshold, + kClippedWaitFrames, + ClippingPredictorConfig()), audio(kNumChannels), audio_data(kNumChannels * kSamplesPerChannel, 0.f) { ExpectInitialize(); @@ -124,12 +180,32 @@ class AgcManagerDirectTest : public ::testing::Test { audio[ch][k] = 32767.f; } } - for (int i = 0; i < num_calls; ++i) { manager_.AnalyzePreProcess(audio.data(), kSamplesPerChannel); } } + void CallPreProcForChangingAudio(int num_calls, float peak_ratio) { + RTC_DCHECK_GE(1.f, peak_ratio); + std::fill(audio_data.begin(), audio_data.end(), 0.f); + for (size_t ch = 0; ch < kNumChannels; ++ch) { + for (size_t k = 0; k < kSamplesPerChannel; k += 2) { + audio[ch][k] = peak_ratio * 32767.f; + } + } + for (int i = 0; i < num_calls / 2; ++i) { + manager_.AnalyzePreProcess(audio.data(), kSamplesPerChannel); + } + for (size_t ch = 0; ch < kNumChannels; ++ch) { + for (size_t k = 0; k < 
kSamplesPerChannel; ++k) { + audio[ch][k] = peak_ratio * 32767.f; + } + } + for (int i = 0; i < num_calls - num_calls / 2; ++i) { + manager_.AnalyzePreProcess(audio.data(), kSamplesPerChannel); + } + } + MockAgc* agc_; MockGainControl gctrl_; AgcManagerDirect manager_; @@ -696,6 +772,25 @@ TEST_F(AgcManagerDirectTest, TakesNoActionOnZeroMicVolume) { EXPECT_EQ(0, manager_.stream_analog_level()); } +TEST_F(AgcManagerDirectTest, ClippingDetectionLowersVolume) { + SetVolumeAndProcess(255); + EXPECT_EQ(255, manager_.stream_analog_level()); + CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/0.99f); + EXPECT_EQ(255, manager_.stream_analog_level()); + CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/1.0f); + EXPECT_EQ(240, manager_.stream_analog_level()); +} + +TEST_F(AgcManagerDirectTest, DisabledClippingPredictorDoesNotLowerVolume) { + SetVolumeAndProcess(255); + EXPECT_FALSE(manager_.clipping_predictor_enabled()); + EXPECT_EQ(255, manager_.stream_analog_level()); + CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/0.99f); + EXPECT_EQ(255, manager_.stream_analog_level()); + CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/0.99f); + EXPECT_EQ(255, manager_.stream_analog_level()); +} + TEST(AgcManagerDirectStandaloneTest, DisableDigitalDisablesDigital) { auto agc = std::unique_ptr<Agc>(new ::testing::NiceMock<MockAgc>()); MockGainControl gctrl; @@ -705,14 +800,16 @@ TEST(AgcManagerDirectStandaloneTest, DisableDigitalDisablesDigital) { EXPECT_CALL(gctrl, enable_limiter(false)); std::unique_ptr<AgcManagerDirect> manager = - CreateAgcManagerDirect(kInitialVolume); + CreateAgcManagerDirect(kInitialVolume, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames); manager->Initialize(); manager->SetupDigitalGainControl(&gctrl); } TEST(AgcManagerDirectStandaloneTest, AgcMinMicLevelExperiment) { std::unique_ptr<AgcManagerDirect> manager = - CreateAgcManagerDirect(kInitialVolume); + CreateAgcManagerDirect(kInitialVolume, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames); EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel); EXPECT_EQ(manager->channel_agcs_[0]->startup_min_level(), kInitialVolume); } @@ -721,7 +818,8 @@ TEST(AgcManagerDirectStandaloneTest, AgcMinMicLevelExperimentDisabled) { test::ScopedFieldTrials field_trial( "WebRTC-Audio-AgcMinMicLevelExperiment/Disabled/"); std::unique_ptr<AgcManagerDirect> manager = - CreateAgcManagerDirect(kInitialVolume); + CreateAgcManagerDirect(kInitialVolume, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames); EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel); EXPECT_EQ(manager->channel_agcs_[0]->startup_min_level(), kInitialVolume); } @@ -732,7 +830,8 @@ TEST(AgcManagerDirectStandaloneTest, AgcMinMicLevelExperimentOutOfRangeAbove) { test::ScopedFieldTrials field_trial( "WebRTC-Audio-AgcMinMicLevelExperiment/Enabled-256/"); std::unique_ptr<AgcManagerDirect> manager = - CreateAgcManagerDirect(kInitialVolume); + CreateAgcManagerDirect(kInitialVolume, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames); EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel); EXPECT_EQ(manager->channel_agcs_[0]->startup_min_level(), kInitialVolume); } @@ -743,7 +842,8 @@ TEST(AgcManagerDirectStandaloneTest, AgcMinMicLevelExperimentOutOfRangeBelow) { test::ScopedFieldTrials field_trial( "WebRTC-Audio-AgcMinMicLevelExperiment/Enabled--1/"); std::unique_ptr<AgcManagerDirect> manager = - CreateAgcManagerDirect(kInitialVolume); + 
CreateAgcManagerDirect(kInitialVolume); + CreateAgcManagerDirect(kInitialVolume, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames); EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel); EXPECT_EQ(manager->channel_agcs_[0]->startup_min_level(), kInitialVolume); } @@ -755,7 +855,8 @@ TEST(AgcManagerDirectStandaloneTest, AgcMinMicLevelExperimentEnabled50) { test::ScopedFieldTrials field_trial( "WebRTC-Audio-AgcMinMicLevelExperiment/Enabled-50/"); std::unique_ptr<AgcManagerDirect> manager = - CreateAgcManagerDirect(kInitialVolume); + CreateAgcManagerDirect(kInitialVolume, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames); EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), 50); EXPECT_EQ(manager->channel_agcs_[0]->startup_min_level(), kInitialVolume); } @@ -768,9 +869,130 @@ TEST(AgcManagerDirectStandaloneTest, test::ScopedFieldTrials field_trial( "WebRTC-Audio-AgcMinMicLevelExperiment/Enabled-50/"); std::unique_ptr<AgcManagerDirect> manager = - CreateAgcManagerDirect(/*startup_min_level=*/30); + CreateAgcManagerDirect(/*startup_min_level=*/30, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames); EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), 50); EXPECT_EQ(manager->channel_agcs_[0]->startup_min_level(), 50); } +// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_level_step`. +// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_ratio_threshold`. +// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_wait_frames`. +// Verifies that configurable clipping parameters are initialized as intended. +TEST(AgcManagerDirectStandaloneTest, ClippingParametersVerified) { + std::unique_ptr<AgcManagerDirect> manager = + CreateAgcManagerDirect(kInitialVolume, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames); + manager->Initialize(); + EXPECT_EQ(manager->clipped_level_step_, kClippedLevelStep); + EXPECT_EQ(manager->clipped_ratio_threshold_, kClippedRatioThreshold); + EXPECT_EQ(manager->clipped_wait_frames_, kClippedWaitFrames); + std::unique_ptr<AgcManagerDirect> manager_custom = + CreateAgcManagerDirect(kInitialVolume, + /*clipped_level_step=*/10, + /*clipped_ratio_threshold=*/0.2f, + /*clipped_wait_frames=*/50); + manager_custom->Initialize(); + EXPECT_EQ(manager_custom->clipped_level_step_, 10); + EXPECT_EQ(manager_custom->clipped_ratio_threshold_, 0.2f); + EXPECT_EQ(manager_custom->clipped_wait_frames_, 50); +} + +TEST(AgcManagerDirectStandaloneTest, + DisableClippingPredictorDisablesClippingPredictor) { + ClippingPredictorConfig default_config; + EXPECT_FALSE(default_config.enabled); + std::unique_ptr<AgcManagerDirect> manager = CreateAgcManagerDirect( + kInitialVolume, kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames, default_config); + manager->Initialize(); + EXPECT_FALSE(manager->clipping_predictor_enabled()); + EXPECT_FALSE(manager->use_clipping_predictor_step()); +} + +TEST(AgcManagerDirectStandaloneTest, ClippingPredictorDisabledByDefault) { + constexpr ClippingPredictorConfig kDefaultConfig; + EXPECT_FALSE(kDefaultConfig.enabled); +} + +TEST(AgcManagerDirectStandaloneTest, + EnableClippingPredictorEnablesClippingPredictor) { + // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ ClippingPredictorConfig config; + config.enabled = true; + config.use_predicted_step = true; + std::unique_ptr<AgcManagerDirect> manager = CreateAgcManagerDirect( + kInitialVolume, kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames, config); + manager->Initialize(); + EXPECT_TRUE(manager->clipping_predictor_enabled()); + EXPECT_TRUE(manager->use_clipping_predictor_step()); +} + +TEST(AgcManagerDirectStandaloneTest, + DisableClippingPredictorDoesNotLowerVolume) { + // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed. + constexpr ClippingPredictorConfig kConfig{/*enabled=*/false}; + AgcManagerDirect manager(new ::testing::NiceMock<MockAgc>(), kInitialVolume, + kClippedMin, kSampleRateHz, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames, kConfig); + manager.Initialize(); + manager.set_stream_analog_level(/*level=*/255); + EXPECT_FALSE(manager.clipping_predictor_enabled()); + EXPECT_FALSE(manager.use_clipping_predictor_step()); + EXPECT_EQ(manager.stream_analog_level(), 255); + manager.Process(nullptr); + CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager); + EXPECT_EQ(manager.stream_analog_level(), 255); + CallPreProcessAudioBuffer(/*num_calls=*/300, /*peak_ratio=*/0.99f, manager); + EXPECT_EQ(manager.stream_analog_level(), 255); + CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager); + EXPECT_EQ(manager.stream_analog_level(), 255); +} + +TEST(AgcManagerDirectStandaloneTest, + EnableClippingPredictorWithUnusedPredictedStepDoesNotLowerVolume) { + // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed. + ClippingPredictorConfig config; + config.enabled = true; + config.use_predicted_step = false; + AgcManagerDirect manager(new ::testing::NiceMock<MockAgc>(), kInitialVolume, + kClippedMin, kSampleRateHz, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames, config); + manager.Initialize(); + manager.set_stream_analog_level(/*level=*/255); + EXPECT_TRUE(manager.clipping_predictor_enabled()); + EXPECT_FALSE(manager.use_clipping_predictor_step()); + EXPECT_EQ(manager.stream_analog_level(), 255); + manager.Process(nullptr); + CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager); + EXPECT_EQ(manager.stream_analog_level(), 255); + CallPreProcessAudioBuffer(/*num_calls=*/300, /*peak_ratio=*/0.99f, manager); + EXPECT_EQ(manager.stream_analog_level(), 255); + CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager); + EXPECT_EQ(manager.stream_analog_level(), 255); +} + +TEST(AgcManagerDirectStandaloneTest, EnableClippingPredictorLowersVolume) { + // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ ClippingPredictorConfig config; + config.enabled = true; + config.use_predicted_step = true; + AgcManagerDirect manager(new ::testing::NiceMock<MockAgc>(), kInitialVolume, + kClippedMin, kSampleRateHz, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames, config); + manager.Initialize(); + manager.set_stream_analog_level(/*level=*/255); + EXPECT_TRUE(manager.clipping_predictor_enabled()); + EXPECT_TRUE(manager.use_clipping_predictor_step()); + EXPECT_EQ(manager.stream_analog_level(), 255); + manager.Process(nullptr); + CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager); + EXPECT_EQ(manager.stream_analog_level(), 240); + CallPreProcessAudioBuffer(/*num_calls=*/300, /*peak_ratio=*/0.99f, manager); + EXPECT_EQ(manager.stream_analog_level(), 240); + CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager); + EXPECT_EQ(manager.stream_analog_level(), 225); +} + } // namespace webrtc diff --git a/modules/audio_processing/agc/clipping_predictor.cc b/modules/audio_processing/agc/clipping_predictor.cc new file mode 100644 index 0000000000..982bbca2ee --- /dev/null +++ b/modules/audio_processing/agc/clipping_predictor.cc @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc/clipping_predictor.h" + +#include <algorithm> +#include <memory> + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc/clipping_predictor_level_buffer.h" +#include "modules/audio_processing/agc/gain_map_internal.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { +namespace { + +constexpr int kClippingPredictorMaxGainChange = 15; + +// Estimates the new level from the gain error; a copy of the function +// `LevelFromGainError` in agc_manager_direct.cc. +int LevelFromGainError(int gain_error, + int level, + int min_mic_level, + int max_mic_level) { + RTC_DCHECK_GE(level, 0); + RTC_DCHECK_LE(level, max_mic_level); + if (gain_error == 0) { + return level; + } + int new_level = level; + if (gain_error > 0) { + while (kGainMap[new_level] - kGainMap[level] < gain_error && + new_level < max_mic_level) { + ++new_level; + } + } else { + while (kGainMap[new_level] - kGainMap[level] > gain_error && + new_level > min_mic_level) { + --new_level; + } + } + return new_level; +} + +float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) { + const float crest_factor = + FloatS16ToDbfs(level.max) - FloatS16ToDbfs(std::sqrt(level.average)); + return crest_factor; +} + +// Crest factor-based clipping prediction and clipped level step estimation. 
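+// A clipping event is predicted for a channel when the crest factor (peak +// level minus RMS level, both in dBFS) measured over the most recent +// `window_length` frames drops more than `crest_factor_margin` dB below the +// crest factor of a delayed reference window, while the recent peak is above +// `clipping_threshold`. As a rough illustration, a full-scale sine has a crest +// factor of about 3 dB whereas a heavily clipped, near-square waveform +// approaches 0 dB, so a sudden drop of the crest factor signals imminent +// clipping.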
+class ClippingEventPredictor : public ClippingPredictor { + public: + // ClippingEventPredictor with `num_channels` channels (limited to values + // higher than zero); window size `window_length` and reference window size + // `reference_window_length` (both referring to the number of frames in the + // respective sliding windows and limited to values higher than zero); + // reference window delay `reference_window_delay` (delay in frames, limited + // to values zero and higher with an additional requirement of + // `window_length` < `reference_window_length` + reference_window_delay`); + // and an estimation peak threshold `clipping_threshold` and a crest factor + // drop threshold `crest_factor_margin` (both in dB). + ClippingEventPredictor(int num_channels, + int window_length, + int reference_window_length, + int reference_window_delay, + float clipping_threshold, + float crest_factor_margin) + : window_length_(window_length), + reference_window_length_(reference_window_length), + reference_window_delay_(reference_window_delay), + clipping_threshold_(clipping_threshold), + crest_factor_margin_(crest_factor_margin) { + RTC_DCHECK_GT(num_channels, 0); + RTC_DCHECK_GT(window_length, 0); + RTC_DCHECK_GT(reference_window_length, 0); + RTC_DCHECK_GE(reference_window_delay, 0); + RTC_DCHECK_GT(reference_window_length + reference_window_delay, + window_length); + const int buffer_length = GetMinFramesProcessed(); + RTC_DCHECK_GT(buffer_length, 0); + for (int i = 0; i < num_channels; ++i) { + ch_buffers_.push_back( + std::make_unique<ClippingPredictorLevelBuffer>(buffer_length)); + } + } + + ClippingEventPredictor(const ClippingEventPredictor&) = delete; + ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete; + ~ClippingEventPredictor() {} + + void Reset() { + const int num_channels = ch_buffers_.size(); + for (int i = 0; i < num_channels; ++i) { + ch_buffers_[i]->Reset(); + } + } + + // Analyzes a frame of audio and stores the framewise metrics in + // `ch_buffers_`. + void Analyze(const AudioFrameView<const float>& frame) { + const int num_channels = frame.num_channels(); + RTC_DCHECK_EQ(num_channels, ch_buffers_.size()); + const int samples_per_channel = frame.samples_per_channel(); + RTC_DCHECK_GT(samples_per_channel, 0); + for (int channel = 0; channel < num_channels; ++channel) { + float sum_squares = 0.0f; + float peak = 0.0f; + for (const auto& sample : frame.channel(channel)) { + sum_squares += sample * sample; + peak = std::max(std::fabs(sample), peak); + } + ch_buffers_[channel]->Push( + {sum_squares / static_cast<float>(samples_per_channel), peak}); + } + } + + // Estimates the analog gain adjustment for channel `channel` using a + // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an + // estimate for the clipped level step equal to `default_clipped_level_step_` + // if at least `GetMinFramesProcessed()` frames have been processed since the + // last reset and a clipping event is predicted. `level`, `min_mic_level`, and + // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255]. 
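+ // In that case the returned step equals + // `level - rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level)`, + // that is, at most `default_step` and never enough to push the level below + // `min_mic_level`; in all other cases absl::nullopt is returned.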
+ absl::optional<int> EstimateClippedLevelStep(int channel, + int level, + int default_step, + int min_mic_level, + int max_mic_level) const { + RTC_CHECK_GE(channel, 0); + RTC_CHECK_LT(channel, ch_buffers_.size()); + RTC_DCHECK_GE(level, 0); + RTC_DCHECK_LE(level, 255); + RTC_DCHECK_GT(default_step, 0); + RTC_DCHECK_LE(default_step, 255); + RTC_DCHECK_GE(min_mic_level, 0); + RTC_DCHECK_LE(min_mic_level, 255); + RTC_DCHECK_GE(max_mic_level, 0); + RTC_DCHECK_LE(max_mic_level, 255); + if (level <= min_mic_level) { + return absl::nullopt; + } + if (PredictClippingEvent(channel)) { + const int new_level = + rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level); + const int step = level - new_level; + if (step > 0) { + return step; + } + } + return absl::nullopt; + } + + private: + int GetMinFramesProcessed() const { + return reference_window_delay_ + reference_window_length_; + } + + // Predicts clipping events based on the processed audio frames. Returns + // true if a clipping event is likely. + bool PredictClippingEvent(int channel) const { + const auto metrics = + ch_buffers_[channel]->ComputePartialMetrics(0, window_length_); + if (!metrics.has_value() || + !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) { + return false; + } + const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics( + reference_window_delay_, reference_window_length_); + if (!reference_metrics.has_value()) { + return false; + } + const float crest_factor = ComputeCrestFactor(metrics.value()); + const float reference_crest_factor = + ComputeCrestFactor(reference_metrics.value()); + if (crest_factor < reference_crest_factor - crest_factor_margin_) { + return true; + } + return false; + } + + std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_; + const int window_length_; + const int reference_window_length_; + const int reference_window_delay_; + const float clipping_threshold_; + const float crest_factor_margin_; +}; + +// Performs crest factor-based clipping peak prediction. +class ClippingPeakPredictor : public ClippingPredictor { + public: + // Ctor. ClippingPeakPredictor with `num_channels` channels (limited to values + // higher than zero); window size `window_length` and reference window size + // `reference_window_length` (both referring to the number of frames in the + // respective sliding windows and limited to values higher than zero); + // reference window delay `reference_window_delay` (delay in frames, limited + // to values zero and higher with an additional requirement of + // `window_length` < `reference_window_length` + reference_window_delay`); + // and a clipping prediction threshold `clipping_threshold` (in dB). Adaptive + // clipped level step estimation is used if `adaptive_step_estimation` is + // true. 
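+ // Clipping is predicted when both the recent peak and the projected peak + // exceed `clipping_threshold`; the projected peak is the crest factor of the + // delayed reference window plus the RMS level (in dBFS) of the most recent + // `window_length` frames. With adaptive step estimation, the step is derived + // from the gain reduction needed to bring the projected peak back to 0 dBFS + // (capped at 15 dB and mapped to mic levels via `kGainMap`), no smaller than + // `default_step`; the resulting level is then clamped to the + // [`min_mic_level`, `max_mic_level`] range.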
+ explicit ClippingPeakPredictor(int num_channels, + int window_length, + int reference_window_length, + int reference_window_delay, + int clipping_threshold, + bool adaptive_step_estimation) + : window_length_(window_length), + reference_window_length_(reference_window_length), + reference_window_delay_(reference_window_delay), + clipping_threshold_(clipping_threshold), + adaptive_step_estimation_(adaptive_step_estimation) { + RTC_DCHECK_GT(num_channels, 0); + RTC_DCHECK_GT(window_length, 0); + RTC_DCHECK_GT(reference_window_length, 0); + RTC_DCHECK_GE(reference_window_delay, 0); + RTC_DCHECK_GT(reference_window_length + reference_window_delay, + window_length); + const int buffer_length = GetMinFramesProcessed(); + RTC_DCHECK_GT(buffer_length, 0); + for (int i = 0; i < num_channels; ++i) { + ch_buffers_.push_back( + std::make_unique<ClippingPredictorLevelBuffer>(buffer_length)); + } + } + + ClippingPeakPredictor(const ClippingPeakPredictor&) = delete; + ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete; + ~ClippingPeakPredictor() {} + + void Reset() { + const int num_channels = ch_buffers_.size(); + for (int i = 0; i < num_channels; ++i) { + ch_buffers_[i]->Reset(); + } + } + + // Analyzes a frame of audio and stores the framewise metrics in + // `ch_buffers_`. + void Analyze(const AudioFrameView<const float>& frame) { + const int num_channels = frame.num_channels(); + RTC_DCHECK_EQ(num_channels, ch_buffers_.size()); + const int samples_per_channel = frame.samples_per_channel(); + RTC_DCHECK_GT(samples_per_channel, 0); + for (int channel = 0; channel < num_channels; ++channel) { + float sum_squares = 0.0f; + float peak = 0.0f; + for (const auto& sample : frame.channel(channel)) { + sum_squares += sample * sample; + peak = std::max(std::fabs(sample), peak); + } + ch_buffers_[channel]->Push( + {sum_squares / static_cast<float>(samples_per_channel), peak}); + } + } + + // Estimates the analog gain adjustment for channel `channel` using a + // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an + // estimate for the clipped level step (equal to + // `default_clipped_level_step_` if `adaptive_estimation_` is false) if at + // least `GetMinFramesProcessed()` frames have been processed since the last + // reset and a clipping event is predicted. `level`, `min_mic_level`, and + // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255]. 
+ absl::optional<int> EstimateClippedLevelStep(int channel, + int level, + int default_step, + int min_mic_level, + int max_mic_level) const { + RTC_DCHECK_GE(channel, 0); + RTC_DCHECK_LT(channel, ch_buffers_.size()); + RTC_DCHECK_GE(level, 0); + RTC_DCHECK_LE(level, 255); + RTC_DCHECK_GT(default_step, 0); + RTC_DCHECK_LE(default_step, 255); + RTC_DCHECK_GE(min_mic_level, 0); + RTC_DCHECK_LE(min_mic_level, 255); + RTC_DCHECK_GE(max_mic_level, 0); + RTC_DCHECK_LE(max_mic_level, 255); + if (level <= min_mic_level) { + return absl::nullopt; + } + absl::optional<float> estimate_db = EstimatePeakValue(channel); + if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) { + int step = 0; + if (!adaptive_step_estimation_) { + step = default_step; + } else { + const int estimated_gain_change = + rtc::SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())), + -kClippingPredictorMaxGainChange, 0); + step = + std::max(level - LevelFromGainError(estimated_gain_change, level, + min_mic_level, max_mic_level), + default_step); + } + const int new_level = + rtc::SafeClamp(level - step, min_mic_level, max_mic_level); + if (level > new_level) { + return level - new_level; + } + } + return absl::nullopt; + } + + private: + int GetMinFramesProcessed() { + return reference_window_delay_ + reference_window_length_; + } + + // Predicts clipping sample peaks based on the processed audio frames. + // Returns the estimated peak value if clipping is predicted. Otherwise + // returns absl::nullopt. + absl::optional<float> EstimatePeakValue(int channel) const { + const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics( + reference_window_delay_, reference_window_length_); + if (!reference_metrics.has_value()) { + return absl::nullopt; + } + const auto metrics = + ch_buffers_[channel]->ComputePartialMetrics(0, window_length_); + if (!metrics.has_value() || + !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) { + return absl::nullopt; + } + const float reference_crest_factor = + ComputeCrestFactor(reference_metrics.value()); + const float& mean_squares = metrics.value().average; + const float projected_peak = + reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares)); + return projected_peak; + } + + std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_; + const int window_length_; + const int reference_window_length_; + const int reference_window_delay_; + const int clipping_threshold_; + const bool adaptive_step_estimation_; +}; + +} // namespace + +std::unique_ptr<ClippingPredictor> CreateClippingPredictor( + int num_channels, + const AudioProcessing::Config::GainController1::AnalogGainController:: + ClippingPredictor& config) { + if (!config.enabled) { + RTC_LOG(LS_INFO) << "[agc] Clipping prediction disabled."; + return nullptr; + } + RTC_LOG(LS_INFO) << "[agc] Clipping prediction enabled."; + using ClippingPredictorMode = AudioProcessing::Config::GainController1:: + AnalogGainController::ClippingPredictor::Mode; + switch (config.mode) { + case ClippingPredictorMode::kClippingEventPrediction: + return std::make_unique<ClippingEventPredictor>( + num_channels, config.window_length, config.reference_window_length, + config.reference_window_delay, config.clipping_threshold, + config.crest_factor_margin); + case ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction: + return std::make_unique<ClippingPeakPredictor>( + num_channels, config.window_length, config.reference_window_length, + config.reference_window_delay, 
config.clipping_threshold, + /*adaptive_step_estimation=*/true); + case ClippingPredictorMode::kFixedStepClippingPeakPrediction: + return std::make_unique<ClippingPeakPredictor>( + num_channels, config.window_length, config.reference_window_length, + config.reference_window_delay, config.clipping_threshold, + /*adaptive_step_estimation=*/false); + } + RTC_NOTREACHED(); +} + +} // namespace webrtc diff --git a/modules/audio_processing/agc/clipping_predictor.h b/modules/audio_processing/agc/clipping_predictor.h new file mode 100644 index 0000000000..ee2b6ef1e7 --- /dev/null +++ b/modules/audio_processing/agc/clipping_predictor.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_H_ +#define MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_H_ + +#include <memory> +#include <vector> + +#include "absl/types/optional.h" +#include "modules/audio_processing/include/audio_frame_view.h" +#include "modules/audio_processing/include/audio_processing.h" + +namespace webrtc { + +// Frame-wise clipping prediction and clipped level step estimation. Analyzes +// 10 ms multi-channel frames and estimates an analog mic level decrease step +// to possibly avoid clipping when predicted. `Analyze()` and +// `EstimateClippedLevelStep()` can be called in any order. +class ClippingPredictor { + public: + virtual ~ClippingPredictor() = default; + + virtual void Reset() = 0; + + // Analyzes a 10 ms multi-channel audio frame. + virtual void Analyze(const AudioFrameView<const float>& frame) = 0; + + // Predicts if clipping is going to occur for the specified `channel` in the + // near-future and, if so, it returns a recommended analog mic level decrease + // step. Returns absl::nullopt if clipping is not predicted. + // `level` is the current analog mic level, `default_step` is the amount the + // mic level is lowered by the analog controller with every clipping event and + // `min_mic_level` and `max_mic_level` is the range of allowed analog mic + // levels. + virtual absl::optional<int> EstimateClippedLevelStep( + int channel, + int level, + int default_step, + int min_mic_level, + int max_mic_level) const = 0; + +}; + +// Creates a ClippingPredictor based on the provided `config`. When enabled, +// the following must hold for `config`: +// `window_length < reference_window_length + reference_window_delay`. +// Returns `nullptr` if `config.enabled` is false. +std::unique_ptr<ClippingPredictor> CreateClippingPredictor( + int num_channels, + const AudioProcessing::Config::GainController1::AnalogGainController:: + ClippingPredictor& config); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_H_ diff --git a/modules/audio_processing/agc/clipping_predictor_evaluator.cc b/modules/audio_processing/agc/clipping_predictor_evaluator.cc new file mode 100644 index 0000000000..2a4ea922cf --- /dev/null +++ b/modules/audio_processing/agc/clipping_predictor_evaluator.cc @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc/clipping_predictor_evaluator.h" + +#include <algorithm> + +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace { + +// Returns the index of the oldest item in the ring buffer for a non-empty +// ring buffer with give `size`, `tail` index and `capacity`. +int OldestExpectedDetectionIndex(int size, int tail, int capacity) { + RTC_DCHECK_GT(size, 0); + return tail - size + (tail < size ? capacity : 0); +} + +} // namespace + +ClippingPredictorEvaluator::ClippingPredictorEvaluator(int history_size) + : history_size_(history_size), + ring_buffer_capacity_(history_size + 1), + ring_buffer_(ring_buffer_capacity_), + true_positives_(0), + true_negatives_(0), + false_positives_(0), + false_negatives_(0) { + RTC_DCHECK_GT(history_size_, 0); + Reset(); +} + +ClippingPredictorEvaluator::~ClippingPredictorEvaluator() = default; + +absl::optional<int> ClippingPredictorEvaluator::Observe( + bool clipping_detected, + bool clipping_predicted) { + RTC_DCHECK_GE(ring_buffer_size_, 0); + RTC_DCHECK_LE(ring_buffer_size_, ring_buffer_capacity_); + RTC_DCHECK_GE(ring_buffer_tail_, 0); + RTC_DCHECK_LT(ring_buffer_tail_, ring_buffer_capacity_); + + DecreaseTimesToLive(); + if (clipping_predicted) { + // TODO(bugs.webrtc.org/12874): Use designated initializers one fixed. + Push(/*expected_detection=*/{/*ttl=*/history_size_, /*detected=*/false}); + } + // Clipping is expected if there are expected detections regardless of + // whether all the expected detections have been previously matched - i.e., + // `ExpectedDetection::detected` is true. + const bool clipping_expected = ring_buffer_size_ > 0; + + absl::optional<int> prediction_interval; + if (clipping_expected && clipping_detected) { + prediction_interval = FindEarliestPredictionInterval(); + // Add a true positive for each unexpired expected detection. + const int num_modified_items = MarkExpectedDetectionAsDetected(); + true_positives_ += num_modified_items; + RTC_DCHECK(prediction_interval.has_value() || num_modified_items == 0); + RTC_DCHECK(!prediction_interval.has_value() || num_modified_items > 0); + } else if (clipping_expected && !clipping_detected) { + // Add a false positive if there is one expected detection that has expired + // and that has never been matched before. Note that there is at most one + // unmatched expired detection. + if (HasExpiredUnmatchedExpectedDetection()) { + false_positives_++; + } + } else if (!clipping_expected && clipping_detected) { + false_negatives_++; + } else { + RTC_DCHECK(!clipping_expected && !clipping_detected); + true_negatives_++; + } + return prediction_interval; +} + +void ClippingPredictorEvaluator::Reset() { + // Empty the ring buffer of expected detections. + ring_buffer_tail_ = 0; + ring_buffer_size_ = 0; +} + +// Cost: O(1). +void ClippingPredictorEvaluator::Push(ExpectedDetection value) { + ring_buffer_[ring_buffer_tail_] = value; + ring_buffer_tail_++; + if (ring_buffer_tail_ == ring_buffer_capacity_) { + ring_buffer_tail_ = 0; + } + ring_buffer_size_ = std::min(ring_buffer_capacity_, ring_buffer_size_ + 1); +} + +// Cost: O(N). 
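+// Walks the ring buffer from the oldest to the newest expected detection and +// decrements each TTL. At most one item can have expired (TTL equal to zero); +// when found it is necessarily the oldest item and it is dropped by shrinking +// the buffer size.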
+void ClippingPredictorEvaluator::DecreaseTimesToLive() { + bool expired_found = false; + for (int i = ring_buffer_tail_ - ring_buffer_size_; i < ring_buffer_tail_; + ++i) { + int index = i >= 0 ? i : ring_buffer_capacity_ + i; + RTC_DCHECK_GE(index, 0); + RTC_DCHECK_LT(index, ring_buffer_.size()); + RTC_DCHECK_GE(ring_buffer_[index].ttl, 0); + if (ring_buffer_[index].ttl == 0) { + RTC_DCHECK(!expired_found) + << "There must be at most one expired item in the ring buffer."; + expired_found = true; + RTC_DCHECK_EQ(index, OldestExpectedDetectionIndex(ring_buffer_size_, + ring_buffer_tail_, + ring_buffer_capacity_)) + << "The expired item must be the oldest in the ring buffer."; + } + ring_buffer_[index].ttl--; + } + if (expired_found) { + ring_buffer_size_--; + } +} + +// Cost: O(N). +absl::optional<int> ClippingPredictorEvaluator::FindEarliestPredictionInterval() + const { + absl::optional<int> prediction_interval; + for (int i = ring_buffer_tail_ - ring_buffer_size_; i < ring_buffer_tail_; + ++i) { + int index = i >= 0 ? i : ring_buffer_capacity_ + i; + RTC_DCHECK_GE(index, 0); + RTC_DCHECK_LT(index, ring_buffer_.size()); + if (!ring_buffer_[index].detected) { + prediction_interval = std::max(prediction_interval.value_or(0), + history_size_ - ring_buffer_[index].ttl); + } + } + return prediction_interval; +} + +// Cost: O(N). +int ClippingPredictorEvaluator::MarkExpectedDetectionAsDetected() { + int num_modified_items = 0; + for (int i = ring_buffer_tail_ - ring_buffer_size_; i < ring_buffer_tail_; + ++i) { + int index = i >= 0 ? i : ring_buffer_capacity_ + i; + RTC_DCHECK_GE(index, 0); + RTC_DCHECK_LT(index, ring_buffer_.size()); + if (!ring_buffer_[index].detected) { + num_modified_items++; + } + ring_buffer_[index].detected = true; + } + return num_modified_items; +} + +// Cost: O(1). +bool ClippingPredictorEvaluator::HasExpiredUnmatchedExpectedDetection() const { + if (ring_buffer_size_ == 0) { + return false; + } + // If an expired item, that is `ttl` equal to 0, exists, it must be the + // oldest. + const int oldest_index = OldestExpectedDetectionIndex( + ring_buffer_size_, ring_buffer_tail_, ring_buffer_capacity_); + RTC_DCHECK_GE(oldest_index, 0); + RTC_DCHECK_LT(oldest_index, ring_buffer_.size()); + return ring_buffer_[oldest_index].ttl == 0 && + !ring_buffer_[oldest_index].detected; +} + +} // namespace webrtc diff --git a/modules/audio_processing/agc/clipping_predictor_evaluator.h b/modules/audio_processing/agc/clipping_predictor_evaluator.h new file mode 100644 index 0000000000..e76f25d5e1 --- /dev/null +++ b/modules/audio_processing/agc/clipping_predictor_evaluator.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_EVALUATOR_H_ +#define MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_EVALUATOR_H_ + +#include <vector> + +#include "absl/types/optional.h" + +namespace webrtc { + +// Counts true/false positives/negatives while observing sequences of flag pairs +// that indicate whether clipping has been detected and/or if clipping is +// predicted. 
When a true positive is found, it measures the time interval between +// prediction and detection events. +// From the time a prediction is observed and for a period equal to +// `history_size` calls to `Observe()`, one or more detections are expected. If +// the expectation is met, a true positive is added and the time interval +// between the earliest prediction and the detection is recorded; otherwise, +// when the deadline is reached, a false positive is added. Note that one +// detection matches all the expected detections that have not expired - i.e., +// one detection counts as multiple true positives. +// If a detection is observed, but no prediction has been observed over the past +// `history_size` calls to `Observe()`, then a false negative is added; +// otherwise, a true negative is added. +class ClippingPredictorEvaluator { + public: + // Ctor. `history_size` indicates how long to wait for a call to `Observe()` + // having `clipping_detected` set to true from the time clipping is predicted. + explicit ClippingPredictorEvaluator(int history_size); + ClippingPredictorEvaluator(const ClippingPredictorEvaluator&) = delete; + ClippingPredictorEvaluator& operator=(const ClippingPredictorEvaluator&) = + delete; + ~ClippingPredictorEvaluator(); + + // Observes whether clipping has been detected and/or if clipping is + // predicted. When predicted, one or more detections are expected in the next + // `history_size_` calls of `Observe()`. When true positives are found, returns + // the prediction interval between the earliest prediction and the detection. + absl::optional<int> Observe(bool clipping_detected, bool clipping_predicted); + + // Removes any expectation recently set after a call to `Observe()` having + // `clipping_predicted` set to true. + void Reset(); + + // Metrics getters. + int true_positives() const { return true_positives_; } + int true_negatives() const { return true_negatives_; } + int false_positives() const { return false_positives_; } + int false_negatives() const { return false_negatives_; } + + private: + const int history_size_; + + // State of a detection expected to be observed after a prediction. + struct ExpectedDetection { + // Time to live (TTL); remaining number of `Observe()` calls to match a call + // having `clipping_detected` set to true. + int ttl; + // True if an `Observe()` call having `clipping_detected` set to true has + // been observed. + bool detected; + }; + // Ring buffer of expected detections. + const int ring_buffer_capacity_; + std::vector<ExpectedDetection> ring_buffer_; + int ring_buffer_tail_; + int ring_buffer_size_; + + // Pushes `expected_detection` into `ring_buffer_`. + void Push(ExpectedDetection expected_detection); + // Decreases the TTLs in `ring_buffer_` and removes expired + // items. + void DecreaseTimesToLive(); + // Returns the prediction interval for the earliest unexpired expected + // detection if any. + absl::optional<int> FindEarliestPredictionInterval() const; + // Marks all the items in `ring_buffer_` as `detected` and + // returns the number of updated items. + int MarkExpectedDetectionAsDetected(); + // Returns true if `ring_buffer_` has an item having `ttl` + // equal to 0 (expired) and `detected` equal to false (unmatched). + bool HasExpiredUnmatchedExpectedDetection() const; + + // Metrics.
+ int true_positives_; + int true_negatives_; + int false_positives_; + int false_negatives_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_EVALUATOR_H_ diff --git a/modules/audio_processing/agc/clipping_predictor_evaluator_unittest.cc b/modules/audio_processing/agc/clipping_predictor_evaluator_unittest.cc new file mode 100644 index 0000000000..1eb83eae61 --- /dev/null +++ b/modules/audio_processing/agc/clipping_predictor_evaluator_unittest.cc @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc/clipping_predictor_evaluator.h" + +#include <cstdint> +#include <memory> +#include <tuple> +#include <vector> + +#include "absl/types/optional.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/random.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using testing::Eq; +using testing::Optional; + +constexpr bool kDetected = true; +constexpr bool kNotDetected = false; + +constexpr bool kPredicted = true; +constexpr bool kNotPredicted = false; + +int SumTrueFalsePositivesNegatives( + const ClippingPredictorEvaluator& evaluator) { + return evaluator.true_positives() + evaluator.true_negatives() + + evaluator.false_positives() + evaluator.false_negatives(); +} + +// Checks the metrics after init - i.e., no call to `Observe()`. +TEST(ClippingPredictorEvaluatorTest, Init) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + EXPECT_EQ(evaluator.true_positives(), 0); + EXPECT_EQ(evaluator.true_negatives(), 0); + EXPECT_EQ(evaluator.false_positives(), 0); + EXPECT_EQ(evaluator.false_negatives(), 0); +} + +class ClippingPredictorEvaluatorParameterization + : public ::testing::TestWithParam<std::tuple<int, int>> { + protected: + uint64_t seed() const { + return rtc::checked_cast<uint64_t>(std::get<0>(GetParam())); + } + int history_size() const { return std::get<1>(GetParam()); } +}; + +// Checks that after each call to `Observe()` at most one metric changes. +TEST_P(ClippingPredictorEvaluatorParameterization, AtMostOneMetricChanges) { + constexpr int kNumCalls = 123; + Random random_generator(seed()); + ClippingPredictorEvaluator evaluator(history_size()); + + for (int i = 0; i < kNumCalls; ++i) { + SCOPED_TRACE(i); + // Read metrics before `Observe()` is called. + const int last_tp = evaluator.true_positives(); + const int last_tn = evaluator.true_negatives(); + const int last_fp = evaluator.false_positives(); + const int last_fn = evaluator.false_negatives(); + // `Observe()` a random observation. + bool clipping_detected = random_generator.Rand<bool>(); + bool clipping_predicted = random_generator.Rand<bool>(); + evaluator.Observe(clipping_detected, clipping_predicted); + + // Check that at most one metric has changed. + int num_changes = 0; + num_changes += last_tp == evaluator.true_positives() ? 0 : 1; + num_changes += last_tn == evaluator.true_negatives() ? 0 : 1; + num_changes += last_fp == evaluator.false_positives() ? 0 : 1; + num_changes += last_fn == evaluator.false_negatives() ? 
0 : 1; + EXPECT_GE(num_changes, 0); + EXPECT_LE(num_changes, 1); + } +} + +// Checks that after each call to `Observe()` each metric either remains +// unchanged or grows. +TEST_P(ClippingPredictorEvaluatorParameterization, MetricsAreWeaklyMonotonic) { + constexpr int kNumCalls = 123; + Random random_generator(seed()); + ClippingPredictorEvaluator evaluator(history_size()); + + for (int i = 0; i < kNumCalls; ++i) { + SCOPED_TRACE(i); + // Read metrics before `Observe()` is called. + const int last_tp = evaluator.true_positives(); + const int last_tn = evaluator.true_negatives(); + const int last_fp = evaluator.false_positives(); + const int last_fn = evaluator.false_negatives(); + // `Observe()` a random observation. + bool clipping_detected = random_generator.Rand<bool>(); + bool clipping_predicted = random_generator.Rand<bool>(); + evaluator.Observe(clipping_detected, clipping_predicted); + + // Check that metrics are weakly monotonic. + EXPECT_GE(evaluator.true_positives(), last_tp); + EXPECT_GE(evaluator.true_negatives(), last_tn); + EXPECT_GE(evaluator.false_positives(), last_fp); + EXPECT_GE(evaluator.false_negatives(), last_fn); + } +} + +// Checks that after each call to `Observe()` the growth speed of each metrics +// is bounded. +TEST_P(ClippingPredictorEvaluatorParameterization, BoundedMetricsGrowth) { + constexpr int kNumCalls = 123; + Random random_generator(seed()); + ClippingPredictorEvaluator evaluator(history_size()); + + for (int i = 0; i < kNumCalls; ++i) { + SCOPED_TRACE(i); + // Read metrics before `Observe()` is called. + const int last_tp = evaluator.true_positives(); + const int last_tn = evaluator.true_negatives(); + const int last_fp = evaluator.false_positives(); + const int last_fn = evaluator.false_negatives(); + // `Observe()` a random observation. + bool clipping_detected = random_generator.Rand<bool>(); + bool clipping_predicted = random_generator.Rand<bool>(); + evaluator.Observe(clipping_detected, clipping_predicted); + + // Check that TPs grow by at most `history_size() + 1`. Such an upper bound + // is reached when multiple predictions are matched by a single detection. + EXPECT_LE(evaluator.true_positives() - last_tp, history_size() + 1); + // Check that TNs, FPs and FNs grow by at most one. `max_growth`. + EXPECT_LE(evaluator.true_negatives() - last_tn, 1); + EXPECT_LE(evaluator.false_positives() - last_fp, 1); + EXPECT_LE(evaluator.false_negatives() - last_fn, 1); + } +} + +// Checks that `Observe()` returns a prediction interval if and only if one or +// more true positives are found. +TEST_P(ClippingPredictorEvaluatorParameterization, + PredictionIntervalIfAndOnlyIfTruePositives) { + constexpr int kNumCalls = 123; + Random random_generator(seed()); + ClippingPredictorEvaluator evaluator(history_size()); + + for (int i = 0; i < kNumCalls; ++i) { + SCOPED_TRACE(i); + // Read true positives before `Observe()` is called. + const int last_tp = evaluator.true_positives(); + // `Observe()` a random observation. + bool clipping_detected = random_generator.Rand<bool>(); + bool clipping_predicted = random_generator.Rand<bool>(); + absl::optional<int> prediction_interval = + evaluator.Observe(clipping_detected, clipping_predicted); + + // Check that the prediction interval is returned when a true positive is + // found. 
+ if (evaluator.true_positives() == last_tp) { + EXPECT_FALSE(prediction_interval.has_value()); + } else { + EXPECT_TRUE(prediction_interval.has_value()); + } + } +} + +INSTANTIATE_TEST_SUITE_P( + ClippingPredictorEvaluatorTest, + ClippingPredictorEvaluatorParameterization, + ::testing::Combine(::testing::Values(4, 8, 15, 16, 23, 42), + ::testing::Values(1, 10, 21))); + +// Checks that, observing a detection and a prediction after init, produces a +// true positive. +TEST(ClippingPredictorEvaluatorTest, OneTruePositiveAfterInit) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + evaluator.Observe(kDetected, kPredicted); + EXPECT_EQ(evaluator.true_positives(), 1); + + EXPECT_EQ(evaluator.true_negatives(), 0); + EXPECT_EQ(evaluator.false_positives(), 0); + EXPECT_EQ(evaluator.false_negatives(), 0); +} + +// Checks that, observing a detection but no prediction after init, produces a +// false negative. +TEST(ClippingPredictorEvaluatorTest, OneFalseNegativeAfterInit) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + evaluator.Observe(kDetected, kNotPredicted); + EXPECT_EQ(evaluator.false_negatives(), 1); + + EXPECT_EQ(evaluator.true_positives(), 0); + EXPECT_EQ(evaluator.true_negatives(), 0); + EXPECT_EQ(evaluator.false_positives(), 0); +} + +// Checks that, observing no detection but a prediction after init, produces a +// false positive after expiration. +TEST(ClippingPredictorEvaluatorTest, OneFalsePositiveAfterInit) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + evaluator.Observe(kNotDetected, kPredicted); + EXPECT_EQ(evaluator.false_positives(), 0); + evaluator.Observe(kNotDetected, kNotPredicted); + evaluator.Observe(kNotDetected, kNotPredicted); + evaluator.Observe(kNotDetected, kNotPredicted); + EXPECT_EQ(evaluator.false_positives(), 1); + + EXPECT_EQ(evaluator.true_positives(), 0); + EXPECT_EQ(evaluator.true_negatives(), 0); + EXPECT_EQ(evaluator.false_negatives(), 0); +} + +// Checks that, observing no detection and no prediction after init, produces a +// true negative. +TEST(ClippingPredictorEvaluatorTest, OneTrueNegativeAfterInit) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + evaluator.Observe(kNotDetected, kNotPredicted); + EXPECT_EQ(evaluator.true_negatives(), 1); + + EXPECT_EQ(evaluator.true_positives(), 0); + EXPECT_EQ(evaluator.false_positives(), 0); + EXPECT_EQ(evaluator.false_negatives(), 0); +} + +// Checks that the evaluator detects true negatives when clipping is neither +// predicted nor detected. +TEST(ClippingPredictorEvaluatorTest, NeverDetectedAndNotPredicted) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + evaluator.Observe(kNotDetected, kNotPredicted); + evaluator.Observe(kNotDetected, kNotPredicted); + evaluator.Observe(kNotDetected, kNotPredicted); + evaluator.Observe(kNotDetected, kNotPredicted); + EXPECT_EQ(evaluator.true_negatives(), 4); + + EXPECT_EQ(evaluator.true_positives(), 0); + EXPECT_EQ(evaluator.false_positives(), 0); + EXPECT_EQ(evaluator.false_negatives(), 0); +} + +// Checks that the evaluator detects a false negative when clipping is detected +// but not predicted. 
+TEST(ClippingPredictorEvaluatorTest, DetectedButNotPredicted) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + evaluator.Observe(kNotDetected, kNotPredicted); + evaluator.Observe(kNotDetected, kNotPredicted); + evaluator.Observe(kNotDetected, kNotPredicted); + evaluator.Observe(kDetected, kNotPredicted); + EXPECT_EQ(evaluator.false_negatives(), 1); + + EXPECT_EQ(evaluator.true_positives(), 0); + EXPECT_EQ(evaluator.true_negatives(), 3); + EXPECT_EQ(evaluator.false_positives(), 0); +} + +// Checks that the evaluator does not detect a false positive when clipping is +// predicted but not detected until the observation period expires. +TEST(ClippingPredictorEvaluatorTest, + PredictedOnceAndNeverDetectedBeforeDeadline) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + evaluator.Observe(kNotDetected, kPredicted); + evaluator.Observe(kNotDetected, kNotPredicted); + EXPECT_EQ(evaluator.false_positives(), 0); + evaluator.Observe(kNotDetected, kNotPredicted); + EXPECT_EQ(evaluator.false_positives(), 0); + evaluator.Observe(kNotDetected, kNotPredicted); + EXPECT_EQ(evaluator.false_positives(), 1); + + EXPECT_EQ(evaluator.true_positives(), 0); + EXPECT_EQ(evaluator.true_negatives(), 0); + EXPECT_EQ(evaluator.false_negatives(), 0); +} + +// Checks that the evaluator detects a false positive when clipping is predicted +// but detected after the observation period expires. +TEST(ClippingPredictorEvaluatorTest, PredictedOnceButDetectedAfterDeadline) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + evaluator.Observe(kNotDetected, kPredicted); + evaluator.Observe(kNotDetected, kNotPredicted); + evaluator.Observe(kNotDetected, kNotPredicted); + evaluator.Observe(kNotDetected, kNotPredicted); + evaluator.Observe(kDetected, kNotPredicted); + EXPECT_EQ(evaluator.false_positives(), 1); + + EXPECT_EQ(evaluator.true_positives(), 0); + EXPECT_EQ(evaluator.true_negatives(), 0); + EXPECT_EQ(evaluator.false_negatives(), 1); +} + +// Checks that a prediction followed by a detection counts as true positive. +TEST(ClippingPredictorEvaluatorTest, PredictedOnceAndThenImmediatelyDetected) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + evaluator.Observe(kNotDetected, kPredicted); + EXPECT_EQ(evaluator.false_positives(), 0); + evaluator.Observe(kDetected, kNotPredicted); + EXPECT_EQ(evaluator.true_positives(), 1); + + EXPECT_EQ(evaluator.true_negatives(), 0); + EXPECT_EQ(evaluator.false_positives(), 0); + EXPECT_EQ(evaluator.false_negatives(), 0); +} + +// Checks that a prediction followed by a delayed detection counts as true +// positive if the delay is within the observation period. +TEST(ClippingPredictorEvaluatorTest, PredictedOnceAndDetectedBeforeDeadline) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + evaluator.Observe(kNotDetected, kPredicted); + EXPECT_EQ(evaluator.false_positives(), 0); + evaluator.Observe(kNotDetected, kNotPredicted); + EXPECT_EQ(evaluator.false_positives(), 0); + evaluator.Observe(kDetected, kNotPredicted); + EXPECT_EQ(evaluator.true_positives(), 1); + + EXPECT_EQ(evaluator.true_negatives(), 0); + EXPECT_EQ(evaluator.false_positives(), 0); + EXPECT_EQ(evaluator.false_negatives(), 0); +} + +// Checks that a prediction followed by a delayed detection counts as true +// positive if the delay equals the observation period. 
+TEST(ClippingPredictorEvaluatorTest, PredictedOnceAndDetectedAtDeadline) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + evaluator.Observe(kNotDetected, kPredicted); + EXPECT_EQ(evaluator.false_positives(), 0); + evaluator.Observe(kNotDetected, kNotPredicted); + EXPECT_EQ(evaluator.false_positives(), 0); + evaluator.Observe(kNotDetected, kNotPredicted); + EXPECT_EQ(evaluator.false_positives(), 0); + evaluator.Observe(kDetected, kNotPredicted); + EXPECT_EQ(evaluator.true_positives(), 1); + + EXPECT_EQ(evaluator.true_negatives(), 0); + EXPECT_EQ(evaluator.false_positives(), 0); + EXPECT_EQ(evaluator.false_negatives(), 0); +} + +// Checks that a prediction followed by a multiple adjacent detections within +// the deadline counts as a single true positive and that, after the deadline, +// a detection counts as a false negative. +TEST(ClippingPredictorEvaluatorTest, PredictedOnceAndDetectedMultipleTimes) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + evaluator.Observe(kNotDetected, kPredicted); + evaluator.Observe(kNotDetected, kNotPredicted); + // Multiple detections. + evaluator.Observe(kDetected, kNotPredicted); + EXPECT_EQ(evaluator.true_positives(), 1); + evaluator.Observe(kDetected, kNotPredicted); + EXPECT_EQ(evaluator.true_positives(), 1); + // A detection outside of the observation period counts as false negative. + evaluator.Observe(kDetected, kNotPredicted); + EXPECT_EQ(evaluator.false_negatives(), 1); + EXPECT_EQ(SumTrueFalsePositivesNegatives(evaluator), 2); + + EXPECT_EQ(evaluator.true_negatives(), 0); + EXPECT_EQ(evaluator.false_positives(), 0); +} + +// Checks that a false positive is added when clipping is detected after a too +// early prediction. +TEST(ClippingPredictorEvaluatorTest, + PredictedMultipleTimesAndDetectedOnceAfterDeadline) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + evaluator.Observe(kNotDetected, kPredicted); // ---+ + evaluator.Observe(kNotDetected, kPredicted); // | + evaluator.Observe(kNotDetected, kPredicted); // | + evaluator.Observe(kNotDetected, kPredicted); // <--+ Not matched. + // The time to match a detection after the first prediction expired. + EXPECT_EQ(evaluator.false_positives(), 1); + evaluator.Observe(kDetected, kNotPredicted); + // The detection above does not match the first prediction because it happened + // after the deadline of the 1st prediction. + EXPECT_EQ(evaluator.false_positives(), 1); + + EXPECT_EQ(evaluator.true_positives(), 3); + EXPECT_EQ(evaluator.true_negatives(), 0); + EXPECT_EQ(evaluator.false_negatives(), 0); +} + +// Checks that multiple consecutive predictions match the first detection +// observed before the expected detection deadline expires. +TEST(ClippingPredictorEvaluatorTest, PredictedMultipleTimesAndDetectedOnce) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + evaluator.Observe(kNotDetected, kPredicted); // --+ + evaluator.Observe(kNotDetected, kPredicted); // | --+ + evaluator.Observe(kNotDetected, kPredicted); // | | --+ + evaluator.Observe(kDetected, kNotPredicted); // <-+ <-+ <-+ + EXPECT_EQ(evaluator.true_positives(), 3); + // The following observations do not generate any true negatives as they + // belong to the observation period of the last prediction - for which a + // detection has already been matched. 
+ const int true_negatives = evaluator.true_negatives(); + evaluator.Observe(kNotDetected, kNotPredicted); + evaluator.Observe(kNotDetected, kNotPredicted); + EXPECT_EQ(evaluator.true_negatives(), true_negatives); + + EXPECT_EQ(evaluator.true_negatives(), 0); + EXPECT_EQ(evaluator.false_positives(), 0); + EXPECT_EQ(evaluator.false_negatives(), 0); +} + +// Checks that multiple consecutive predictions match the multiple detections +// observed before the expected detection deadline expires. +TEST(ClippingPredictorEvaluatorTest, + PredictedMultipleTimesAndDetectedMultipleTimes) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + evaluator.Observe(kNotDetected, kPredicted); // --+ + evaluator.Observe(kNotDetected, kPredicted); // | --+ + evaluator.Observe(kNotDetected, kPredicted); // | | --+ + evaluator.Observe(kDetected, kNotPredicted); // <-+ <-+ <-+ + evaluator.Observe(kDetected, kNotPredicted); // <-+ <-+ + EXPECT_EQ(evaluator.true_positives(), 3); + // The following observation does not generate a true negative as it belongs + // to the observation period of the last prediction - for which two detections + // have already been matched. + const int true_negatives = evaluator.true_negatives(); + evaluator.Observe(kNotDetected, kNotPredicted); + EXPECT_EQ(evaluator.true_negatives(), true_negatives); + + EXPECT_EQ(evaluator.true_negatives(), 0); + EXPECT_EQ(evaluator.false_positives(), 0); + EXPECT_EQ(evaluator.false_negatives(), 0); +} + +// Checks that multiple consecutive predictions match all the detections +// observed before the expected detection deadline expires. +TEST(ClippingPredictorEvaluatorTest, PredictedMultipleTimesAndAllDetected) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + evaluator.Observe(kNotDetected, kPredicted); // --+ + evaluator.Observe(kNotDetected, kPredicted); // | --+ + evaluator.Observe(kNotDetected, kPredicted); // | | --+ + evaluator.Observe(kDetected, kNotPredicted); // <-+ <-+ <-+ + evaluator.Observe(kDetected, kNotPredicted); // <-+ <-+ + evaluator.Observe(kDetected, kNotPredicted); // <-+ + EXPECT_EQ(evaluator.true_positives(), 3); + EXPECT_EQ(evaluator.true_negatives(), 0); + EXPECT_EQ(evaluator.false_positives(), 0); + EXPECT_EQ(evaluator.false_negatives(), 0); +} + +// Checks that multiple non-consecutive predictions match all the detections +// observed before the expected detection deadline expires. +TEST(ClippingPredictorEvaluatorTest, + PredictedMultipleTimesWithGapAndAllDetected) { + ClippingPredictorEvaluator evaluator(/*history_size=*/3); + evaluator.Observe(kNotDetected, kPredicted); // --+ + evaluator.Observe(kNotDetected, kNotPredicted); // | + evaluator.Observe(kNotDetected, kPredicted); // | --+ + evaluator.Observe(kDetected, kNotPredicted); // <-+ <-+ + evaluator.Observe(kDetected, kNotPredicted); // <-+ + evaluator.Observe(kDetected, kNotPredicted); // <-+ + EXPECT_EQ(evaluator.true_positives(), 2); + EXPECT_EQ(evaluator.true_negatives(), 0); + EXPECT_EQ(evaluator.false_positives(), 0); + EXPECT_EQ(evaluator.false_negatives(), 0); +} + +class ClippingPredictorEvaluatorPredictionIntervalParameterization + : public ::testing::TestWithParam<std::tuple<int, int>> { + protected: + int num_extra_observe_calls() const { return std::get<0>(GetParam()); } + int history_size() const { return std::get<1>(GetParam()); } +}; + +// Checks that the minimum prediction interval is returned if clipping is +// correctly predicted as soon as detected - i.e., no anticipation. 
+TEST_P(ClippingPredictorEvaluatorPredictionIntervalParameterization, + MinimumPredictionInterval) { + ClippingPredictorEvaluator evaluator(history_size()); + for (int i = 0; i < num_extra_observe_calls(); ++i) { + EXPECT_EQ(evaluator.Observe(kNotDetected, kNotPredicted), absl::nullopt); + } + absl::optional<int> prediction_interval = + evaluator.Observe(kDetected, kPredicted); + EXPECT_THAT(prediction_interval, Optional(Eq(0))); +} + +// Checks that a prediction interval between the minimum and the maximum is +// returned if clipping is correctly predicted before it is detected but not as +// early as possible. +TEST_P(ClippingPredictorEvaluatorPredictionIntervalParameterization, + IntermediatePredictionInterval) { + ClippingPredictorEvaluator evaluator(history_size()); + for (int i = 0; i < num_extra_observe_calls(); ++i) { + EXPECT_EQ(evaluator.Observe(kNotDetected, kNotPredicted), absl::nullopt); + } + EXPECT_EQ(evaluator.Observe(kNotDetected, kPredicted), absl::nullopt); + EXPECT_EQ(evaluator.Observe(kNotDetected, kPredicted), absl::nullopt); + EXPECT_EQ(evaluator.Observe(kNotDetected, kPredicted), absl::nullopt); + absl::optional<int> prediction_interval = + evaluator.Observe(kDetected, kPredicted); + EXPECT_THAT(prediction_interval, Optional(Eq(3))); +} + +// Checks that the maximum prediction interval is returned if clipping is +// correctly predicted as early as possible. +TEST_P(ClippingPredictorEvaluatorPredictionIntervalParameterization, + MaximumPredictionInterval) { + ClippingPredictorEvaluator evaluator(history_size()); + for (int i = 0; i < num_extra_observe_calls(); ++i) { + EXPECT_EQ(evaluator.Observe(kNotDetected, kNotPredicted), absl::nullopt); + } + for (int i = 0; i < history_size(); ++i) { + EXPECT_EQ(evaluator.Observe(kNotDetected, kPredicted), absl::nullopt); + } + absl::optional<int> prediction_interval = + evaluator.Observe(kDetected, kPredicted); + EXPECT_THAT(prediction_interval, Optional(Eq(history_size()))); +} + +// Checks that `Observe()` returns the prediction interval as soon as a true +// positive is found and never again while ongoing detections are matched to a +// previously observed prediction. +TEST_P(ClippingPredictorEvaluatorPredictionIntervalParameterization, + PredictionIntervalReturnedOnce) { + ASSERT_LT(num_extra_observe_calls(), history_size()); + ClippingPredictorEvaluator evaluator(history_size()); + // Observe predictions before detection. + for (int i = 0; i < num_extra_observe_calls(); ++i) { + EXPECT_EQ(evaluator.Observe(kNotDetected, kPredicted), absl::nullopt); + } + // Observe a detection. + absl::optional<int> prediction_interval = + evaluator.Observe(kDetected, kPredicted); + EXPECT_TRUE(prediction_interval.has_value()); + // `Observe()` does not return a prediction interval anymore during ongoing + // detections observed while a detection is still expected. + for (int i = 0; i < history_size(); ++i) { + EXPECT_EQ(evaluator.Observe(kDetected, kNotPredicted), absl::nullopt); + } +} + +INSTANTIATE_TEST_SUITE_P( + ClippingPredictorEvaluatorTest, + ClippingPredictorEvaluatorPredictionIntervalParameterization, + ::testing::Combine(::testing::Values(0, 3, 5), ::testing::Values(7, 11))); + +// Checks that, when a detection is expected, the expectation is removed if and +// only if `Reset()` is called after a prediction is observed. 
+TEST(ClippingPredictorEvaluatorTest, NoFalsePositivesAfterReset) { + constexpr int kHistorySize = 2; + + ClippingPredictorEvaluator with_reset(kHistorySize); + with_reset.Observe(kNotDetected, kPredicted); + with_reset.Reset(); + with_reset.Observe(kNotDetected, kNotPredicted); + with_reset.Observe(kNotDetected, kNotPredicted); + EXPECT_EQ(with_reset.true_positives(), 0); + EXPECT_EQ(with_reset.true_negatives(), 2); + EXPECT_EQ(with_reset.false_positives(), 0); + EXPECT_EQ(with_reset.false_negatives(), 0); + + ClippingPredictorEvaluator no_reset(kHistorySize); + no_reset.Observe(kNotDetected, kPredicted); + no_reset.Observe(kNotDetected, kNotPredicted); + no_reset.Observe(kNotDetected, kNotPredicted); + EXPECT_EQ(no_reset.true_positives(), 0); + EXPECT_EQ(no_reset.true_negatives(), 0); + EXPECT_EQ(no_reset.false_positives(), 1); + EXPECT_EQ(no_reset.false_negatives(), 0); +} + +} // namespace +} // namespace webrtc diff --git a/modules/audio_processing/agc/clipping_predictor_level_buffer.cc b/modules/audio_processing/agc/clipping_predictor_level_buffer.cc new file mode 100644 index 0000000000..bc33cda040 --- /dev/null +++ b/modules/audio_processing/agc/clipping_predictor_level_buffer.cc @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc/clipping_predictor_level_buffer.h" + +#include <algorithm> +#include <cmath> + +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +bool ClippingPredictorLevelBuffer::Level::operator==(const Level& level) const { + constexpr float kEpsilon = 1e-6f; + return std::fabs(average - level.average) < kEpsilon && + std::fabs(max - level.max) < kEpsilon; +} + +ClippingPredictorLevelBuffer::ClippingPredictorLevelBuffer(int capacity) + : tail_(-1), size_(0), data_(std::max(1, capacity)) { + if (capacity > kMaxCapacity) { + RTC_LOG(LS_WARNING) << "[agc]: ClippingPredictorLevelBuffer exceeds the " + << "maximum allowed capacity. Capacity: " << capacity; + } + RTC_DCHECK(!data_.empty()); +} + +void ClippingPredictorLevelBuffer::Reset() { + tail_ = -1; + size_ = 0; +} + +void ClippingPredictorLevelBuffer::Push(Level level) { + ++tail_; + if (tail_ == Capacity()) { + tail_ = 0; + } + if (size_ < Capacity()) { + size_++; + } + data_[tail_] = level; +} + +// TODO(bugs.webrtc.org/12774): Optimize partial computation for long buffers. 
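// Usage sketch (illustration only, not part of this change): expected results
// for a capacity-2 buffer, matching the CheckMetricsAfterFullBuffer unit test
// further below.
//   ClippingPredictorLevelBuffer buffer(/*capacity=*/2);
//   buffer.Push({1, 2});
//   buffer.Push({3, 6});
//   buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/1);  // {3, 6}
//   buffer.ComputePartialMetrics(/*delay=*/1, /*num_items=*/1);  // {1, 2}
//   buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/2);  // {2, 6}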
+absl::optional<ClippingPredictorLevelBuffer::Level> +ClippingPredictorLevelBuffer::ComputePartialMetrics(int delay, + int num_items) const { + RTC_DCHECK_GE(delay, 0); + RTC_DCHECK_LT(delay, Capacity()); + RTC_DCHECK_GT(num_items, 0); + RTC_DCHECK_LE(num_items, Capacity()); + RTC_DCHECK_LE(delay + num_items, Capacity()); + if (delay + num_items > Size()) { + return absl::nullopt; + } + float sum = 0.0f; + float max = 0.0f; + for (int i = 0; i < num_items && i < Size(); ++i) { + int idx = tail_ - delay - i; + if (idx < 0) { + idx += Capacity(); + } + sum += data_[idx].average; + max = std::fmax(data_[idx].max, max); + } + return absl::optional<Level>({sum / static_cast<float>(num_items), max}); +} + +} // namespace webrtc diff --git a/modules/audio_processing/agc/clipping_predictor_level_buffer.h b/modules/audio_processing/agc/clipping_predictor_level_buffer.h new file mode 100644 index 0000000000..f3e8368194 --- /dev/null +++ b/modules/audio_processing/agc/clipping_predictor_level_buffer.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_LEVEL_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_LEVEL_BUFFER_H_ + +#include <memory> +#include <vector> + +#include "absl/types/optional.h" + +namespace webrtc { + +// A circular buffer to store frame-wise `Level` items for clipping prediction. +// The current implementation is not optimized for large buffer lengths. +class ClippingPredictorLevelBuffer { + public: + struct Level { + float average; + float max; + bool operator==(const Level& level) const; + }; + + // Recommended maximum capacity. It is possible to create a buffer with a + // larger capacity, but the implementation is not optimized for large values. + static constexpr int kMaxCapacity = 100; + + // Ctor. Sets the buffer capacity to max(1, `capacity`) and logs a warning + // message if the capacity is greater than `kMaxCapacity`. + explicit ClippingPredictorLevelBuffer(int capacity); + ~ClippingPredictorLevelBuffer() {} + ClippingPredictorLevelBuffer(const ClippingPredictorLevelBuffer&) = delete; + ClippingPredictorLevelBuffer& operator=(const ClippingPredictorLevelBuffer&) = + delete; + + void Reset(); + + // Returns the current number of items stored in the buffer. + int Size() const { return size_; } + + // Returns the capacity of the buffer. + int Capacity() const { return data_.size(); } + + // Adds a `level` item into the circular buffer `data_`. Stores at most + // `Capacity()` items. If more items are pushed, the new item replaces the + // least recently pushed item. + void Push(Level level); + + // If at least `num_items` + `delay` items have been pushed, returns the + // average and maximum value for the `num_items` most recently pushed items + // from `delay` to `delay` - `num_items` (a delay equal to zero corresponds + // to the most recently pushed item). The value of `delay` is limited to + // [0, N] and `num_items` to [1, M] where N + M is the capacity of the buffer. 
+ absl::optional<Level> ComputePartialMetrics(int delay, int num_items) const; + + private: + int tail_; + int size_; + std::vector<Level> data_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_LEVEL_BUFFER_H_ diff --git a/modules/audio_processing/agc/clipping_predictor_level_buffer_unittest.cc b/modules/audio_processing/agc/clipping_predictor_level_buffer_unittest.cc new file mode 100644 index 0000000000..7e594a1eca --- /dev/null +++ b/modules/audio_processing/agc/clipping_predictor_level_buffer_unittest.cc @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc/clipping_predictor_level_buffer.h" + +#include <algorithm> + +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using ::testing::Eq; +using ::testing::Optional; + +class ClippingPredictorLevelBufferParametrization + : public ::testing::TestWithParam<int> { + protected: + int capacity() const { return GetParam(); } +}; + +TEST_P(ClippingPredictorLevelBufferParametrization, CheckEmptyBufferSize) { + ClippingPredictorLevelBuffer buffer(capacity()); + EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1)); + EXPECT_EQ(buffer.Size(), 0); +} + +TEST_P(ClippingPredictorLevelBufferParametrization, CheckHalfEmptyBufferSize) { + ClippingPredictorLevelBuffer buffer(capacity()); + for (int i = 0; i < buffer.Capacity() / 2; ++i) { + buffer.Push({2, 4}); + } + EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1)); + EXPECT_EQ(buffer.Size(), std::max(capacity(), 1) / 2); +} + +TEST_P(ClippingPredictorLevelBufferParametrization, CheckFullBufferSize) { + ClippingPredictorLevelBuffer buffer(capacity()); + for (int i = 0; i < buffer.Capacity(); ++i) { + buffer.Push({2, 4}); + } + EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1)); + EXPECT_EQ(buffer.Size(), std::max(capacity(), 1)); +} + +TEST_P(ClippingPredictorLevelBufferParametrization, CheckLargeBufferSize) { + ClippingPredictorLevelBuffer buffer(capacity()); + for (int i = 0; i < 2 * buffer.Capacity(); ++i) { + buffer.Push({2, 4}); + } + EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1)); + EXPECT_EQ(buffer.Size(), std::max(capacity(), 1)); +} + +TEST_P(ClippingPredictorLevelBufferParametrization, CheckSizeAfterReset) { + ClippingPredictorLevelBuffer buffer(capacity()); + buffer.Push({1, 1}); + buffer.Push({1, 1}); + buffer.Reset(); + EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1)); + EXPECT_EQ(buffer.Size(), 0); + buffer.Push({1, 1}); + EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1)); + EXPECT_EQ(buffer.Size(), 1); +} + +INSTANTIATE_TEST_SUITE_P(ClippingPredictorLevelBufferTest, + ClippingPredictorLevelBufferParametrization, + ::testing::Values(-1, 0, 1, 123)); + +TEST(ClippingPredictorLevelBufferTest, CheckMetricsAfterFullBuffer) { + ClippingPredictorLevelBuffer buffer(/*capacity=*/2); + buffer.Push({1, 2}); + buffer.Push({3, 6}); + EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/1), + Optional(Eq(ClippingPredictorLevelBuffer::Level{3, 6}))); + EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/1, /*num_items=*/1), + Optional(Eq(ClippingPredictorLevelBuffer::Level{1, 
2}))); + EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/2), + Optional(Eq(ClippingPredictorLevelBuffer::Level{2, 6}))); +} + +TEST(ClippingPredictorLevelBufferTest, CheckMetricsAfterPushBeyondCapacity) { + ClippingPredictorLevelBuffer buffer(/*capacity=*/2); + buffer.Push({1, 1}); + buffer.Push({3, 6}); + buffer.Push({5, 10}); + buffer.Push({7, 14}); + buffer.Push({6, 12}); + EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/1), + Optional(Eq(ClippingPredictorLevelBuffer::Level{6, 12}))); + EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/1, /*num_items=*/1), + Optional(Eq(ClippingPredictorLevelBuffer::Level{7, 14}))); + EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/2), + Optional(Eq(ClippingPredictorLevelBuffer::Level{6.5f, 14}))); +} + +TEST(ClippingPredictorLevelBufferTest, CheckMetricsAfterTooFewItems) { + ClippingPredictorLevelBuffer buffer(/*capacity=*/4); + buffer.Push({1, 2}); + buffer.Push({3, 6}); + EXPECT_EQ(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/3), + absl::nullopt); + EXPECT_EQ(buffer.ComputePartialMetrics(/*delay=*/2, /*num_items=*/1), + absl::nullopt); +} + +TEST(ClippingPredictorLevelBufferTest, CheckMetricsAfterReset) { + ClippingPredictorLevelBuffer buffer(/*capacity=*/2); + buffer.Push({1, 2}); + buffer.Reset(); + buffer.Push({5, 10}); + buffer.Push({7, 14}); + EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/1), + Optional(Eq(ClippingPredictorLevelBuffer::Level{7, 14}))); + EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/2), + Optional(Eq(ClippingPredictorLevelBuffer::Level{6, 14}))); + EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/1, /*num_items=*/1), + Optional(Eq(ClippingPredictorLevelBuffer::Level{5, 10}))); +} + +} // namespace +} // namespace webrtc diff --git a/modules/audio_processing/agc/clipping_predictor_unittest.cc b/modules/audio_processing/agc/clipping_predictor_unittest.cc new file mode 100644 index 0000000000..e848e1a724 --- /dev/null +++ b/modules/audio_processing/agc/clipping_predictor_unittest.cc @@ -0,0 +1,491 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc/clipping_predictor.h" + +#include <cstdint> +#include <limits> +#include <tuple> + +#include "rtc_base/checks.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using ::testing::Eq; +using ::testing::Optional; +using ClippingPredictorConfig = AudioProcessing::Config::GainController1:: + AnalogGainController::ClippingPredictor; +using ClippingPredictorMode = AudioProcessing::Config::GainController1:: + AnalogGainController::ClippingPredictor::Mode; + +constexpr int kSampleRateHz = 32000; +constexpr int kNumChannels = 1; +constexpr int kSamplesPerChannel = kSampleRateHz / 100; +constexpr int kMaxMicLevel = 255; +constexpr int kMinMicLevel = 12; +constexpr int kDefaultClippedLevelStep = 15; +constexpr float kMaxSampleS16 = + static_cast<float>(std::numeric_limits<int16_t>::max()); + +// Threshold in dB corresponding to a signal with an amplitude equal to 99% of +// the dynamic range - i.e., computed as `20*log10(0.99)`. 
+constexpr float kClippingThresholdDb = -0.08729610804900176f; + +void CallAnalyze(int num_calls, + const AudioFrameView<const float>& frame, + ClippingPredictor& predictor) { + for (int i = 0; i < num_calls; ++i) { + predictor.Analyze(frame); + } +} + +// Creates and analyzes an audio frame with a non-zero (approx. 4.15dB) crest +// factor. +void AnalyzeNonZeroCrestFactorAudio(int num_calls, + int num_channels, + float peak_ratio, + ClippingPredictor& predictor) { + RTC_DCHECK_GT(num_calls, 0); + RTC_DCHECK_GT(num_channels, 0); + RTC_DCHECK_LE(peak_ratio, 1.0f); + std::vector<float*> audio(num_channels); + std::vector<float> audio_data(num_channels * kSamplesPerChannel, 0.0f); + for (int channel = 0; channel < num_channels; ++channel) { + audio[channel] = &audio_data[channel * kSamplesPerChannel]; + for (int sample = 0; sample < kSamplesPerChannel; sample += 10) { + audio[channel][sample] = 0.1f * peak_ratio * kMaxSampleS16; + audio[channel][sample + 1] = 0.2f * peak_ratio * kMaxSampleS16; + audio[channel][sample + 2] = 0.3f * peak_ratio * kMaxSampleS16; + audio[channel][sample + 3] = 0.4f * peak_ratio * kMaxSampleS16; + audio[channel][sample + 4] = 0.5f * peak_ratio * kMaxSampleS16; + audio[channel][sample + 5] = 0.6f * peak_ratio * kMaxSampleS16; + audio[channel][sample + 6] = 0.7f * peak_ratio * kMaxSampleS16; + audio[channel][sample + 7] = 0.8f * peak_ratio * kMaxSampleS16; + audio[channel][sample + 8] = 0.9f * peak_ratio * kMaxSampleS16; + audio[channel][sample + 9] = 1.0f * peak_ratio * kMaxSampleS16; + } + } + AudioFrameView<const float> frame(audio.data(), num_channels, + kSamplesPerChannel); + CallAnalyze(num_calls, frame, predictor); +} + +void CheckChannelEstimatesWithValue(int num_channels, + int level, + int default_step, + int min_mic_level, + int max_mic_level, + const ClippingPredictor& predictor, + int expected) { + for (int i = 0; i < num_channels; ++i) { + SCOPED_TRACE(i); + EXPECT_THAT(predictor.EstimateClippedLevelStep( + i, level, default_step, min_mic_level, max_mic_level), + Optional(Eq(expected))); + } +} + +void CheckChannelEstimatesWithoutValue(int num_channels, + int level, + int default_step, + int min_mic_level, + int max_mic_level, + const ClippingPredictor& predictor) { + for (int i = 0; i < num_channels; ++i) { + SCOPED_TRACE(i); + EXPECT_EQ(predictor.EstimateClippedLevelStep(i, level, default_step, + min_mic_level, max_mic_level), + absl::nullopt); + } +} + +// Creates and analyzes an audio frame with a zero crest factor. +void AnalyzeZeroCrestFactorAudio(int num_calls, + int num_channels, + float peak_ratio, + ClippingPredictor& predictor) { + RTC_DCHECK_GT(num_calls, 0); + RTC_DCHECK_GT(num_channels, 0); + RTC_DCHECK_LE(peak_ratio, 1.f); + std::vector<float*> audio(num_channels); + std::vector<float> audio_data(num_channels * kSamplesPerChannel, 0.f); + for (int channel = 0; channel < num_channels; ++channel) { + audio[channel] = &audio_data[channel * kSamplesPerChannel]; + for (int sample = 0; sample < kSamplesPerChannel; ++sample) { + audio[channel][sample] = peak_ratio * kMaxSampleS16; + } + } + auto frame = AudioFrameView<const float>(audio.data(), num_channels, + kSamplesPerChannel); + CallAnalyze(num_calls, frame, predictor); +} + +TEST(ClippingPeakPredictorTest, NoPredictorCreated) { + auto predictor = + CreateClippingPredictor(kNumChannels, /*config=*/{/*enabled=*/false}); + EXPECT_FALSE(predictor); +} + +TEST(ClippingPeakPredictorTest, ClippingEventPredictionCreated) { + // TODO(bugs.webrtc.org/12874): Use designated initializers one fixed. 
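// Illustration only (not part of this change): once designated initializers
// are allowed (see the TODO above), the positional /*field=*/ comments used in
// the tests below could be written as, e.g.:
//   ClippingPredictorConfig config{
//       .enabled = true,
//       .mode = ClippingPredictorMode::kClippingEventPrediction,
//       .window_length = 5,
//       .reference_window_length = 5,
//       .reference_window_delay = 5,
//       .clipping_threshold = -1.0f,
//       .crest_factor_margin = 3.0f};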
+ auto predictor = CreateClippingPredictor( + kNumChannels, + /*config=*/{/*enabled=*/true, + /*mode=*/ClippingPredictorMode::kClippingEventPrediction}); + EXPECT_TRUE(predictor); +} + +TEST(ClippingPeakPredictorTest, AdaptiveStepClippingPeakPredictionCreated) { + // TODO(bugs.webrtc.org/12874): Use designated initializers one fixed. + auto predictor = CreateClippingPredictor( + kNumChannels, /*config=*/{ + /*enabled=*/true, + /*mode=*/ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction}); + EXPECT_TRUE(predictor); +} + +TEST(ClippingPeakPredictorTest, FixedStepClippingPeakPredictionCreated) { + // TODO(bugs.webrtc.org/12874): Use designated initializers one fixed. + auto predictor = CreateClippingPredictor( + kNumChannels, /*config=*/{ + /*enabled=*/true, + /*mode=*/ClippingPredictorMode::kFixedStepClippingPeakPrediction}); + EXPECT_TRUE(predictor); +} + +class ClippingPredictorParameterization + : public ::testing::TestWithParam<std::tuple<int, int, int, int>> { + protected: + int num_channels() const { return std::get<0>(GetParam()); } + ClippingPredictorConfig GetConfig(ClippingPredictorMode mode) const { + // TODO(bugs.webrtc.org/12874): Use designated initializers one fixed. + return {/*enabled=*/true, + /*mode=*/mode, + /*window_length=*/std::get<1>(GetParam()), + /*reference_window_length=*/std::get<2>(GetParam()), + /*reference_window_delay=*/std::get<3>(GetParam()), + /*clipping_threshold=*/-1.0f, + /*crest_factor_margin=*/0.5f}; + } +}; + +TEST_P(ClippingPredictorParameterization, + CheckClippingEventPredictorEstimateAfterCrestFactorDrop) { + const ClippingPredictorConfig config = + GetConfig(ClippingPredictorMode::kClippingEventPrediction); + if (config.reference_window_length + config.reference_window_delay <= + config.window_length) { + return; + } + auto predictor = CreateClippingPredictor(num_channels(), config); + AnalyzeNonZeroCrestFactorAudio( + /*num_calls=*/config.reference_window_length + + config.reference_window_delay - config.window_length, + num_channels(), /*peak_ratio=*/0.99f, *predictor); + CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); + AnalyzeZeroCrestFactorAudio(config.window_length, num_channels(), + /*peak_ratio=*/0.99f, *predictor); + CheckChannelEstimatesWithValue( + num_channels(), /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor, kDefaultClippedLevelStep); +} + +TEST_P(ClippingPredictorParameterization, + CheckClippingEventPredictorNoEstimateAfterConstantCrestFactor) { + const ClippingPredictorConfig config = + GetConfig(ClippingPredictorMode::kClippingEventPrediction); + if (config.reference_window_length + config.reference_window_delay <= + config.window_length) { + return; + } + auto predictor = CreateClippingPredictor(num_channels(), config); + AnalyzeNonZeroCrestFactorAudio( + /*num_calls=*/config.reference_window_length + + config.reference_window_delay - config.window_length, + num_channels(), /*peak_ratio=*/0.99f, *predictor); + CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); + AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.window_length, + num_channels(), + /*peak_ratio=*/0.99f, *predictor); + CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); +} + +TEST_P(ClippingPredictorParameterization, + 
CheckClippingPeakPredictorEstimateAfterHighCrestFactor) { + const ClippingPredictorConfig config = + GetConfig(ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction); + if (config.reference_window_length + config.reference_window_delay <= + config.window_length) { + return; + } + auto predictor = CreateClippingPredictor(num_channels(), config); + AnalyzeNonZeroCrestFactorAudio( + /*num_calls=*/config.reference_window_length + + config.reference_window_delay - config.window_length, + num_channels(), /*peak_ratio=*/0.99f, *predictor); + CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); + AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.window_length, + num_channels(), + /*peak_ratio=*/0.99f, *predictor); + CheckChannelEstimatesWithValue( + num_channels(), /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor, kDefaultClippedLevelStep); +} + +TEST_P(ClippingPredictorParameterization, + CheckClippingPeakPredictorNoEstimateAfterLowCrestFactor) { + const ClippingPredictorConfig config = + GetConfig(ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction); + if (config.reference_window_length + config.reference_window_delay <= + config.window_length) { + return; + } + auto predictor = CreateClippingPredictor(num_channels(), config); + AnalyzeZeroCrestFactorAudio( + /*num_calls=*/config.reference_window_length + + config.reference_window_delay - config.window_length, + num_channels(), /*peak_ratio=*/0.99f, *predictor); + CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); + AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.window_length, + num_channels(), + /*peak_ratio=*/0.99f, *predictor); + CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); +} + +INSTANTIATE_TEST_SUITE_P(GainController1ClippingPredictor, + ClippingPredictorParameterization, + ::testing::Combine(::testing::Values(1, 5), + ::testing::Values(1, 5, 10), + ::testing::Values(1, 5), + ::testing::Values(0, 1, 5))); + +class ClippingEventPredictorParameterization + : public ::testing::TestWithParam<std::tuple<float, float>> { + protected: + ClippingPredictorConfig GetConfig() const { + // TODO(bugs.webrtc.org/12874): Use designated initializers one fixed. + return {/*enabled=*/true, + /*mode=*/ClippingPredictorMode::kClippingEventPrediction, + /*window_length=*/5, + /*reference_window_length=*/5, + /*reference_window_delay=*/5, + /*clipping_threshold=*/std::get<0>(GetParam()), + /*crest_factor_margin=*/std::get<1>(GetParam())}; + } +}; + +TEST_P(ClippingEventPredictorParameterization, + CheckEstimateAfterCrestFactorDrop) { + const ClippingPredictorConfig config = GetConfig(); + auto predictor = CreateClippingPredictor(kNumChannels, config); + AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.reference_window_length, + kNumChannels, /*peak_ratio=*/0.99f, + *predictor); + CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); + AnalyzeZeroCrestFactorAudio(config.window_length, kNumChannels, + /*peak_ratio=*/0.99f, *predictor); + // TODO(bugs.webrtc.org/12774): Add clarifying comment. + // TODO(bugs.webrtc.org/12774): Remove 4.15f threshold and split tests. 
+ if (config.clipping_threshold < kClippingThresholdDb && + config.crest_factor_margin < 4.15f) { + CheckChannelEstimatesWithValue( + kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor, kDefaultClippedLevelStep); + } else { + CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); + } +} + +INSTANTIATE_TEST_SUITE_P(GainController1ClippingPredictor, + ClippingEventPredictorParameterization, + ::testing::Combine(::testing::Values(-1.0f, 0.0f), + ::testing::Values(3.0f, 4.16f))); + +class ClippingPredictorModeParameterization + : public ::testing::TestWithParam<ClippingPredictorMode> { + protected: + ClippingPredictorConfig GetConfig(float clipping_threshold_dbfs) const { + // TODO(bugs.webrtc.org/12874): Use designated initializers one fixed. + return {/*enabled=*/true, + /*mode=*/GetParam(), + /*window_length=*/5, + /*reference_window_length=*/5, + /*reference_window_delay=*/5, + /*clipping_threshold=*/clipping_threshold_dbfs, + /*crest_factor_margin=*/3.0f}; + } +}; + +TEST_P(ClippingPredictorModeParameterization, + CheckEstimateAfterHighCrestFactorWithNoClippingMargin) { + const ClippingPredictorConfig config = GetConfig( + /*clipping_threshold_dbfs=*/0.0f); + auto predictor = CreateClippingPredictor(kNumChannels, config); + AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.reference_window_length, + kNumChannels, /*peak_ratio=*/0.99f, + *predictor); + CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); + AnalyzeZeroCrestFactorAudio(config.window_length, kNumChannels, + /*peak_ratio=*/0.99f, *predictor); + // Since the clipping threshold is set to 0 dBFS, `EstimateClippedLevelStep()` + // is expected to return an unavailable value. + CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); +} + +TEST_P(ClippingPredictorModeParameterization, + CheckEstimateAfterHighCrestFactorWithClippingMargin) { + const ClippingPredictorConfig config = + GetConfig(/*clipping_threshold_dbfs=*/-1.0f); + auto predictor = CreateClippingPredictor(kNumChannels, config); + AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.reference_window_length, + kNumChannels, + /*peak_ratio=*/0.99f, *predictor); + CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); + AnalyzeZeroCrestFactorAudio(config.window_length, kNumChannels, + /*peak_ratio=*/0.99f, *predictor); + // TODO(bugs.webrtc.org/12774): Add clarifying comment. + const float expected_step = + config.mode == ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction + ? 17 + : kDefaultClippedLevelStep; + CheckChannelEstimatesWithValue(kNumChannels, /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor, expected_step); +} + +INSTANTIATE_TEST_SUITE_P( + GainController1ClippingPredictor, + ClippingPredictorModeParameterization, + ::testing::Values( + ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction, + ClippingPredictorMode::kFixedStepClippingPeakPrediction)); + +TEST(ClippingEventPredictorTest, CheckEstimateAfterReset) { + // TODO(bugs.webrtc.org/12874): Use designated initializers one fixed. 
+ constexpr ClippingPredictorConfig kConfig{ + /*enabled=*/true, + /*mode=*/ClippingPredictorMode::kClippingEventPrediction, + /*window_length=*/5, + /*reference_window_length=*/5, + /*reference_window_delay=*/5, + /*clipping_threshold=*/-1.0f, + /*crest_factor_margin=*/3.0f}; + auto predictor = CreateClippingPredictor(kNumChannels, kConfig); + AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/kConfig.reference_window_length, + kNumChannels, + /*peak_ratio=*/0.99f, *predictor); + CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); + predictor->Reset(); + AnalyzeZeroCrestFactorAudio(kConfig.window_length, kNumChannels, + /*peak_ratio=*/0.99f, *predictor); + CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); +} + +TEST(ClippingPeakPredictorTest, CheckNoEstimateAfterReset) { + // TODO(bugs.webrtc.org/12874): Use designated initializers one fixed. + constexpr ClippingPredictorConfig kConfig{ + /*enabled=*/true, + /*mode=*/ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction, + /*window_length=*/5, + /*reference_window_length=*/5, + /*reference_window_delay=*/5, + /*clipping_threshold=*/-1.0f}; + auto predictor = CreateClippingPredictor(kNumChannels, kConfig); + AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/kConfig.reference_window_length, + kNumChannels, + /*peak_ratio=*/0.99f, *predictor); + CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); + predictor->Reset(); + AnalyzeZeroCrestFactorAudio(kConfig.window_length, kNumChannels, + /*peak_ratio=*/0.99f, *predictor); + CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); +} + +TEST(ClippingPeakPredictorTest, CheckAdaptiveStepEstimate) { + // TODO(bugs.webrtc.org/12874): Use designated initializers one fixed. + constexpr ClippingPredictorConfig kConfig{ + /*enabled=*/true, + /*mode=*/ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction, + /*window_length=*/5, + /*reference_window_length=*/5, + /*reference_window_delay=*/5, + /*clipping_threshold=*/-1.0f}; + auto predictor = CreateClippingPredictor(kNumChannels, kConfig); + AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/kConfig.reference_window_length, + kNumChannels, /*peak_ratio=*/0.99f, + *predictor); + CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); + AnalyzeZeroCrestFactorAudio(kConfig.window_length, kNumChannels, + /*peak_ratio=*/0.99f, *predictor); + CheckChannelEstimatesWithValue(kNumChannels, /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor, /*expected=*/17); +} + +TEST(ClippingPeakPredictorTest, CheckFixedStepEstimate) { + // TODO(bugs.webrtc.org/12874): Use designated initializers one fixed. 
+ constexpr ClippingPredictorConfig kConfig{ + /*enabled=*/true, + /*mode=*/ClippingPredictorMode::kFixedStepClippingPeakPrediction, + /*window_length=*/5, + /*reference_window_length=*/5, + /*reference_window_delay=*/5, + /*clipping_threshold=*/-1.0f}; + auto predictor = CreateClippingPredictor(kNumChannels, kConfig); + AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/kConfig.reference_window_length, + kNumChannels, /*peak_ratio=*/0.99f, + *predictor); + CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); + AnalyzeZeroCrestFactorAudio(kConfig.window_length, kNumChannels, + /*peak_ratio=*/0.99f, *predictor); + CheckChannelEstimatesWithValue( + kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor, kDefaultClippedLevelStep); +} + +} // namespace +} // namespace webrtc diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc index 8bf192e77f..3fc9008db1 100644 --- a/modules/audio_processing/agc2/adaptive_agc.cc +++ b/modules/audio_processing/agc2/adaptive_agc.cc @@ -25,10 +25,6 @@ using AdaptiveDigitalConfig = using NoiseEstimatorType = AudioProcessing::Config::GainController2::NoiseEstimator; -constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1; -constexpr float kMaxGainChangePerSecondDb = 3.0f; -constexpr float kMaxOutputNoiseLevelDbfs = -50.0f; - // Detects the available CPU features and applies any kill-switches. AvailableCpuFeatures GetAllowedCpuFeatures( const AdaptiveDigitalConfig& config) { @@ -56,29 +52,8 @@ std::unique_ptr<NoiseLevelEstimator> CreateNoiseLevelEstimator( } } -constexpr NoiseEstimatorType kDefaultNoiseLevelEstimatorType = - NoiseEstimatorType::kNoiseFloor; - } // namespace -AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper) - : speech_level_estimator_(apm_data_dumper), - gain_controller_(apm_data_dumper, - kGainApplierAdjacentSpeechFramesThreshold, - kMaxGainChangePerSecondDb, - kMaxOutputNoiseLevelDbfs), - apm_data_dumper_(apm_data_dumper), - noise_level_estimator_( - CreateNoiseLevelEstimator(kDefaultNoiseLevelEstimatorType, - apm_data_dumper)), - saturation_protector_( - CreateSaturationProtector(kSaturationProtectorInitialHeadroomDb, - kSaturationProtectorExtraHeadroomDb, - kGainApplierAdjacentSpeechFramesThreshold, - apm_data_dumper)) { - RTC_DCHECK(apm_data_dumper); -} - AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper, const AdaptiveDigitalConfig& config) : speech_level_estimator_(apm_data_dumper, @@ -87,7 +62,8 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper, gain_controller_(apm_data_dumper, config.adjacent_speech_frames_threshold, config.max_gain_change_db_per_second, - config.max_output_noise_level_dbfs), + config.max_output_noise_level_dbfs, + config.dry_run), apm_data_dumper_(apm_data_dumper), noise_level_estimator_( CreateNoiseLevelEstimator(config.noise_estimator, apm_data_dumper)), @@ -106,6 +82,10 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper, AdaptiveAgc::~AdaptiveAgc() = default; +void AdaptiveAgc::Initialize(int sample_rate_hz, int num_channels) { + gain_controller_.Initialize(sample_rate_hz, num_channels); +} + void AdaptiveAgc::Process(AudioFrameView<float> frame, float limiter_envelope) { AdaptiveDigitalGainApplier::FrameInfo info; diff --git a/modules/audio_processing/agc2/adaptive_agc.h b/modules/audio_processing/agc2/adaptive_agc.h index fe814446ff..43c7787e36 100644 --- a/modules/audio_processing/agc2/adaptive_agc.h +++ 
b/modules/audio_processing/agc2/adaptive_agc.h @@ -25,19 +25,21 @@ namespace webrtc { class ApmDataDumper; // Adaptive digital gain controller. -// TODO(crbug.com/webrtc/7494): Unify with `AdaptiveDigitalGainApplier`. +// TODO(crbug.com/webrtc/7494): Rename to `AdaptiveDigitalGainController`. class AdaptiveAgc { public: - explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper); - // TODO(crbug.com/webrtc/7494): Remove ctor above. AdaptiveAgc( ApmDataDumper* apm_data_dumper, const AudioProcessing::Config::GainController2::AdaptiveDigital& config); ~AdaptiveAgc(); + void Initialize(int sample_rate_hz, int num_channels); + + // TODO(crbug.com/webrtc/7494): Add `SetLimiterEnvelope()`. + // Analyzes `frame` and applies a digital adaptive gain to it. Takes into // account the envelope measured by the limiter. - // TODO(crbug.com/webrtc/7494): Make the class depend on the limiter. + // TODO(crbug.com/webrtc/7494): Remove `limiter_envelope`. void Process(AudioFrameView<float> frame, float limiter_envelope); // Handles a gain change applied to the input signal (e.g., analog gain). diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc index 8a8a7fdc9b..e59b110efe 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc @@ -92,13 +92,28 @@ float ComputeGainChangeThisFrameDb(float target_gain_db, max_gain_increase_db); } +// Copies the (multichannel) audio samples from `src` into `dst`. +void CopyAudio(AudioFrameView<const float> src, + std::vector<std::vector<float>>& dst) { + RTC_DCHECK_GT(src.num_channels(), 0); + RTC_DCHECK_GT(src.samples_per_channel(), 0); + RTC_DCHECK_EQ(dst.size(), src.num_channels()); + for (size_t c = 0; c < src.num_channels(); ++c) { + rtc::ArrayView<const float> channel_view = src.channel(c); + RTC_DCHECK_EQ(channel_view.size(), src.samples_per_channel()); + RTC_DCHECK_EQ(dst[c].size(), src.samples_per_channel()); + std::copy(channel_view.begin(), channel_view.end(), dst[c].begin()); + } +} + } // namespace AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier( ApmDataDumper* apm_data_dumper, int adjacent_speech_frames_threshold, float max_gain_change_db_per_second, - float max_output_noise_level_dbfs) + float max_output_noise_level_dbfs, + bool dry_run) : apm_data_dumper_(apm_data_dumper), gain_applier_( /*hard_clip_samples=*/false, @@ -107,13 +122,39 @@ AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier( max_gain_change_db_per_10ms_(max_gain_change_db_per_second * kFrameDurationMs / 1000.f), max_output_noise_level_dbfs_(max_output_noise_level_dbfs), + dry_run_(dry_run), calls_since_last_gain_log_(0), frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_), last_gain_db_(kInitialAdaptiveDigitalGainDb) { - RTC_DCHECK_GT(max_gain_change_db_per_second, 0.f); + RTC_DCHECK_GT(max_gain_change_db_per_second, 0.0f); RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1); - RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.f); - RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.f); + RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.0f); + RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.0f); + Initialize(/*sample_rate_hz=*/48000, /*num_channels=*/1); +} + +void AdaptiveDigitalGainApplier::Initialize(int sample_rate_hz, + int num_channels) { + if (!dry_run_) { + return; + } + RTC_DCHECK_GT(sample_rate_hz, 0); + RTC_DCHECK_GT(num_channels, 0); + int frame_size = rtc::CheckedDivExact(sample_rate_hz, 100); + bool 
sample_rate_changed = + dry_run_frame_.empty() || // Handle initialization. + dry_run_frame_[0].size() != static_cast<size_t>(frame_size); + bool num_channels_changed = + dry_run_channels_.size() != static_cast<size_t>(num_channels); + if (sample_rate_changed || num_channels_changed) { + // Resize the multichannel audio vector and update the channel pointers. + dry_run_frame_.resize(num_channels); + dry_run_channels_.resize(num_channels); + for (int c = 0; c < num_channels; ++c) { + dry_run_frame_[c].resize(frame_size); + dry_run_channels_[c] = dry_run_frame_[c].data(); + } + } } void AdaptiveDigitalGainApplier::Process(const FrameInfo& info, @@ -174,7 +215,19 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info, gain_applier_.SetGainFactor( DbToRatio(last_gain_db_ + gain_change_this_frame_db)); } - gain_applier_.ApplyGain(frame); + + // Modify `frame` only if not running in "dry run" mode. + if (!dry_run_) { + gain_applier_.ApplyGain(frame); + } else { + // Copy `frame` so that `ApplyGain()` is called (on a copy). + CopyAudio(frame, dry_run_frame_); + RTC_DCHECK(!dry_run_channels_.empty()); + AudioFrameView<float> frame_copy(&dry_run_channels_[0], + frame.num_channels(), + frame.samples_per_channel()); + gain_applier_.ApplyGain(frame_copy); + } // Remember that the gain has changed for the next iteration. last_gain_db_ = last_gain_db_ + gain_change_this_frame_db; diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h index 74220fa861..8b58ea00b2 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h @@ -11,6 +11,8 @@ #ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_ #define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_ +#include <vector> + #include "modules/audio_processing/agc2/gain_applier.h" #include "modules/audio_processing/include/audio_frame_view.h" @@ -37,15 +39,18 @@ class AdaptiveDigitalGainApplier { // frames must be observed in order to consider the sequence as speech. // `max_gain_change_db_per_second` limits the adaptation speed (uniformly // operated across frames). `max_output_noise_level_dbfs` limits the output - // noise level. + // noise level. If `dry_run` is true, `Process()` will not modify the audio. AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper, int adjacent_speech_frames_threshold, float max_gain_change_db_per_second, - float max_output_noise_level_dbfs); + float max_output_noise_level_dbfs, + bool dry_run); AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete; AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) = delete; + void Initialize(int sample_rate_hz, int num_channels); + // Analyzes `info`, updates the digital gain and applies it to a 10 ms // `frame`. Supports any sample rate supported by APM. 
void Process(const FrameInfo& info, AudioFrameView<float> frame); @@ -57,10 +62,14 @@ class AdaptiveDigitalGainApplier { const int adjacent_speech_frames_threshold_; const float max_gain_change_db_per_10ms_; const float max_output_noise_level_dbfs_; + const bool dry_run_; int calls_since_last_gain_log_; int frames_to_gain_increase_allowed_; float last_gain_db_; + + std::vector<std::vector<float>> dry_run_frame_; + std::vector<float*> dry_run_channels_; }; } // namespace webrtc diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc index ee9cb02ed6..f4a23a92b9 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc @@ -48,7 +48,8 @@ struct GainApplierHelper { &apm_data_dumper, adjacent_speech_frames_threshold, kMaxGainChangePerSecondDb, - kMaxOutputNoiseLevelDbfs)) {} + kMaxOutputNoiseLevelDbfs, + /*dry_run=*/false)) {} ApmDataDumper apm_data_dumper; std::unique_ptr<AdaptiveDigitalGainApplier> gain_applier; }; @@ -67,6 +68,7 @@ constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{ TEST(GainController2AdaptiveGainApplier, GainApplierShouldNotCrash) { GainApplierHelper helper; + helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo); // Make one call with reasonable audio level values and settings. VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; @@ -80,6 +82,7 @@ TEST(GainController2AdaptiveGainApplier, MaxGainApplied) { static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10; GainApplierHelper helper; + helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; info.speech_level_dbfs = -60.0f; float applied_gain; @@ -94,6 +97,7 @@ TEST(GainController2AdaptiveGainApplier, MaxGainApplied) { TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) { GainApplierHelper helper; + helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono); constexpr float initial_level_dbfs = -25.0f; // A few extra frames for safety. @@ -131,6 +135,7 @@ TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) { TEST(GainController2AdaptiveGainApplier, GainIsRampedInAFrame) { GainApplierHelper helper; + helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono); constexpr float initial_level_dbfs = -25.0f; @@ -155,6 +160,7 @@ TEST(GainController2AdaptiveGainApplier, GainIsRampedInAFrame) { TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) { GainApplierHelper helper; + helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono); constexpr float initial_level_dbfs = -25.0f; constexpr int num_initial_frames = @@ -184,6 +190,7 @@ TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) { TEST(GainController2GainApplier, CanHandlePositiveSpeechLevels) { GainApplierHelper helper; + helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo); // Make one call with positive audio level values and settings. 
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f); @@ -194,6 +201,7 @@ TEST(GainController2GainApplier, CanHandlePositiveSpeechLevels) { TEST(GainController2GainApplier, AudioLevelLimitsGain) { GainApplierHelper helper; + helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono); constexpr float initial_level_dbfs = -25.0f; constexpr int num_initial_frames = @@ -231,6 +239,7 @@ TEST_P(AdaptiveDigitalGainApplierTest, DoNotIncreaseGainWithTooFewSpeechFrames) { const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold(); GainApplierHelper helper(adjacent_speech_frames_threshold); + helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono); float prev_gain = 0.0f; for (int i = 0; i < adjacent_speech_frames_threshold; ++i) { @@ -248,6 +257,7 @@ TEST_P(AdaptiveDigitalGainApplierTest, TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) { const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold(); GainApplierHelper helper(adjacent_speech_frames_threshold); + helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono); float prev_gain = 0.0f; for (int i = 0; i < adjacent_speech_frames_threshold; ++i) { @@ -269,5 +279,68 @@ INSTANTIATE_TEST_SUITE_P(GainController2, AdaptiveDigitalGainApplierTest, ::testing::Values(1, 7, 31)); +// Checks that the input is never modified when running in dry run mode. +TEST(GainController2GainApplier, DryRunDoesNotChangeInput) { + ApmDataDumper apm_data_dumper(0); + AdaptiveDigitalGainApplier gain_applier( + &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1, + kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true); + // Simulate an input signal with log speech level. + AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; + info.speech_level_dbfs = -60.0f; + // Allow enough time to reach the maximum gain. + constexpr int kNumFramesToAdapt = + static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10; + constexpr float kPcmSamples = 123.456f; + // Run the gain applier and check that the PCM samples are not modified. + gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono); + for (int i = 0; i < kNumFramesToAdapt; ++i) { + SCOPED_TRACE(i); + VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, kPcmSamples); + gain_applier.Process(info, fake_audio.float_frame_view()); + EXPECT_FLOAT_EQ(fake_audio.float_frame_view().channel(0)[0], kPcmSamples); + } +} + +// Checks that no sample is modified before and after the sample rate changes. 
+TEST(GainController2GainApplier, DryRunHandlesSampleRateChange) { + ApmDataDumper apm_data_dumper(0); + AdaptiveDigitalGainApplier gain_applier( + &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1, + kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true); + AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; + info.speech_level_dbfs = -60.0f; + constexpr float kPcmSamples = 123.456f; + VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples); + gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono); + gain_applier.Process(info, fake_audio_8k.float_frame_view()); + EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples); + gain_applier.Initialize(/*sample_rate_hz=*/48000, kMono); + VectorFloatFrame fake_audio_48k(kMono, kFrameLen10ms48kHz, kPcmSamples); + gain_applier.Process(info, fake_audio_48k.float_frame_view()); + EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples); +} + +// Checks that no sample is modified before and after the number of channels +// changes. +TEST(GainController2GainApplier, DryRunHandlesNumChannelsChange) { + ApmDataDumper apm_data_dumper(0); + AdaptiveDigitalGainApplier gain_applier( + &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1, + kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true); + AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; + info.speech_level_dbfs = -60.0f; + constexpr float kPcmSamples = 123.456f; + VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples); + gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono); + gain_applier.Process(info, fake_audio_8k.float_frame_view()); + EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples); + VectorFloatFrame fake_audio_48k(kStereo, kFrameLen10ms8kHz, kPcmSamples); + gain_applier.Initialize(/*sample_rate_hz=*/8000, kStereo); + gain_applier.Process(info, fake_audio_48k.float_frame_view()); + EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples); + EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(1)[0], kPcmSamples); +} + } // namespace } // namespace webrtc diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h index 0f806d3938..adb1614926 100644 --- a/modules/audio_processing/agc2/agc2_common.h +++ b/modules/audio_processing/agc2/agc2_common.h @@ -44,8 +44,6 @@ constexpr float kLevelEstimatorLeakFactor = 1.0f - 1.0f / kLevelEstimatorTimeToConfidenceMs; // Robust VAD probability and speech decisions. -constexpr int kDefaultVadRnnResetPeriodMs = 1500; -static_assert(kDefaultVadRnnResetPeriodMs % kFrameDurationMs == 0, ""); constexpr int kDefaultLevelEstimatorAdjacentSpeechFramesThreshold = 12; // Saturation Protector settings. 
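Note: the dry-run tests above only verify the observable contract (the input PCM is never modified); the applier's .cc file is not part of this excerpt. One plausible arrangement, consistent with the dry_run_frame_ and dry_run_channels_ members added to the header, is to clone each 10 ms frame into scratch storage sized in Initialize() and apply the gain to the clone. The helper below is an illustrative sketch of that idea, not the actual implementation.

#include <algorithm>
#include <vector>

#include "modules/audio_processing/include/audio_frame_view.h"

namespace webrtc {

// Hypothetical helper: picks the frame the gain should be applied to.
// `scratch` is assumed to hold num_channels buffers of samples_per_channel
// floats, reallocated whenever the sample rate or channel count changes.
AudioFrameView<float> FrameToModify(AudioFrameView<float> frame,
                                    bool dry_run,
                                    std::vector<std::vector<float>>& scratch,
                                    std::vector<float*>& channel_ptrs) {
  if (!dry_run) {
    return frame;  // Normal mode: modify the caller's samples in place.
  }
  // Dry-run mode: copy every channel and hand out a view over the copy, so
  // the caller's frame stays untouched.
  for (int c = 0; c < frame.num_channels(); ++c) {
    std::copy(frame.channel(c).begin(), frame.channel(c).end(),
              scratch[c].begin());
    channel_ptrs[c] = scratch[c].data();
  }
  return AudioFrameView<float>(channel_ptrs.data(), frame.num_channels(),
                               frame.samples_per_channel());
}

}  // namespace webrtc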
diff --git a/modules/audio_processing/agc2/vad_with_level.cc b/modules/audio_processing/agc2/vad_with_level.cc index 034f2b6ac0..9747ca2370 100644 --- a/modules/audio_processing/agc2/vad_with_level.cc +++ b/modules/audio_processing/agc2/vad_with_level.cc @@ -67,10 +67,6 @@ class Vad : public VoiceActivityDetector { } // namespace -VadLevelAnalyzer::VadLevelAnalyzer() - : VadLevelAnalyzer(kDefaultVadRnnResetPeriodMs, GetAvailableCpuFeatures()) { -} - VadLevelAnalyzer::VadLevelAnalyzer(int vad_reset_period_ms, const AvailableCpuFeatures& cpu_features) : VadLevelAnalyzer(vad_reset_period_ms, diff --git a/modules/audio_processing/agc2/vad_with_level.h b/modules/audio_processing/agc2/vad_with_level.h index 7cd93d6f2b..8d2ae45762 100644 --- a/modules/audio_processing/agc2/vad_with_level.h +++ b/modules/audio_processing/agc2/vad_with_level.h @@ -37,8 +37,6 @@ class VadLevelAnalyzer { virtual float ComputeProbability(AudioFrameView<const float> frame) = 0; }; - // Ctor. Uses the default VAD with the default settings. - VadLevelAnalyzer(); // Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call // `VadLevelAnalyzer::Reset()`; it must be equal to or greater than the // duration of two frames. Uses `cpu_features` to instantiate the default VAD. diff --git a/modules/audio_processing/agc2/vad_with_level_unittest.cc b/modules/audio_processing/agc2/vad_with_level_unittest.cc index 99b0136376..ec8e476965 100644 --- a/modules/audio_processing/agc2/vad_with_level_unittest.cc +++ b/modules/audio_processing/agc2/vad_with_level_unittest.cc @@ -71,16 +71,16 @@ struct FrameWithView { const AudioFrameView<const float> view; }; -TEST(GainController2VadLevelAnalyzer, PeakLevelGreaterThanRmsLevel) { +TEST(GainController2VadLevelAnalyzer, RmsLessThanPeakLevel) { + auto analyzer = CreateVadLevelAnalyzerWithMockVad( + /*vad_reset_period_ms=*/1500, + /*speech_probabilities=*/{1.0f}, + /*expected_vad_reset_calls=*/0); // Handcrafted frame so that the average is lower than the peak value. FrameWithView frame(1000.0f); // Constant frame. frame.samples[10] = 2000.0f; // Except for one peak value. - - // Compute audio frame levels (the VAD result is ignored). - VadLevelAnalyzer analyzer; - auto levels_and_vad_prob = analyzer.AnalyzeFrame(frame.view); - - // Compare peak and RMS levels. + // Compute audio frame levels. + auto levels_and_vad_prob = analyzer->AnalyzeFrame(frame.view); EXPECT_LT(levels_and_vad_prob.rms_dbfs, levels_and_vad_prob.peak_dbfs); } diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index 12646fd243..4a1985545f 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -271,7 +271,8 @@ AudioProcessingImpl::AudioProcessingImpl( !field_trial::IsEnabled( "WebRTC-ApmExperimentalMultiChannelCaptureKillSwitch"), EnforceSplitBandHpf(), - MinimizeProcessingForUnusedOutput()), + MinimizeProcessingForUnusedOutput(), + field_trial::IsEnabled("WebRTC-TransientSuppressorForcedOff")), capture_(), capture_nonlocked_() { RTC_LOG(LS_INFO) << "Injected APM submodules:" @@ -290,8 +291,7 @@ AudioProcessingImpl::AudioProcessingImpl( // If no echo detector is injected, use the ResidualEchoDetector. 
if (!submodules_.echo_detector) { - submodules_.echo_detector = - new rtc::RefCountedObject<ResidualEchoDetector>(); + submodules_.echo_detector = rtc::make_ref_counted<ResidualEchoDetector>(); } #if !(defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)) @@ -1733,7 +1733,8 @@ bool AudioProcessingImpl::UpdateActiveSubmoduleStates() { } void AudioProcessingImpl::InitializeTransientSuppressor() { - if (config_.transient_suppression.enabled) { + if (config_.transient_suppression.enabled && + !constants_.transient_suppressor_forced_off) { // Attempt to create a transient suppressor, if one is not already created. if (!submodules_.transient_suppressor) { submodules_.transient_suppressor = @@ -1917,7 +1918,11 @@ void AudioProcessingImpl::InitializeGainController1() { config_.gain_controller1.analog_gain_controller.clipped_level_min, !config_.gain_controller1.analog_gain_controller .enable_digital_adaptive, - capture_nonlocked_.split_rate)); + capture_nonlocked_.split_rate, + config_.gain_controller1.analog_gain_controller.clipped_level_step, + config_.gain_controller1.analog_gain_controller.clipped_ratio_threshold, + config_.gain_controller1.analog_gain_controller.clipped_wait_frames, + config_.gain_controller1.analog_gain_controller.clipping_predictor)); if (re_creation) { submodules_.agc_manager->set_stream_analog_level(stream_analog_level); } @@ -1937,7 +1942,8 @@ void AudioProcessingImpl::InitializeGainController2() { submodules_.gain_controller2.reset(new GainController2()); } - submodules_.gain_controller2->Initialize(proc_fullband_sample_rate_hz()); + submodules_.gain_controller2->Initialize(proc_fullband_sample_rate_hz(), + num_input_channels()); submodules_.gain_controller2->ApplyConfig(config_.gain_controller2); } else { submodules_.gain_controller2.reset(); diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h index e08abd5797..c88cfcde92 100644 --- a/modules/audio_processing/audio_processing_impl.h +++ b/modules/audio_processing/audio_processing_impl.h @@ -422,16 +422,19 @@ class AudioProcessingImpl : public AudioProcessing { ApmConstants(bool multi_channel_render_support, bool multi_channel_capture_support, bool enforce_split_band_hpf, - bool minimize_processing_for_unused_output) + bool minimize_processing_for_unused_output, + bool transient_suppressor_forced_off) : multi_channel_render_support(multi_channel_render_support), multi_channel_capture_support(multi_channel_capture_support), enforce_split_band_hpf(enforce_split_band_hpf), minimize_processing_for_unused_output( - minimize_processing_for_unused_output) {} + minimize_processing_for_unused_output), + transient_suppressor_forced_off(transient_suppressor_forced_off) {} bool multi_channel_render_support; bool multi_channel_capture_support; bool enforce_split_band_hpf; bool minimize_processing_for_unused_output; + bool transient_suppressor_forced_off; } constants_; struct ApmCaptureState { diff --git a/modules/audio_processing/audio_processing_impl_locking_unittest.cc b/modules/audio_processing/audio_processing_impl_locking_unittest.cc index ec165aa146..66c1251d4c 100644 --- a/modules/audio_processing/audio_processing_impl_locking_unittest.cc +++ b/modules/audio_processing/audio_processing_impl_locking_unittest.cc @@ -387,33 +387,6 @@ class AudioProcessingImplLockTest void SetUp() override; void TearDown() override; - // Thread callback for the render thread - static void RenderProcessorThreadFunc(void* context) { - AudioProcessingImplLockTest* impl = - 
reinterpret_cast<AudioProcessingImplLockTest*>(context); - while (!impl->MaybeEndTest()) { - impl->render_thread_state_.Process(); - } - } - - // Thread callback for the capture thread - static void CaptureProcessorThreadFunc(void* context) { - AudioProcessingImplLockTest* impl = - reinterpret_cast<AudioProcessingImplLockTest*>(context); - while (!impl->MaybeEndTest()) { - impl->capture_thread_state_.Process(); - } - } - - // Thread callback for the stats thread - static void StatsProcessorThreadFunc(void* context) { - AudioProcessingImplLockTest* impl = - reinterpret_cast<AudioProcessingImplLockTest*>(context); - while (!impl->MaybeEndTest()) { - impl->stats_thread_state_.Process(); - } - } - // Tests whether all the required render and capture side calls have been // done. bool TestDone() { @@ -423,9 +396,28 @@ class AudioProcessingImplLockTest // Start the threads used in the test. void StartThreads() { - render_thread_.Start(); - capture_thread_.Start(); - stats_thread_.Start(); + const auto attributes = + rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime); + render_thread_ = rtc::PlatformThread::SpawnJoinable( + [this] { + while (!MaybeEndTest()) + render_thread_state_.Process(); + }, + "render", attributes); + capture_thread_ = rtc::PlatformThread::SpawnJoinable( + [this] { + while (!MaybeEndTest()) { + capture_thread_state_.Process(); + } + }, + "capture", attributes); + + stats_thread_ = rtc::PlatformThread::SpawnJoinable( + [this] { + while (!MaybeEndTest()) + stats_thread_state_.Process(); + }, + "stats", attributes); } // Event handlers for the test. @@ -434,9 +426,6 @@ class AudioProcessingImplLockTest rtc::Event capture_call_event_; // Thread related variables. - rtc::PlatformThread render_thread_; - rtc::PlatformThread capture_thread_; - rtc::PlatformThread stats_thread_; mutable RandomGenerator rand_gen_; std::unique_ptr<AudioProcessing> apm_; @@ -445,6 +434,9 @@ class AudioProcessingImplLockTest RenderProcessor render_thread_state_; CaptureProcessor capture_thread_state_; StatsProcessor stats_thread_state_; + rtc::PlatformThread render_thread_; + rtc::PlatformThread capture_thread_; + rtc::PlatformThread stats_thread_; }; // Sleeps a random time between 0 and max_sleep milliseconds. 
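Note: the same thread migration recurs in audio_processing_performance_unittest.cc below. The static *ThreadFunc callbacks plus explicit Start()/Stop() calls are replaced by rtc::PlatformThread::SpawnJoinable(), which starts the lambda immediately and joins it in Finalize() or in the handle's destructor. A minimal sketch of the pattern; the Worker class and stop flag are illustrative, not part of the change.

#include <atomic>

#include "rtc_base/platform_thread.h"

class Worker {
 public:
  void Start() {
    // The lambda starts running on the new thread as soon as it is spawned.
    thread_ = rtc::PlatformThread::SpawnJoinable(
        [this] {
          while (!stop_.load(std::memory_order_relaxed)) {
            DoOneIteration();
          }
        },
        "worker",
        rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime));
  }

  void Stop() {
    stop_.store(true, std::memory_order_relaxed);
    thread_.Finalize();  // Joins the thread; a no-op on an empty handle.
  }

 private:
  void DoOneIteration() { /* one unit of work */ }

  std::atomic<bool> stop_{false};
  rtc::PlatformThread thread_;  // Default-constructed handles own no thread.
};

Because joining now happens in the members' destructors, the lock test also moves its PlatformThread members after the *_state_ objects the lambdas capture, so the threads are joined before that state is torn down.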
@@ -485,19 +477,7 @@ void PopulateAudioFrame(float amplitude, } AudioProcessingImplLockTest::AudioProcessingImplLockTest() - : render_thread_(RenderProcessorThreadFunc, - this, - "render", - rtc::kRealtimePriority), - capture_thread_(CaptureProcessorThreadFunc, - this, - "capture", - rtc::kRealtimePriority), - stats_thread_(StatsProcessorThreadFunc, - this, - "stats", - rtc::kNormalPriority), - apm_(AudioProcessingBuilderForTesting().Create()), + : apm_(AudioProcessingBuilderForTesting().Create()), render_thread_state_(kMaxFrameSize, &rand_gen_, &render_call_event_, @@ -549,9 +529,6 @@ void AudioProcessingImplLockTest::SetUp() { void AudioProcessingImplLockTest::TearDown() { render_call_event_.Set(); capture_call_event_.Set(); - render_thread_.Stop(); - capture_thread_.Stop(); - stats_thread_.Stop(); } StatsProcessor::StatsProcessor(RandomGenerator* rand_gen, diff --git a/modules/audio_processing/audio_processing_impl_unittest.cc b/modules/audio_processing/audio_processing_impl_unittest.cc index ef1830357a..ca8b8b4c25 100644 --- a/modules/audio_processing/audio_processing_impl_unittest.cc +++ b/modules/audio_processing/audio_processing_impl_unittest.cc @@ -544,8 +544,7 @@ TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) { TEST(AudioProcessingImplTest, RenderPreProcessorBeforeEchoDetector) { // Make sure that signal changes caused by a render pre-processing sub-module // take place before any echo detector analysis. - rtc::scoped_refptr<TestEchoDetector> test_echo_detector( - new rtc::RefCountedObject<TestEchoDetector>()); + auto test_echo_detector = rtc::make_ref_counted<TestEchoDetector>(); std::unique_ptr<CustomProcessing> test_render_pre_processor( new TestRenderPreProcessor()); // Create APM injecting the test echo detector and render pre-processor. @@ -605,8 +604,7 @@ TEST(AudioProcessingImplTest, RenderPreProcessorBeforeEchoDetector) { // config should be bit-exact with running APM with said submodules disabled. // This mainly tests that SetCreateOptionalSubmodulesForTesting has an effect. TEST(ApmWithSubmodulesExcludedTest, BitexactWithDisabledModules) { - rtc::scoped_refptr<AudioProcessingImpl> apm = - new rtc::RefCountedObject<AudioProcessingImpl>(webrtc::Config()); + auto apm = rtc::make_ref_counted<AudioProcessingImpl>(webrtc::Config()); ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError); ApmSubmoduleCreationOverrides overrides; @@ -654,8 +652,7 @@ TEST(ApmWithSubmodulesExcludedTest, BitexactWithDisabledModules) { // Disable transient suppressor creation and run APM in ways that should trigger // calls to the transient suppressor API. TEST(ApmWithSubmodulesExcludedTest, ReinitializeTransientSuppressor) { - rtc::scoped_refptr<AudioProcessingImpl> apm = - new rtc::RefCountedObject<AudioProcessingImpl>(webrtc::Config()); + auto apm = rtc::make_ref_counted<AudioProcessingImpl>(webrtc::Config()); ASSERT_EQ(apm->Initialize(), kNoErr); ApmSubmoduleCreationOverrides overrides; @@ -716,8 +713,7 @@ TEST(ApmWithSubmodulesExcludedTest, ReinitializeTransientSuppressor) { // Disable transient suppressor creation and run APM in ways that should trigger // calls to the transient suppressor API. 
TEST(ApmWithSubmodulesExcludedTest, ToggleTransientSuppressor) { - rtc::scoped_refptr<AudioProcessingImpl> apm = - new rtc::RefCountedObject<AudioProcessingImpl>(webrtc::Config()); + auto apm = rtc::make_ref_counted<AudioProcessingImpl>(webrtc::Config()); ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError); ApmSubmoduleCreationOverrides overrides; diff --git a/modules/audio_processing/audio_processing_performance_unittest.cc b/modules/audio_processing/audio_processing_performance_unittest.cc index 86ff0e8bfe..9585850296 100644 --- a/modules/audio_processing/audio_processing_performance_unittest.cc +++ b/modules/audio_processing/audio_processing_performance_unittest.cc @@ -391,15 +391,7 @@ class TimedThreadApiProcessor { class CallSimulator : public ::testing::TestWithParam<SimulationConfig> { public: CallSimulator() - : render_thread_(new rtc::PlatformThread(RenderProcessorThreadFunc, - this, - "render", - rtc::kRealtimePriority)), - capture_thread_(new rtc::PlatformThread(CaptureProcessorThreadFunc, - this, - "capture", - rtc::kRealtimePriority)), - rand_gen_(42U), + : rand_gen_(42U), simulation_config_(static_cast<SimulationConfig>(GetParam())) {} // Run the call simulation with a timeout. @@ -434,13 +426,10 @@ class CallSimulator : public ::testing::TestWithParam<SimulationConfig> { static const int kMinNumFramesToProcess = 150; static const int32_t kTestTimeout = 3 * 10 * kMinNumFramesToProcess; - // ::testing::TestWithParam<> implementation. - void TearDown() override { StopThreads(); } - // Stop all running threads. void StopThreads() { - render_thread_->Stop(); - capture_thread_->Stop(); + render_thread_.Finalize(); + capture_thread_.Finalize(); } // Simulator and APM setup. @@ -531,32 +520,28 @@ class CallSimulator : public ::testing::TestWithParam<SimulationConfig> { kMinNumFramesToProcess, kCaptureInputFloatLevel, num_capture_channels)); } - // Thread callback for the render thread. - static void RenderProcessorThreadFunc(void* context) { - CallSimulator* call_simulator = reinterpret_cast<CallSimulator*>(context); - while (call_simulator->render_thread_state_->Process()) { - } - } - - // Thread callback for the capture thread. - static void CaptureProcessorThreadFunc(void* context) { - CallSimulator* call_simulator = reinterpret_cast<CallSimulator*>(context); - while (call_simulator->capture_thread_state_->Process()) { - } - } - // Start the threads used in the test. void StartThreads() { - ASSERT_NO_FATAL_FAILURE(render_thread_->Start()); - ASSERT_NO_FATAL_FAILURE(capture_thread_->Start()); + const auto attributes = + rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime); + render_thread_ = rtc::PlatformThread::SpawnJoinable( + [this] { + while (render_thread_state_->Process()) { + } + }, + "render", attributes); + capture_thread_ = rtc::PlatformThread::SpawnJoinable( + [this] { + while (capture_thread_state_->Process()) { + } + }, + "capture", attributes); } // Event handler for the test. rtc::Event test_complete_; // Thread related variables. 
- std::unique_ptr<rtc::PlatformThread> render_thread_; - std::unique_ptr<rtc::PlatformThread> capture_thread_; Random rand_gen_; std::unique_ptr<AudioProcessing> apm_; @@ -565,6 +550,8 @@ class CallSimulator : public ::testing::TestWithParam<SimulationConfig> { LockedFlag capture_call_checker_; std::unique_ptr<TimedThreadApiProcessor> render_thread_state_; std::unique_ptr<TimedThreadApiProcessor> capture_thread_state_; + rtc::PlatformThread render_thread_; + rtc::PlatformThread capture_thread_; }; // Implements the callback functionality for the threads. diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc index 3d562dffcd..4d30a348f6 100644 --- a/modules/audio_processing/audio_processing_unittest.cc +++ b/modules/audio_processing/audio_processing_unittest.cc @@ -3039,50 +3039,50 @@ TEST(AudioProcessing, GainController1ConfigNotEqual) { Toggle(a.enabled); EXPECT_NE(a, b); - a.enabled = b.enabled; + a = b; a.mode = AudioProcessing::Config::GainController1::Mode::kAdaptiveDigital; EXPECT_NE(a, b); - a.mode = b.mode; + a = b; a.target_level_dbfs++; EXPECT_NE(a, b); - a.target_level_dbfs = b.target_level_dbfs; + a = b; a.compression_gain_db++; EXPECT_NE(a, b); - a.compression_gain_db = b.compression_gain_db; + a = b; Toggle(a.enable_limiter); EXPECT_NE(a, b); - a.enable_limiter = b.enable_limiter; + a = b; a.analog_level_minimum++; EXPECT_NE(a, b); - a.analog_level_minimum = b.analog_level_minimum; + a = b; a.analog_level_maximum--; EXPECT_NE(a, b); - a.analog_level_maximum = b.analog_level_maximum; + a = b; auto& a_analog = a.analog_gain_controller; const auto& b_analog = b.analog_gain_controller; Toggle(a_analog.enabled); EXPECT_NE(a, b); - a_analog.enabled = b_analog.enabled; + a_analog = b_analog; a_analog.startup_min_volume++; EXPECT_NE(a, b); - a_analog.startup_min_volume = b_analog.startup_min_volume; + a_analog = b_analog; a_analog.clipped_level_min++; EXPECT_NE(a, b); - a_analog.clipped_level_min = b_analog.clipped_level_min; + a_analog = b_analog; Toggle(a_analog.enable_digital_adaptive); EXPECT_NE(a, b); - a_analog.enable_digital_adaptive = b_analog.enable_digital_adaptive; + a_analog = b_analog; } TEST(AudioProcessing, GainController2ConfigEqual) { @@ -3094,7 +3094,7 @@ TEST(AudioProcessing, GainController2ConfigEqual) { b.enabled = a.enabled; EXPECT_EQ(a, b); - a.fixed_digital.gain_db += 1.f; + a.fixed_digital.gain_db += 1.0f; b.fixed_digital.gain_db = a.fixed_digital.gain_db; EXPECT_EQ(a, b); @@ -3105,46 +3105,44 @@ TEST(AudioProcessing, GainController2ConfigEqual) { b_adaptive.enabled = a_adaptive.enabled; EXPECT_EQ(a, b); - a_adaptive.vad_probability_attack += 1.f; - b_adaptive.vad_probability_attack = a_adaptive.vad_probability_attack; + Toggle(a_adaptive.dry_run); + b_adaptive.dry_run = a_adaptive.dry_run; EXPECT_EQ(a, b); - a_adaptive.level_estimator = - AudioProcessing::Config::GainController2::LevelEstimator::kPeak; - b_adaptive.level_estimator = a_adaptive.level_estimator; + a_adaptive.noise_estimator = AudioProcessing::Config::GainController2:: + NoiseEstimator::kStationaryNoise; + b_adaptive.noise_estimator = a_adaptive.noise_estimator; EXPECT_EQ(a, b); - a_adaptive.level_estimator_adjacent_speech_frames_threshold++; - b_adaptive.level_estimator_adjacent_speech_frames_threshold = - a_adaptive.level_estimator_adjacent_speech_frames_threshold; + a_adaptive.vad_reset_period_ms++; + b_adaptive.vad_reset_period_ms = a_adaptive.vad_reset_period_ms; EXPECT_EQ(a, b); - 
Toggle(a_adaptive.use_saturation_protector); - b_adaptive.use_saturation_protector = a_adaptive.use_saturation_protector; + a_adaptive.adjacent_speech_frames_threshold++; + b_adaptive.adjacent_speech_frames_threshold = + a_adaptive.adjacent_speech_frames_threshold; EXPECT_EQ(a, b); - a_adaptive.initial_saturation_margin_db += 1.f; - b_adaptive.initial_saturation_margin_db = - a_adaptive.initial_saturation_margin_db; + a_adaptive.max_gain_change_db_per_second += 1.0f; + b_adaptive.max_gain_change_db_per_second = + a_adaptive.max_gain_change_db_per_second; EXPECT_EQ(a, b); - a_adaptive.extra_saturation_margin_db += 1.f; - b_adaptive.extra_saturation_margin_db = a_adaptive.extra_saturation_margin_db; + a_adaptive.max_output_noise_level_dbfs += 1.0f; + b_adaptive.max_output_noise_level_dbfs = + a_adaptive.max_output_noise_level_dbfs; EXPECT_EQ(a, b); - a_adaptive.gain_applier_adjacent_speech_frames_threshold++; - b_adaptive.gain_applier_adjacent_speech_frames_threshold = - a_adaptive.gain_applier_adjacent_speech_frames_threshold; + Toggle(a_adaptive.sse2_allowed); + b_adaptive.sse2_allowed = a_adaptive.sse2_allowed; EXPECT_EQ(a, b); - a_adaptive.max_gain_change_db_per_second += 1.f; - b_adaptive.max_gain_change_db_per_second = - a_adaptive.max_gain_change_db_per_second; + Toggle(a_adaptive.avx2_allowed); + b_adaptive.avx2_allowed = a_adaptive.avx2_allowed; EXPECT_EQ(a, b); - a_adaptive.max_output_noise_level_dbfs -= 1.f; - b_adaptive.max_output_noise_level_dbfs = - a_adaptive.max_output_noise_level_dbfs; + Toggle(a_adaptive.neon_allowed); + b_adaptive.neon_allowed = a_adaptive.neon_allowed; EXPECT_EQ(a, b); } @@ -3156,60 +3154,55 @@ TEST(AudioProcessing, GainController2ConfigNotEqual) { Toggle(a.enabled); EXPECT_NE(a, b); - a.enabled = b.enabled; + a = b; - a.fixed_digital.gain_db += 1.f; + a.fixed_digital.gain_db += 1.0f; EXPECT_NE(a, b); - a.fixed_digital.gain_db = b.fixed_digital.gain_db; + a.fixed_digital = b.fixed_digital; auto& a_adaptive = a.adaptive_digital; const auto& b_adaptive = b.adaptive_digital; Toggle(a_adaptive.enabled); EXPECT_NE(a, b); - a_adaptive.enabled = b_adaptive.enabled; + a_adaptive = b_adaptive; - a_adaptive.vad_probability_attack += 1.f; + Toggle(a_adaptive.dry_run); EXPECT_NE(a, b); - a_adaptive.vad_probability_attack = b_adaptive.vad_probability_attack; + a_adaptive = b_adaptive; - a_adaptive.level_estimator = - AudioProcessing::Config::GainController2::LevelEstimator::kPeak; + a_adaptive.noise_estimator = AudioProcessing::Config::GainController2:: + NoiseEstimator::kStationaryNoise; EXPECT_NE(a, b); - a_adaptive.level_estimator = b_adaptive.level_estimator; + a_adaptive = b_adaptive; - a_adaptive.level_estimator_adjacent_speech_frames_threshold++; + a_adaptive.vad_reset_period_ms++; EXPECT_NE(a, b); - a_adaptive.level_estimator_adjacent_speech_frames_threshold = - b_adaptive.level_estimator_adjacent_speech_frames_threshold; + a_adaptive = b_adaptive; - Toggle(a_adaptive.use_saturation_protector); + a_adaptive.adjacent_speech_frames_threshold++; EXPECT_NE(a, b); - a_adaptive.use_saturation_protector = b_adaptive.use_saturation_protector; + a_adaptive = b_adaptive; - a_adaptive.initial_saturation_margin_db += 1.f; + a_adaptive.max_gain_change_db_per_second += 1.0f; EXPECT_NE(a, b); - a_adaptive.initial_saturation_margin_db = - b_adaptive.initial_saturation_margin_db; + a_adaptive = b_adaptive; - a_adaptive.extra_saturation_margin_db += 1.f; + a_adaptive.max_output_noise_level_dbfs += 1.0f; EXPECT_NE(a, b); - a_adaptive.extra_saturation_margin_db = 
b_adaptive.extra_saturation_margin_db; + a_adaptive = b_adaptive; - a_adaptive.gain_applier_adjacent_speech_frames_threshold++; + Toggle(a_adaptive.sse2_allowed); EXPECT_NE(a, b); - a_adaptive.gain_applier_adjacent_speech_frames_threshold = - b_adaptive.gain_applier_adjacent_speech_frames_threshold; + a_adaptive = b_adaptive; - a_adaptive.max_gain_change_db_per_second += 1.f; + Toggle(a_adaptive.avx2_allowed); EXPECT_NE(a, b); - a_adaptive.max_gain_change_db_per_second = - b_adaptive.max_gain_change_db_per_second; + a_adaptive = b_adaptive; - a_adaptive.max_output_noise_level_dbfs -= 1.f; + Toggle(a_adaptive.neon_allowed); EXPECT_NE(a, b); - a_adaptive.max_output_noise_level_dbfs = - b_adaptive.max_output_noise_level_dbfs; + a_adaptive = b_adaptive; } } // namespace webrtc diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc index 9e3e8e7cae..74b63c9432 100644 --- a/modules/audio_processing/gain_controller2.cc +++ b/modules/audio_processing/gain_controller2.cc @@ -26,22 +26,26 @@ int GainController2::instance_count_ = 0; GainController2::GainController2() : data_dumper_(rtc::AtomicOps::Increment(&instance_count_)), gain_applier_(/*hard_clip_samples=*/false, - /*initial_gain_factor=*/0.f), + /*initial_gain_factor=*/0.0f), limiter_(static_cast<size_t>(48000), &data_dumper_, "Agc2"), calls_since_last_limiter_log_(0) { if (config_.adaptive_digital.enabled) { - adaptive_agc_ = std::make_unique<AdaptiveAgc>(&data_dumper_); + adaptive_agc_ = + std::make_unique<AdaptiveAgc>(&data_dumper_, config_.adaptive_digital); } } GainController2::~GainController2() = default; -void GainController2::Initialize(int sample_rate_hz) { +void GainController2::Initialize(int sample_rate_hz, int num_channels) { RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || sample_rate_hz == AudioProcessing::kSampleRate16kHz || sample_rate_hz == AudioProcessing::kSampleRate32kHz || sample_rate_hz == AudioProcessing::kSampleRate48kHz); limiter_.SetSampleRate(sample_rate_hz); + if (adaptive_agc_) { + adaptive_agc_->Initialize(sample_rate_hz, num_channels); + } data_dumper_.InitiateNewSetOfRecordings(); data_dumper_.DumpRaw("sample_rate_hz", sample_rate_hz); calls_since_last_limiter_log_ = 0; diff --git a/modules/audio_processing/gain_controller2.h b/modules/audio_processing/gain_controller2.h index b62890d721..ce758c7834 100644 --- a/modules/audio_processing/gain_controller2.h +++ b/modules/audio_processing/gain_controller2.h @@ -34,7 +34,7 @@ class GainController2 { GainController2& operator=(const GainController2&) = delete; ~GainController2(); - void Initialize(int sample_rate_hz); + void Initialize(int sample_rate_hz, int num_channels); void Process(AudioBuffer* audio); void NotifyAnalogLevel(int level); diff --git a/modules/audio_processing/gain_controller2_unittest.cc b/modules/audio_processing/gain_controller2_unittest.cc index 815d58efe7..85c08bb750 100644 --- a/modules/audio_processing/gain_controller2_unittest.cc +++ b/modules/audio_processing/gain_controller2_unittest.cc @@ -65,7 +65,7 @@ std::unique_ptr<GainController2> CreateAgc2FixedDigitalMode( size_t sample_rate_hz) { auto agc2 = std::make_unique<GainController2>(); agc2->ApplyConfig(CreateAgc2FixedDigitalModeConfig(fixed_gain_db)); - agc2->Initialize(sample_rate_hz); + agc2->Initialize(sample_rate_hz, /*num_channels=*/1); return agc2; } @@ -337,9 +337,10 @@ TEST(GainController2, CheckGainAdaptiveDigital) { constexpr float kExpectedGainDb = 4.3f; constexpr float kToleranceDb = 0.5f; GainController2 
gain_controller2; - gain_controller2.Initialize(AudioProcessing::kSampleRate48kHz); + gain_controller2.Initialize(AudioProcessing::kSampleRate48kHz, + /*num_channels=*/1); AudioProcessing::Config::GainController2 config; - config.fixed_digital.gain_db = 0.f; + config.fixed_digital.gain_db = 0.0f; config.adaptive_digital.enabled = true; gain_controller2.ApplyConfig(config); EXPECT_NEAR( diff --git a/modules/audio_processing/include/audio_processing.cc b/modules/audio_processing/include/audio_processing.cc index fa45230c6b..44a90d6e76 100644 --- a/modules/audio_processing/include/audio_processing.cc +++ b/modules/audio_processing/include/audio_processing.cc @@ -77,33 +77,42 @@ bool Agc1Config::operator==(const Agc1Config& rhs) const { analog_lhs.startup_min_volume == analog_rhs.startup_min_volume && analog_lhs.clipped_level_min == analog_rhs.clipped_level_min && analog_lhs.enable_digital_adaptive == - analog_rhs.enable_digital_adaptive; + analog_rhs.enable_digital_adaptive && + analog_lhs.clipped_level_step == analog_rhs.clipped_level_step && + analog_lhs.clipped_ratio_threshold == + analog_rhs.clipped_ratio_threshold && + analog_lhs.clipped_wait_frames == analog_rhs.clipped_wait_frames && + analog_lhs.clipping_predictor.mode == + analog_rhs.clipping_predictor.mode && + analog_lhs.clipping_predictor.window_length == + analog_rhs.clipping_predictor.window_length && + analog_lhs.clipping_predictor.reference_window_length == + analog_rhs.clipping_predictor.reference_window_length && + analog_lhs.clipping_predictor.reference_window_delay == + analog_rhs.clipping_predictor.reference_window_delay && + analog_lhs.clipping_predictor.clipping_threshold == + analog_rhs.clipping_predictor.clipping_threshold && + analog_lhs.clipping_predictor.crest_factor_margin == + analog_rhs.clipping_predictor.crest_factor_margin; } -bool Agc2Config::operator==(const Agc2Config& rhs) const { - const auto& adaptive_lhs = adaptive_digital; - const auto& adaptive_rhs = rhs.adaptive_digital; +bool Agc2Config::AdaptiveDigital::operator==( + const Agc2Config::AdaptiveDigital& rhs) const { + return enabled == rhs.enabled && dry_run == rhs.dry_run && + noise_estimator == rhs.noise_estimator && + vad_reset_period_ms == rhs.vad_reset_period_ms && + adjacent_speech_frames_threshold == + rhs.adjacent_speech_frames_threshold && + max_gain_change_db_per_second == rhs.max_gain_change_db_per_second && + max_output_noise_level_dbfs == rhs.max_output_noise_level_dbfs && + sse2_allowed == rhs.sse2_allowed && avx2_allowed == rhs.avx2_allowed && + neon_allowed == rhs.neon_allowed; +} +bool Agc2Config::operator==(const Agc2Config& rhs) const { return enabled == rhs.enabled && fixed_digital.gain_db == rhs.fixed_digital.gain_db && - adaptive_lhs.enabled == adaptive_rhs.enabled && - adaptive_lhs.vad_probability_attack == - adaptive_rhs.vad_probability_attack && - adaptive_lhs.level_estimator == adaptive_rhs.level_estimator && - adaptive_lhs.level_estimator_adjacent_speech_frames_threshold == - adaptive_rhs.level_estimator_adjacent_speech_frames_threshold && - adaptive_lhs.use_saturation_protector == - adaptive_rhs.use_saturation_protector && - adaptive_lhs.initial_saturation_margin_db == - adaptive_rhs.initial_saturation_margin_db && - adaptive_lhs.extra_saturation_margin_db == - adaptive_rhs.extra_saturation_margin_db && - adaptive_lhs.gain_applier_adjacent_speech_frames_threshold == - adaptive_rhs.gain_applier_adjacent_speech_frames_threshold && - adaptive_lhs.max_gain_change_db_per_second == - adaptive_rhs.max_gain_change_db_per_second && 
- adaptive_lhs.max_output_noise_level_dbfs == - adaptive_rhs.max_output_noise_level_dbfs; + adaptive_digital == rhs.adaptive_digital; } bool AudioProcessing::Config::CaptureLevelAdjustment::operator==( @@ -156,11 +165,46 @@ std::string AudioProcessing::Config::ToString() const { << ", enable_limiter: " << gain_controller1.enable_limiter << ", analog_level_minimum: " << gain_controller1.analog_level_minimum << ", analog_level_maximum: " << gain_controller1.analog_level_maximum - << " }, gain_controller2: { enabled: " << gain_controller2.enabled + << ", analog_gain_controller { enabled: " + << gain_controller1.analog_gain_controller.enabled + << ", startup_min_volume: " + << gain_controller1.analog_gain_controller.startup_min_volume + << ", clipped_level_min: " + << gain_controller1.analog_gain_controller.clipped_level_min + << ", enable_digital_adaptive: " + << gain_controller1.analog_gain_controller.enable_digital_adaptive + << ", clipped_level_step: " + << gain_controller1.analog_gain_controller.clipped_level_step + << ", clipped_ratio_threshold: " + << gain_controller1.analog_gain_controller.clipped_ratio_threshold + << ", clipped_wait_frames: " + << gain_controller1.analog_gain_controller.clipped_wait_frames + << ", clipping_predictor: { enabled: " + << gain_controller1.analog_gain_controller.clipping_predictor.enabled + << ", mode: " + << gain_controller1.analog_gain_controller.clipping_predictor.mode + << ", window_length: " + << gain_controller1.analog_gain_controller.clipping_predictor + .window_length + << ", reference_window_length: " + << gain_controller1.analog_gain_controller.clipping_predictor + .reference_window_length + << ", reference_window_delay: " + << gain_controller1.analog_gain_controller.clipping_predictor + .reference_window_delay + << ", clipping_threshold: " + << gain_controller1.analog_gain_controller.clipping_predictor + .clipping_threshold + << ", crest_factor_margin: " + << gain_controller1.analog_gain_controller.clipping_predictor + .crest_factor_margin + << " }}}, gain_controller2: { enabled: " << gain_controller2.enabled << ", fixed_digital: { gain_db: " << gain_controller2.fixed_digital.gain_db << " }, adaptive_digital: { enabled: " - << gain_controller2.adaptive_digital.enabled << ", noise_estimator: " + << gain_controller2.adaptive_digital.enabled + << ", dry_run: " << gain_controller2.adaptive_digital.dry_run + << ", noise_estimator: " << GainController2NoiseEstimatorToString( gain_controller2.adaptive_digital.noise_estimator) << ", vad_reset_period_ms: " diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h index 01bb7c33c7..64b1b5d107 100644 --- a/modules/audio_processing/include/audio_processing.h +++ b/modules/audio_processing/include/audio_processing.h @@ -59,9 +59,9 @@ class CustomProcessing; // // Must be provided through AudioProcessingBuilder().Create(config). #if defined(WEBRTC_CHROMIUM_BUILD) -static const int kAgcStartupMinVolume = 85; +static constexpr int kAgcStartupMinVolume = 85; #else -static const int kAgcStartupMinVolume = 0; +static constexpr int kAgcStartupMinVolume = 0; #endif // defined(WEBRTC_CHROMIUM_BUILD) static constexpr int kClippedLevelMin = 70; @@ -275,7 +275,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface { // in the analog mode, prescribing an analog gain to be applied at the audio // HAL. // Recommended to be enabled on the client-side. 
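Note: splitting out Agc2Config::AdaptiveDigital::operator== (with a matching operator!=) is what lets the rewritten config tests above restore state with a single struct assignment instead of copying fields back one by one. A small usage sketch:

#include "modules/audio_processing/include/audio_processing.h"
#include "rtc_base/checks.h"

void CompareAdaptiveDigitalConfigs() {
  webrtc::AudioProcessing::Config::GainController2 a;
  webrtc::AudioProcessing::Config::GainController2 b;
  a.adaptive_digital.vad_reset_period_ms += 10;
  RTC_DCHECK(a != b);                       // Differ in one nested field.
  a.adaptive_digital = b.adaptive_digital;  // Reset the whole sub-struct...
  RTC_DCHECK(a == b);                       // ...and the configs match again.
}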
- struct GainController1 { + struct RTC_EXPORT GainController1 { bool operator==(const GainController1& rhs) const; bool operator!=(const GainController1& rhs) const { return !(*this == rhs); @@ -334,6 +334,43 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface { // clipping. int clipped_level_min = kClippedLevelMin; bool enable_digital_adaptive = true; + // Amount the microphone level is lowered with every clipping event. + // Limited to (0, 255]. + int clipped_level_step = 15; + // Proportion of clipped samples required to declare a clipping event. + // Limited to (0.f, 1.f). + float clipped_ratio_threshold = 0.1f; + // Time in frames to wait after a clipping event before checking again. + // Limited to values higher than 0. + int clipped_wait_frames = 300; + + // Enables clipping prediction functionality. + struct ClippingPredictor { + bool enabled = false; + enum Mode { + // Clipping event prediction mode with fixed step estimation. + kClippingEventPrediction, + // Clipped peak estimation mode with adaptive step estimation. + kAdaptiveStepClippingPeakPrediction, + // Clipped peak estimation mode with fixed step estimation. + kFixedStepClippingPeakPrediction, + }; + Mode mode = kClippingEventPrediction; + // Number of frames in the sliding analysis window. + int window_length = 5; + // Number of frames in the sliding reference window. + int reference_window_length = 5; + // Reference window delay (unit: number of frames). + int reference_window_delay = 5; + // Clipping prediction threshold (dBFS). + float clipping_threshold = -1.0f; + // Crest factor drop threshold (dB). + float crest_factor_margin = 3.0f; + // If true, the recommended clipped level step is used to modify the + // analog gain. Otherwise, the predictor runs without affecting the + // analog gain. + bool use_predicted_step = true; + } clipping_predictor; } analog_gain_controller; } gain_controller1; @@ -343,7 +380,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface { // setting |fixed_gain_db|, the limiter can be turned into a compressor that // first applies a fixed gain. The adaptive digital AGC can be turned off by // setting |adaptive_digital_mode=false|. - struct GainController2 { + struct RTC_EXPORT GainController2 { bool operator==(const GainController2& rhs) const; bool operator!=(const GainController2& rhs) const { return !(*this == rhs); @@ -356,8 +393,15 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface { struct FixedDigital { float gain_db = 0.0f; } fixed_digital; - struct AdaptiveDigital { + struct RTC_EXPORT AdaptiveDigital { + bool operator==(const AdaptiveDigital& rhs) const; + bool operator!=(const AdaptiveDigital& rhs) const { + return !(*this == rhs); + } + bool enabled = false; + // Run the adaptive digital controller but the signal is not modified. + bool dry_run = false; NoiseEstimator noise_estimator = kNoiseFloor; int vad_reset_period_ms = 1500; int adjacent_speech_frames_threshold = 12; diff --git a/modules/audio_processing/logging/apm_data_dumper.h b/modules/audio_processing/logging/apm_data_dumper.h index 6d32b32ab5..9c2ac3be5d 100644 --- a/modules/audio_processing/logging/apm_data_dumper.h +++ b/modules/audio_processing/logging/apm_data_dumper.h @@ -65,6 +65,15 @@ class ApmDataDumper { #endif } + // Returns whether dumping functionality is enabled/available. + static bool IsAvailable() { +#if WEBRTC_APM_DEBUG_DUMP == 1 + return true; +#else + return false; +#endif + } + // Default dump set. 
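Note: the new GainController1 clipping knobs and the ClippingPredictor sub-struct are plain config fields, so they can be set alongside the new AGC2 dry_run flag before the config is applied with AudioProcessing::ApplyConfig(). A hedged example; the values are arbitrary picks within the documented ranges, not recommendations.

#include "modules/audio_processing/include/audio_processing.h"

webrtc::AudioProcessing::Config MakeAgcConfig() {
  webrtc::AudioProcessing::Config config;
  config.gain_controller1.enabled = true;
  auto& analog = config.gain_controller1.analog_gain_controller;
  analog.clipped_level_step = 15;         // Mic level drop per clipping event.
  analog.clipped_ratio_threshold = 0.1f;  // Fraction of clipped samples.
  analog.clipped_wait_frames = 300;       // Cool-down between adjustments.
  analog.clipping_predictor.enabled = true;  // Mode stays at the default,
                                             // kClippingEventPrediction.
  analog.clipping_predictor.use_predicted_step = true;
  // AGC2: adapt the digital gain but leave the samples unmodified (dry run).
  config.gain_controller2.enabled = true;
  config.gain_controller2.adaptive_digital.enabled = true;
  config.gain_controller2.adaptive_digital.dry_run = true;
  return config;
}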
static constexpr size_t kDefaultDumpSet = 0; diff --git a/modules/audio_processing/residual_echo_detector_unittest.cc b/modules/audio_processing/residual_echo_detector_unittest.cc index 6697cf009d..a5f1409516 100644 --- a/modules/audio_processing/residual_echo_detector_unittest.cc +++ b/modules/audio_processing/residual_echo_detector_unittest.cc @@ -18,8 +18,7 @@ namespace webrtc { TEST(ResidualEchoDetectorTests, Echo) { - rtc::scoped_refptr<ResidualEchoDetector> echo_detector = - new rtc::RefCountedObject<ResidualEchoDetector>(); + auto echo_detector = rtc::make_ref_counted<ResidualEchoDetector>(); echo_detector->SetReliabilityForTest(1.0f); std::vector<float> ones(160, 1.f); std::vector<float> zeros(160, 0.f); @@ -46,8 +45,7 @@ TEST(ResidualEchoDetectorTests, Echo) { } TEST(ResidualEchoDetectorTests, NoEcho) { - rtc::scoped_refptr<ResidualEchoDetector> echo_detector = - new rtc::RefCountedObject<ResidualEchoDetector>(); + auto echo_detector = rtc::make_ref_counted<ResidualEchoDetector>(); echo_detector->SetReliabilityForTest(1.0f); std::vector<float> ones(160, 1.f); std::vector<float> zeros(160, 0.f); @@ -69,8 +67,7 @@ TEST(ResidualEchoDetectorTests, NoEcho) { } TEST(ResidualEchoDetectorTests, EchoWithRenderClockDrift) { - rtc::scoped_refptr<ResidualEchoDetector> echo_detector = - new rtc::RefCountedObject<ResidualEchoDetector>(); + auto echo_detector = rtc::make_ref_counted<ResidualEchoDetector>(); echo_detector->SetReliabilityForTest(1.0f); std::vector<float> ones(160, 1.f); std::vector<float> zeros(160, 0.f); @@ -107,8 +104,7 @@ TEST(ResidualEchoDetectorTests, EchoWithRenderClockDrift) { } TEST(ResidualEchoDetectorTests, EchoWithCaptureClockDrift) { - rtc::scoped_refptr<ResidualEchoDetector> echo_detector = - new rtc::RefCountedObject<ResidualEchoDetector>(); + auto echo_detector = rtc::make_ref_counted<ResidualEchoDetector>(); echo_detector->SetReliabilityForTest(1.0f); std::vector<float> ones(160, 1.f); std::vector<float> zeros(160, 0.f); |
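Note: the repeated `new rtc::RefCountedObject<T>()` call sites in this change all collapse to the rtc::make_ref_counted<T>() factory, which returns an rtc::scoped_refptr<T> directly and forwards any constructor arguments. A minimal before/after sketch with a stand-in type; at the time of this change the factory is declared next to RefCountedObject in rtc_base/ref_counted_object.h.

#include "api/scoped_refptr.h"
#include "rtc_base/ref_count.h"
#include "rtc_base/ref_counted_object.h"

// Stand-in type: RefCountInterface declares AddRef()/Release() and
// RefCountedObject provides the implementation.
class Thing : public rtc::RefCountInterface {
 public:
  void Use() {}
};

void Example() {
  // Old spelling: name both the wrapper and the smart-pointer type.
  rtc::scoped_refptr<Thing> a(new rtc::RefCountedObject<Thing>());
  // New spelling: the factory deduces both; constructor arguments, if any,
  // are forwarded to Thing, as in make_ref_counted<AudioProcessingImpl>(
  // webrtc::Config()) above.
  auto b = rtc::make_ref_counted<Thing>();
  a->Use();
  b->Use();
}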