Diffstat (limited to 'modules/audio_processing')
-rw-r--r--  modules/audio_processing/aec3/aec_state.h | 6
-rw-r--r--  modules/audio_processing/aec3/echo_canceller3.cc | 22
-rw-r--r--  modules/audio_processing/aec3/echo_path_delay_estimator.cc | 4
-rw-r--r--  modules/audio_processing/aec3/echo_remover.cc | 14
-rw-r--r--  modules/audio_processing/aec3/erle_estimator.h | 12
-rw-r--r--  modules/audio_processing/aec3/erle_estimator_unittest.cc | 32
-rw-r--r--  modules/audio_processing/aec3/matched_filter.cc | 32
-rw-r--r--  modules/audio_processing/aec3/matched_filter.h | 9
-rw-r--r--  modules/audio_processing/aec3/matched_filter_lag_aggregator.h | 3
-rw-r--r--  modules/audio_processing/aec3/matched_filter_unittest.cc | 15
-rw-r--r--  modules/audio_processing/aec3/residual_echo_estimator.cc | 33
-rw-r--r--  modules/audio_processing/aec3/residual_echo_estimator.h | 14
-rw-r--r--  modules/audio_processing/aec3/residual_echo_estimator_unittest.cc | 5
-rw-r--r--  modules/audio_processing/aec3/reverb_model_estimator.cc | 1
-rw-r--r--  modules/audio_processing/aec3/reverb_model_estimator.h | 1
-rw-r--r--  modules/audio_processing/aec3/subband_erle_estimator.cc | 11
-rw-r--r--  modules/audio_processing/aec3/subband_erle_estimator.h | 7
-rw-r--r--  modules/audio_processing/aec3/subtractor.cc | 24
-rw-r--r--  modules/audio_processing/aec3/subtractor.h | 10
-rw-r--r--  modules/audio_processing/aec3/suppression_gain.cc | 64
-rw-r--r--  modules/audio_processing/aec3/suppression_gain.h | 7
-rw-r--r--  modules/audio_processing/aec3/suppression_gain_unittest.cc | 56
-rw-r--r--  modules/audio_processing/aec3/transparent_mode.cc | 4
-rw-r--r--  modules/audio_processing/agc/BUILD.gn | 56
-rw-r--r--  modules/audio_processing/agc/agc_manager_direct.cc | 182
-rw-r--r--  modules/audio_processing/agc/agc_manager_direct.h | 67
-rw-r--r--  modules/audio_processing/agc/agc_manager_direct_unittest.cc | 256
-rw-r--r--  modules/audio_processing/agc/clipping_predictor.cc | 383
-rw-r--r--  modules/audio_processing/agc/clipping_predictor.h | 63
-rw-r--r--  modules/audio_processing/agc/clipping_predictor_evaluator.cc | 175
-rw-r--r--  modules/audio_processing/agc/clipping_predictor_evaluator.h | 102
-rw-r--r--  modules/audio_processing/agc/clipping_predictor_evaluator_unittest.cc | 568
-rw-r--r--  modules/audio_processing/agc/clipping_predictor_level_buffer.cc | 77
-rw-r--r--  modules/audio_processing/agc/clipping_predictor_level_buffer.h | 71
-rw-r--r--  modules/audio_processing/agc/clipping_predictor_level_buffer_unittest.cc | 131
-rw-r--r--  modules/audio_processing/agc/clipping_predictor_unittest.cc | 491
-rw-r--r--  modules/audio_processing/agc2/adaptive_agc.cc | 32
-rw-r--r--  modules/audio_processing/agc2/adaptive_agc.h | 10
-rw-r--r--  modules/audio_processing/agc2/adaptive_digital_gain_applier.cc | 63
-rw-r--r--  modules/audio_processing/agc2/adaptive_digital_gain_applier.h | 13
-rw-r--r--  modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc | 75
-rw-r--r--  modules/audio_processing/agc2/agc2_common.h | 2
-rw-r--r--  modules/audio_processing/agc2/vad_with_level.cc | 4
-rw-r--r--  modules/audio_processing/agc2/vad_with_level.h | 2
-rw-r--r--  modules/audio_processing/agc2/vad_with_level_unittest.cc | 14
-rw-r--r--  modules/audio_processing/audio_processing_impl.cc | 18
-rw-r--r--  modules/audio_processing/audio_processing_impl.h | 7
-rw-r--r--  modules/audio_processing/audio_processing_impl_locking_unittest.cc | 75
-rw-r--r--  modules/audio_processing/audio_processing_impl_unittest.cc | 12
-rw-r--r--  modules/audio_processing/audio_processing_performance_unittest.cc | 51
-rw-r--r--  modules/audio_processing/audio_processing_unittest.cc | 121
-rw-r--r--  modules/audio_processing/gain_controller2.cc | 10
-rw-r--r--  modules/audio_processing/gain_controller2.h | 2
-rw-r--r--  modules/audio_processing/gain_controller2_unittest.cc | 7
-rw-r--r--  modules/audio_processing/include/audio_processing.cc | 92
-rw-r--r--  modules/audio_processing/include/audio_processing.h | 54
-rw-r--r--  modules/audio_processing/logging/apm_data_dumper.h | 9
-rw-r--r--  modules/audio_processing/residual_echo_detector_unittest.cc | 12
58 files changed, 3273 insertions(+), 420 deletions(-)
diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h
index 125ae83a2b..e2f70a4c68 100644
--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@@ -75,6 +75,12 @@ class AecState {
return erle_estimator_.Erle(onset_compensated);
}
+ // Returns the non-capped ERLE.
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleUnbounded()
+ const {
+ return erle_estimator_.ErleUnbounded();
+ }
+
// Returns the fullband ERLE estimate in log2 units.
float FullBandErleLog2() const { return erle_estimator_.FullbandErleLog2(); }
diff --git a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc
index 35a2cff7ea..181b649f6d 100644
--- a/modules/audio_processing/aec3/echo_canceller3.cc
+++ b/modules/audio_processing/aec3/echo_canceller3.cc
@@ -49,7 +49,11 @@ void RetrieveFieldTrialValue(const char* trial_name,
ParseFieldTrial({&field_trial_param}, field_trial_str);
float field_trial_value = static_cast<float>(field_trial_param.Get());
- if (field_trial_value >= min && field_trial_value <= max) {
+ if (field_trial_value >= min && field_trial_value <= max &&
+ field_trial_value != *value_to_update) {
+ RTC_LOG(LS_INFO) << "Key " << trial_name
+ << " changing AEC3 parameter value from "
+ << *value_to_update << " to " << field_trial_value;
*value_to_update = field_trial_value;
}
}
@@ -65,7 +69,11 @@ void RetrieveFieldTrialValue(const char* trial_name,
ParseFieldTrial({&field_trial_param}, field_trial_str);
float field_trial_value = field_trial_param.Get();
- if (field_trial_value >= min && field_trial_value <= max) {
+ if (field_trial_value >= min && field_trial_value <= max &&
+ field_trial_value != *value_to_update) {
+ RTC_LOG(LS_INFO) << "Key " << trial_name
+ << " changing AEC3 parameter value from "
+ << *value_to_update << " to " << field_trial_value;
*value_to_update = field_trial_value;
}
}
@@ -572,6 +580,12 @@ EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
RetrieveFieldTrialValue("WebRTC-Aec3SuppressorEpStrengthDefaultLenOverride",
-1.f, 1.f, &adjusted_cfg.ep_strength.default_len);
+ // Field trial-based overrides of individual delay estimator parameters.
+ RetrieveFieldTrialValue("WebRTC-Aec3DelayEstimateSmoothingOverride", 0.f, 1.f,
+ &adjusted_cfg.delay.delay_estimate_smoothing);
+ RetrieveFieldTrialValue(
+ "WebRTC-Aec3DelayEstimateSmoothingDelayFoundOverride", 0.f, 1.f,
+ &adjusted_cfg.delay.delay_estimate_smoothing_delay_found);
return adjusted_cfg;
}
@@ -731,6 +745,10 @@ EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
std::vector<std::vector<rtc::ArrayView<float>>>(
1, std::vector<rtc::ArrayView<float>>(num_capture_channels_));
}
+
+ RTC_LOG(LS_INFO) << "AEC3 created with sample rate: " << sample_rate_hz_
+ << " Hz, num render channels: " << num_render_channels_
+ << ", num capture channels: " << num_capture_channels_;
}
EchoCanceller3::~EchoCanceller3() = default;
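The logging change above extends a range-checked override helper. A minimal self-contained sketch of the pattern, assuming a hypothetical ParseOverride in place of WebRTC's FieldTrialParameter machinery: the override is applied only when it parses, lies within [min, max], and actually differs from the current value, and the change is logged.

#include <cstdio>
#include <optional>
#include <string>

// Hypothetical stand-in for the field-trial string parsing; the real code
// uses FieldTrialParameter and RTC_LOG.
std::optional<float> ParseOverride(const std::string& s) {
  try {
    return std::stof(s);
  } catch (...) {
    return std::nullopt;
  }
}

void MaybeOverride(const char* trial_name, const std::string& trial_value,
                   float min, float max, float* value_to_update) {
  const std::optional<float> parsed = ParseOverride(trial_value);
  // Apply only values that parse, lie in [min, max], and actually differ;
  // log the change so tuning experiments are visible in the logs.
  if (parsed && *parsed >= min && *parsed <= max &&
      *parsed != *value_to_update) {
    std::printf("Key %s changing value from %f to %f\n", trial_name,
                *value_to_update, *parsed);
    *value_to_update = *parsed;
  }
}

int main() {
  float smoothing = 0.7f;
  MaybeOverride("WebRTC-Aec3DelayEstimateSmoothingOverride", "0.85", 0.f, 1.f,
                &smoothing);  // smoothing becomes 0.85 and the change is logged
}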
diff --git a/modules/audio_processing/aec3/echo_path_delay_estimator.cc b/modules/audio_processing/aec3/echo_path_delay_estimator.cc
index 2c987f9341..8a78834143 100644
--- a/modules/audio_processing/aec3/echo_path_delay_estimator.cc
+++ b/modules/audio_processing/aec3/echo_path_delay_estimator.cc
@@ -42,6 +42,7 @@ EchoPathDelayEstimator::EchoPathDelayEstimator(
? config.render_levels.poor_excitation_render_limit_ds8
: config.render_levels.poor_excitation_render_limit,
config.delay.delay_estimate_smoothing,
+ config.delay.delay_estimate_smoothing_delay_found,
config.delay.delay_candidate_detection_threshold),
matched_filter_lag_aggregator_(data_dumper_,
matched_filter_.GetMaxFilterLag(),
@@ -71,7 +72,8 @@ absl::optional<DelayEstimate> EchoPathDelayEstimator::EstimateDelay(
data_dumper_->DumpWav("aec3_capture_decimator_output",
downsampled_capture.size(), downsampled_capture.data(),
16000 / down_sampling_factor_, 1);
- matched_filter_.Update(render_buffer, downsampled_capture);
+ matched_filter_.Update(render_buffer, downsampled_capture,
+ matched_filter_lag_aggregator_.ReliableDelayFound());
absl::optional<DelayEstimate> aggregated_matched_filter_lag =
matched_filter_lag_aggregator_.Aggregate(
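Intent of the new Update() argument, as a toy sketch with hypothetical stand-in types: while no reliable delay has been found the matched filter adapts with the fast smoothing constant, and once the lag aggregator reports a reliable delay it switches to the slow one.

#include <cassert>

struct LagAggregator {
  bool reliable = false;
  bool ReliableDelayFound() const { return reliable; }
};

struct Filter {
  float smoothing_fast = 0.7f;
  float smoothing_slow = 0.9f;
  float last_smoothing = 0.f;
  void Update(bool use_slow_smoothing) {
    last_smoothing = use_slow_smoothing ? smoothing_slow : smoothing_fast;
  }
};

int main() {
  LagAggregator aggregator;
  Filter filter;
  filter.Update(aggregator.ReliableDelayFound());  // fast while still searching
  assert(filter.last_smoothing == filter.smoothing_fast);
  aggregator.reliable = true;                      // a reliable delay was found
  filter.Update(aggregator.ReliableDelayFound());  // slow once locked
  assert(filter.last_smoothing == filter.smoothing_slow);
}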
diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc
index 6c177c9a10..2bfaa951d8 100644
--- a/modules/audio_processing/aec3/echo_remover.cc
+++ b/modules/audio_processing/aec3/echo_remover.cc
@@ -172,6 +172,7 @@ class EchoRemoverImpl final : public EchoRemover {
std::vector<std::array<float, kFftLengthBy2Plus1>> Y2_heap_;
std::vector<std::array<float, kFftLengthBy2Plus1>> E2_heap_;
std::vector<std::array<float, kFftLengthBy2Plus1>> R2_heap_;
+ std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded_heap_;
std::vector<std::array<float, kFftLengthBy2Plus1>> S2_linear_heap_;
std::vector<FftData> Y_heap_;
std::vector<FftData> E_heap_;
@@ -218,6 +219,7 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
Y2_heap_(NumChannelsOnHeap(num_capture_channels_)),
E2_heap_(NumChannelsOnHeap(num_capture_channels_)),
R2_heap_(NumChannelsOnHeap(num_capture_channels_)),
+ R2_unbounded_heap_(NumChannelsOnHeap(num_capture_channels_)),
S2_linear_heap_(NumChannelsOnHeap(num_capture_channels_)),
Y_heap_(NumChannelsOnHeap(num_capture_channels_)),
E_heap_(NumChannelsOnHeap(num_capture_channels_)),
@@ -265,6 +267,8 @@ void EchoRemoverImpl::ProcessCapture(
std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
R2_stack;
std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
+ R2_unbounded_stack;
+ std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
S2_linear_stack;
std::array<FftData, kMaxNumChannelsOnStack> Y_stack;
std::array<FftData, kMaxNumChannelsOnStack> E_stack;
@@ -280,6 +284,8 @@ void EchoRemoverImpl::ProcessCapture(
E2_stack.data(), num_capture_channels_);
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2(
R2_stack.data(), num_capture_channels_);
+ rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded(
+ R2_unbounded_stack.data(), num_capture_channels_);
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> S2_linear(
S2_linear_stack.data(), num_capture_channels_);
rtc::ArrayView<FftData> Y(Y_stack.data(), num_capture_channels_);
@@ -301,6 +307,8 @@ void EchoRemoverImpl::ProcessCapture(
E2_heap_.data(), num_capture_channels_);
R2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
R2_heap_.data(), num_capture_channels_);
+ R2_unbounded = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
+ R2_unbounded_heap_.data(), num_capture_channels_);
S2_linear = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
S2_linear_heap_.data(), num_capture_channels_);
Y = rtc::ArrayView<FftData>(Y_heap_.data(), num_capture_channels_);
@@ -406,8 +414,8 @@ void EchoRemoverImpl::ProcessCapture(
if (capture_output_used_) {
// Estimate the residual echo power.
residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2,
- suppression_gain_.IsDominantNearend(),
- R2);
+ suppression_gain_.IsDominantNearend(), R2,
+ R2_unbounded);
// Suppressor nearend estimate.
if (aec_state_.UsableLinearEstimate()) {
@@ -430,7 +438,7 @@ void EchoRemoverImpl::ProcessCapture(
// Compute preferred gains.
float high_bands_gain;
- suppression_gain_.GetGain(nearend_spectrum, echo_spectrum, R2,
+ suppression_gain_.GetGain(nearend_spectrum, echo_spectrum, R2, R2_unbounded,
cng_.NoiseSpectrum(), render_signal_analyzer_,
aec_state_, x, clock_drift, &high_bands_gain, &G);
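R2_unbounded follows the existing stack-or-heap storage pattern. A hedged sketch of that pattern (kMaxNumChannelsOnStack, the bin count, and the pair-based view are simplified stand-ins for the real constants and rtc::ArrayView):

#include <array>
#include <cstddef>
#include <utility>
#include <vector>

constexpr size_t kMaxNumChannelsOnStack = 2;  // stand-in for the real constant
constexpr size_t kNumBins = 65;               // kFftLengthBy2Plus1

using Spectrum = std::array<float, kNumBins>;

// Pick stack storage for small channel counts, preallocated heap otherwise;
// the real code wraps the result in an rtc::ArrayView.
std::pair<Spectrum*, size_t> SelectStorage(
    std::array<Spectrum, kMaxNumChannelsOnStack>& stack,
    std::vector<Spectrum>& heap,
    size_t num_channels) {
  if (num_channels <= kMaxNumChannelsOnStack) {
    return {stack.data(), num_channels};
  }
  return {heap.data(), num_channels};
}

int main() {
  constexpr size_t kNumChannels = 8;
  std::array<Spectrum, kMaxNumChannelsOnStack> stack_buf;
  std::vector<Spectrum> heap_buf(kNumChannels);  // preallocated in the ctor
  auto view = SelectStorage(stack_buf, heap_buf, kNumChannels);  // heap path
  view.first[0].fill(0.f);
}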
diff --git a/modules/audio_processing/aec3/erle_estimator.h b/modules/audio_processing/aec3/erle_estimator.h
index cae896e82c..55797592a9 100644
--- a/modules/audio_processing/aec3/erle_estimator.h
+++ b/modules/audio_processing/aec3/erle_estimator.h
@@ -62,6 +62,18 @@ class ErleEstimator {
: subband_erle_estimator_.Erle(onset_compensated);
}
+ // Returns the non-capped subband ERLE.
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleUnbounded()
+ const {
+ // Unbounded ERLE is only used with the subband erle estimator where the
+ // ERLE is often capped at low values. When the signal dependent ERLE
+ // estimator is used the capped ERLE is returned.
+ return !signal_dependent_erle_estimator_
+ ? subband_erle_estimator_.ErleUnbounded()
+ : signal_dependent_erle_estimator_->Erle(
+ /*onset_compensated=*/false);
+ }
+
// Returns the subband ERLE that are estimated during onsets (only used for
// testing).
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleDuringOnsets()
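Why an uncapped estimate is useful here, in a sketch that assumes LinearEstimate's simplified per-bin form R2 = S2_linear / ERLE: capping the ERLE at a low value inflates the residual echo estimate, which in turn drives stronger suppression downstream.

#include <algorithm>
#include <cstdio>

int main() {
  const float S2_linear = 1000.0f;  // linear echo power estimate in one bin
  const float true_erle = 64.0f;    // echo reduction actually achieved
  const float erle_cap = 8.0f;      // a typical low subband cap
  const float erle_capped = std::min(true_erle, erle_cap);
  // R2 ~= S2_linear / ERLE: the cap overestimates the residual echo.
  std::printf("R2 with capped ERLE:    %.1f\n", S2_linear / erle_capped);  // 125.0
  std::printf("R2 with unbounded ERLE: %.1f\n", S2_linear / true_erle);    // 15.6
}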
diff --git a/modules/audio_processing/aec3/erle_estimator_unittest.cc b/modules/audio_processing/aec3/erle_estimator_unittest.cc
index 6df71424bc..e38f2386f7 100644
--- a/modules/audio_processing/aec3/erle_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/erle_estimator_unittest.cc
@@ -50,6 +50,16 @@ void VerifyErle(
EXPECT_NEAR(kTrueErle, erle_time_domain, 0.5);
}
+void VerifyErleGreaterOrEqual(
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle1,
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle2) {
+ for (size_t ch = 0; ch < erle1.size(); ++ch) {
+ for (size_t i = 0; i < kFftLengthBy2Plus1; ++i) {
+ EXPECT_GE(erle1[ch][i], erle2[ch][i]);
+ }
+ }
+}
+
void FormFarendTimeFrame(std::vector<std::vector<std::vector<float>>>* x) {
const std::array<float, kBlockSize> frame = {
7459.88, 17209.6, 17383, 20768.9, 16816.7, 18386.3, 4492.83, 9675.85,
@@ -156,9 +166,10 @@ TEST_P(ErleEstimatorMultiChannel, VerifyErleIncreaseAndHold) {
kNumBands, std::vector<std::vector<float>>(
num_render_channels, std::vector<float>(kBlockSize, 0.f)));
std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>
- filter_frequency_response(
- config.filter.refined.length_blocks,
- std::vector<std::array<float, kFftLengthBy2Plus1>>(num_capture_channels));
+ filter_frequency_response(
+ config.filter.refined.length_blocks,
+ std::vector<std::array<float, kFftLengthBy2Plus1>>(
+ num_capture_channels));
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels));
@@ -181,6 +192,10 @@ TEST_P(ErleEstimatorMultiChannel, VerifyErleIncreaseAndHold) {
VerifyErle(estimator.Erle(/*onset_compensated=*/true),
std::pow(2.f, estimator.FullbandErleLog2()), config.erle.max_l,
config.erle.max_h);
+ VerifyErleGreaterOrEqual(estimator.Erle(/*onset_compensated=*/false),
+ estimator.Erle(/*onset_compensated=*/true));
+ VerifyErleGreaterOrEqual(estimator.ErleUnbounded(),
+ estimator.Erle(/*onset_compensated=*/false));
FormNearendFrame(&x, &X2, E2, Y2);
// Verifies that the ERLE is not immediately decreased during nearend
@@ -194,6 +209,10 @@ TEST_P(ErleEstimatorMultiChannel, VerifyErleIncreaseAndHold) {
VerifyErle(estimator.Erle(/*onset_compensated=*/true),
std::pow(2.f, estimator.FullbandErleLog2()), config.erle.max_l,
config.erle.max_h);
+ VerifyErleGreaterOrEqual(estimator.Erle(/*onset_compensated=*/false),
+ estimator.Erle(/*onset_compensated=*/true));
+ VerifyErleGreaterOrEqual(estimator.ErleUnbounded(),
+ estimator.Erle(/*onset_compensated=*/false));
}
TEST_P(ErleEstimatorMultiChannel, VerifyErleTrackingOnOnsets) {
@@ -212,9 +231,10 @@ TEST_P(ErleEstimatorMultiChannel, VerifyErleTrackingOnOnsets) {
kNumBands, std::vector<std::vector<float>>(
num_render_channels, std::vector<float>(kBlockSize, 0.f)));
std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>
- filter_frequency_response(
- config.filter.refined.length_blocks,
- std::vector<std::array<float, kFftLengthBy2Plus1>>(num_capture_channels));
+ filter_frequency_response(
+ config.filter.refined.length_blocks,
+ std::vector<std::array<float, kFftLengthBy2Plus1>>(
+ num_capture_channels));
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels));
diff --git a/modules/audio_processing/aec3/matched_filter.cc b/modules/audio_processing/aec3/matched_filter.cc
index 64b2d4e697..1721e9c983 100644
--- a/modules/audio_processing/aec3/matched_filter.cc
+++ b/modules/audio_processing/aec3/matched_filter.cc
@@ -307,7 +307,8 @@ MatchedFilter::MatchedFilter(ApmDataDumper* data_dumper,
int num_matched_filters,
size_t alignment_shift_sub_blocks,
float excitation_limit,
- float smoothing,
+ float smoothing_fast,
+ float smoothing_slow,
float matching_filter_threshold)
: data_dumper_(data_dumper),
optimization_(optimization),
@@ -319,7 +320,8 @@ MatchedFilter::MatchedFilter(ApmDataDumper* data_dumper,
lag_estimates_(num_matched_filters),
filters_offsets_(num_matched_filters, 0),
excitation_limit_(excitation_limit),
- smoothing_(smoothing),
+ smoothing_fast_(smoothing_fast),
+ smoothing_slow_(smoothing_slow),
matching_filter_threshold_(matching_filter_threshold) {
RTC_DCHECK(data_dumper);
RTC_DCHECK_LT(0, window_size_sub_blocks);
@@ -340,10 +342,14 @@ void MatchedFilter::Reset() {
}
void MatchedFilter::Update(const DownsampledRenderBuffer& render_buffer,
- rtc::ArrayView<const float> capture) {
+ rtc::ArrayView<const float> capture,
+ bool use_slow_smoothing) {
RTC_DCHECK_EQ(sub_block_size_, capture.size());
auto& y = capture;
+ const float smoothing =
+ use_slow_smoothing ? smoothing_slow_ : smoothing_fast_;
+
const float x2_sum_threshold =
filters_[0].size() * excitation_limit_ * excitation_limit_;
@@ -360,25 +366,25 @@ void MatchedFilter::Update(const DownsampledRenderBuffer& render_buffer,
switch (optimization_) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
case Aec3Optimization::kSse2:
- aec3::MatchedFilterCore_SSE2(x_start_index, x2_sum_threshold,
- smoothing_, render_buffer.buffer, y,
- filters_[n], &filters_updated, &error_sum);
+ aec3::MatchedFilterCore_SSE2(x_start_index, x2_sum_threshold, smoothing,
+ render_buffer.buffer, y, filters_[n],
+ &filters_updated, &error_sum);
break;
case Aec3Optimization::kAvx2:
- aec3::MatchedFilterCore_AVX2(x_start_index, x2_sum_threshold,
- smoothing_, render_buffer.buffer, y,
- filters_[n], &filters_updated, &error_sum);
+ aec3::MatchedFilterCore_AVX2(x_start_index, x2_sum_threshold, smoothing,
+ render_buffer.buffer, y, filters_[n],
+ &filters_updated, &error_sum);
break;
#endif
#if defined(WEBRTC_HAS_NEON)
case Aec3Optimization::kNeon:
- aec3::MatchedFilterCore_NEON(x_start_index, x2_sum_threshold,
- smoothing_, render_buffer.buffer, y,
- filters_[n], &filters_updated, &error_sum);
+ aec3::MatchedFilterCore_NEON(x_start_index, x2_sum_threshold, smoothing,
+ render_buffer.buffer, y, filters_[n],
+ &filters_updated, &error_sum);
break;
#endif
default:
- aec3::MatchedFilterCore(x_start_index, x2_sum_threshold, smoothing_,
+ aec3::MatchedFilterCore(x_start_index, x2_sum_threshold, smoothing,
render_buffer.buffer, y, filters_[n],
&filters_updated, &error_sum);
}
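A scalar sketch of what the per-filter cores do with the selected smoothing constant, assuming the standard NLMS form (the real MatchedFilterCore variants are vectorized and operate over the render ring buffer): the filter adapts with step size smoothing * error / x2_sum whenever the render energy exceeds the excitation threshold, so a smaller smoothing value adapts more slowly.

#include <cstddef>
#include <vector>

// Hypothetical scalar version of aec3::MatchedFilterCore; x holds the render
// history aligned with the filter taps, y is one downsampled capture sample.
void MatchedFilterCoreScalar(float smoothing, float x2_sum_threshold,
                             const std::vector<float>& x, float y,
                             std::vector<float>& h) {
  float prediction = 0.f;
  float x2_sum = 0.f;
  for (size_t k = 0; k < h.size(); ++k) {
    prediction += h[k] * x[k];
    x2_sum += x[k] * x[k];
  }
  const float error = y - prediction;
  if (x2_sum > x2_sum_threshold) {
    // NLMS update; the variant selected once a reliable delay has been found
    // is intended to stabilize the estimate by adapting differently.
    const float alpha = smoothing * error / x2_sum;
    for (size_t k = 0; k < h.size(); ++k) h[k] += alpha * x[k];
  }
}

int main() {
  std::vector<float> x = {1.f, 0.f, 0.f, 0.f};
  std::vector<float> h(4, 0.f);
  MatchedFilterCoreScalar(/*smoothing=*/0.7f, /*x2_sum_threshold=*/0.5f, x,
                          /*y=*/1.f, h);  // h[0] moves towards 1
}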
diff --git a/modules/audio_processing/aec3/matched_filter.h b/modules/audio_processing/aec3/matched_filter.h
index fa44eb27fd..c6410ab4ee 100644
--- a/modules/audio_processing/aec3/matched_filter.h
+++ b/modules/audio_processing/aec3/matched_filter.h
@@ -100,7 +100,8 @@ class MatchedFilter {
int num_matched_filters,
size_t alignment_shift_sub_blocks,
float excitation_limit,
- float smoothing,
+ float smoothing_fast,
+ float smoothing_slow,
float matching_filter_threshold);
MatchedFilter() = delete;
@@ -111,7 +112,8 @@ class MatchedFilter {
// Updates the correlation with the values in the capture buffer.
void Update(const DownsampledRenderBuffer& render_buffer,
- rtc::ArrayView<const float> capture);
+ rtc::ArrayView<const float> capture,
+ bool use_slow_smoothing);
// Resets the matched filter.
void Reset();
@@ -140,7 +142,8 @@ class MatchedFilter {
std::vector<LagEstimate> lag_estimates_;
std::vector<size_t> filters_offsets_;
const float excitation_limit_;
- const float smoothing_;
+ const float smoothing_fast_;
+ const float smoothing_slow_;
const float matching_filter_threshold_;
};
diff --git a/modules/audio_processing/aec3/matched_filter_lag_aggregator.h b/modules/audio_processing/aec3/matched_filter_lag_aggregator.h
index d48011e477..612bd5d942 100644
--- a/modules/audio_processing/aec3/matched_filter_lag_aggregator.h
+++ b/modules/audio_processing/aec3/matched_filter_lag_aggregator.h
@@ -45,6 +45,9 @@ class MatchedFilterLagAggregator {
absl::optional<DelayEstimate> Aggregate(
rtc::ArrayView<const MatchedFilter::LagEstimate> lag_estimates);
+ // Returns whether a reliable delay estimate has been found.
+ bool ReliableDelayFound() const { return significant_candidate_found_; }
+
private:
ApmDataDumper* const data_dumper_;
std::vector<int> histogram_;
diff --git a/modules/audio_processing/aec3/matched_filter_unittest.cc b/modules/audio_processing/aec3/matched_filter_unittest.cc
index 137275fd74..37b51fa624 100644
--- a/modules/audio_processing/aec3/matched_filter_unittest.cc
+++ b/modules/audio_processing/aec3/matched_filter_unittest.cc
@@ -206,6 +206,7 @@ TEST(MatchedFilter, LagEstimation) {
kWindowSizeSubBlocks, kNumMatchedFilters,
kAlignmentShiftSubBlocks, 150,
config.delay.delay_estimate_smoothing,
+ config.delay.delay_estimate_smoothing_delay_found,
config.delay.delay_candidate_detection_threshold);
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
@@ -231,7 +232,7 @@ TEST(MatchedFilter, LagEstimation) {
downsampled_capture_data.data(), sub_block_size);
capture_decimator.Decimate(capture[0], downsampled_capture);
filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(),
- downsampled_capture);
+ downsampled_capture, false);
}
// Obtain the lag estimates.
@@ -318,6 +319,7 @@ TEST(MatchedFilter, LagNotReliableForUncorrelatedRenderAndCapture) {
kWindowSizeSubBlocks, kNumMatchedFilters,
kAlignmentShiftSubBlocks, 150,
config.delay.delay_estimate_smoothing,
+ config.delay.delay_estimate_smoothing_delay_found,
config.delay.delay_candidate_detection_threshold);
// Analyze the correlation between render and capture.
@@ -325,7 +327,8 @@ TEST(MatchedFilter, LagNotReliableForUncorrelatedRenderAndCapture) {
RandomizeSampleVector(&random_generator, render[0][0]);
RandomizeSampleVector(&random_generator, capture);
render_delay_buffer->Insert(render);
- filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(), capture);
+ filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(), capture,
+ false);
}
// Obtain the lag estimates.
@@ -361,6 +364,7 @@ TEST(MatchedFilter, LagNotUpdatedForLowLevelRender) {
kWindowSizeSubBlocks, kNumMatchedFilters,
kAlignmentShiftSubBlocks, 150,
config.delay.delay_estimate_smoothing,
+ config.delay.delay_estimate_smoothing_delay_found,
config.delay.delay_candidate_detection_threshold);
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
RenderDelayBuffer::Create(EchoCanceller3Config(), kSampleRateHz,
@@ -379,7 +383,7 @@ TEST(MatchedFilter, LagNotUpdatedForLowLevelRender) {
sub_block_size);
capture_decimator.Decimate(capture[0], downsampled_capture);
filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(),
- downsampled_capture);
+ downsampled_capture, false);
}
// Obtain the lag estimates.
@@ -407,6 +411,7 @@ TEST(MatchedFilter, NumberOfLagEstimates) {
MatchedFilter filter(&data_dumper, DetectOptimization(), sub_block_size,
32, num_matched_filters, 1, 150,
config.delay.delay_estimate_smoothing,
+ config.delay.delay_estimate_smoothing_delay_found,
config.delay.delay_candidate_detection_threshold);
EXPECT_EQ(num_matched_filters, filter.GetLagEstimates().size());
}
@@ -421,6 +426,7 @@ TEST(MatchedFilterDeathTest, ZeroWindowSize) {
EchoCanceller3Config config;
EXPECT_DEATH(MatchedFilter(&data_dumper, DetectOptimization(), 16, 0, 1, 1,
150, config.delay.delay_estimate_smoothing,
+ config.delay.delay_estimate_smoothing_delay_found,
config.delay.delay_candidate_detection_threshold),
"");
}
@@ -430,6 +436,7 @@ TEST(MatchedFilterDeathTest, NullDataDumper) {
EchoCanceller3Config config;
EXPECT_DEATH(MatchedFilter(nullptr, DetectOptimization(), 16, 1, 1, 1, 150,
config.delay.delay_estimate_smoothing,
+ config.delay.delay_estimate_smoothing_delay_found,
config.delay.delay_candidate_detection_threshold),
"");
}
@@ -441,6 +448,7 @@ TEST(MatchedFilterDeathTest, DISABLED_BlockSizeMultipleOf4) {
EchoCanceller3Config config;
EXPECT_DEATH(MatchedFilter(&data_dumper, DetectOptimization(), 15, 1, 1, 1,
150, config.delay.delay_estimate_smoothing,
+ config.delay.delay_estimate_smoothing_delay_found,
config.delay.delay_candidate_detection_threshold),
"");
}
@@ -453,6 +461,7 @@ TEST(MatchedFilterDeathTest, DISABLED_SubBlockSizeAddsUpToBlockSize) {
EchoCanceller3Config config;
EXPECT_DEATH(MatchedFilter(&data_dumper, DetectOptimization(), 12, 1, 1, 1,
150, config.delay.delay_estimate_smoothing,
+ config.delay.delay_estimate_smoothing_delay_found,
config.delay.delay_candidate_detection_threshold),
"");
}
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc
index 0688429d47..15bebecb5f 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -177,7 +177,8 @@ void ResidualEchoEstimator::Estimate(
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
bool dominant_nearend,
- rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
+ rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2,
+ rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded) {
RTC_DCHECK_EQ(R2.size(), Y2.size());
RTC_DCHECK_EQ(R2.size(), S2_linear.size());
@@ -193,14 +194,18 @@ void ResidualEchoEstimator::Estimate(
if (aec_state.SaturatedEcho()) {
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin());
+ std::copy(Y2[ch].begin(), Y2[ch].end(), R2_unbounded[ch].begin());
}
} else {
const bool onset_compensated =
erle_onset_compensation_in_dominant_nearend_ || !dominant_nearend;
LinearEstimate(S2_linear, aec_state.Erle(onset_compensated), R2);
+ LinearEstimate(S2_linear, aec_state.ErleUnbounded(), R2_unbounded);
}
- AddReverb(ReverbType::kLinear, aec_state, render_buffer, R2);
+ UpdateReverb(ReverbType::kLinear, aec_state, render_buffer);
+ AddReverb(R2);
+ AddReverb(R2_unbounded);
} else {
const float echo_path_gain =
GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/true);
@@ -210,6 +215,7 @@ void ResidualEchoEstimator::Estimate(
if (aec_state.SaturatedEcho()) {
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin());
+ std::copy(Y2[ch].begin(), Y2[ch].end(), R2_unbounded[ch].begin());
}
} else {
// Estimate the echo generating signal power.
@@ -229,11 +235,14 @@ void ResidualEchoEstimator::Estimate(
}
NonLinearEstimate(echo_path_gain, X2, R2);
+ NonLinearEstimate(echo_path_gain, X2, R2_unbounded);
}
if (config_.echo_model.model_reverb_in_nonlinear_mode &&
!aec_state.TransparentModeActive()) {
- AddReverb(ReverbType::kNonLinear, aec_state, render_buffer, R2);
+ UpdateReverb(ReverbType::kNonLinear, aec_state, render_buffer);
+ AddReverb(R2);
+ AddReverb(R2_unbounded);
}
}
@@ -244,6 +253,7 @@ void ResidualEchoEstimator::Estimate(
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
R2[ch][k] *= residual_scaling[k];
+ R2_unbounded[ch][k] *= residual_scaling[k];
}
}
}
@@ -292,14 +302,10 @@ void ResidualEchoEstimator::UpdateRenderNoisePower(
}
}
-// Adds the estimated power of the reverb to the residual echo power.
-void ResidualEchoEstimator::AddReverb(
- ReverbType reverb_type,
- const AecState& aec_state,
- const RenderBuffer& render_buffer,
- rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
- const size_t num_capture_channels = R2.size();
-
+// Updates the reverb estimation.
+void ResidualEchoEstimator::UpdateReverb(ReverbType reverb_type,
+ const AecState& aec_state,
+ const RenderBuffer& render_buffer) {
// Choose reverb partition based on what type of echo power model is used.
const size_t first_reverb_partition =
reverb_type == ReverbType::kLinear
@@ -334,6 +340,11 @@ void ResidualEchoEstimator::AddReverb(
echo_reverb_.UpdateReverbNoFreqShaping(render_power, echo_path_gain,
aec_state.ReverbDecay());
}
+}
+// Adds the estimated power of the reverb to the residual echo power.
+void ResidualEchoEstimator::AddReverb(
+ rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) const {
+ const size_t num_capture_channels = R2.size();
// Add the reverb power.
rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb_power =
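Shape of the refactor, sketched with simplified stand-in types: the reverb model is advanced once per Estimate() call and its current power is then added into each output spectrum, so R2 and R2_unbounded receive an identical reverb contribution without double-advancing the model.

#include <array>
#include <vector>

constexpr int kNumBins = 65;  // kFftLengthBy2Plus1
using Spectrum = std::array<float, kNumBins>;

class ReverbSketch {
 public:
  // Advance the reverb power model once per block.
  void UpdateReverb(float render_power, float decay) {
    for (float& p : reverb_power_) p = p * decay + render_power;
  }
  // Add the current reverb power into a residual spectrum; callable for both
  // the bounded and unbounded estimates without re-running the model.
  void AddReverb(std::vector<Spectrum>& R2) const {
    for (Spectrum& channel : R2)
      for (int k = 0; k < kNumBins; ++k) channel[k] += reverb_power_[k];
  }

 private:
  Spectrum reverb_power_{};
};

int main() {
  ReverbSketch reverb;
  std::vector<Spectrum> R2(2, Spectrum{}), R2_unbounded(2, Spectrum{});
  reverb.UpdateReverb(/*render_power=*/1.f, /*decay=*/0.8f);  // once per block
  reverb.AddReverb(R2);            // same reverb power added to both outputs
  reverb.AddReverb(R2_unbounded);
}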
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.h b/modules/audio_processing/aec3/residual_echo_estimator.h
index 9e977766cb..c071854c4a 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.h
+++ b/modules/audio_processing/aec3/residual_echo_estimator.h
@@ -40,7 +40,8 @@ class ResidualEchoEstimator {
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
bool dominant_nearend,
- rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2);
+ rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2,
+ rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded);
private:
enum class ReverbType { kLinear, kNonLinear };
@@ -52,12 +53,15 @@ class ResidualEchoEstimator {
// render signal.
void UpdateRenderNoisePower(const RenderBuffer& render_buffer);
+ // Updates the reverb estimation.
+ void UpdateReverb(ReverbType reverb_type,
+ const AecState& aec_state,
+ const RenderBuffer& render_buffer);
+
// Adds the estimated unmodelled echo power to the residual echo power
// estimate.
- void AddReverb(ReverbType reverb_type,
- const AecState& aec_state,
- const RenderBuffer& render_buffer,
- rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2);
+ void AddReverb(
+ rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) const;
// Gets the echo path gain to apply.
float GetEchoPathGain(const AecState& aec_state,
diff --git a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
index e80838b5f6..3d760b7dda 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
@@ -48,6 +48,8 @@ TEST_P(ResidualEchoEstimatorMultiChannel, BasicTest) {
num_capture_channels);
std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(num_capture_channels);
std::vector<std::array<float, kFftLengthBy2Plus1>> R2(num_capture_channels);
+ std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded(
+ num_capture_channels);
std::vector<std::vector<std::vector<float>>> x(
kNumBands, std::vector<std::vector<float>>(
num_render_channels, std::vector<float>(kBlockSize, 0.f)));
@@ -100,7 +102,8 @@ TEST_P(ResidualEchoEstimatorMultiChannel, BasicTest) {
output);
estimator.Estimate(aec_state, *render_delay_buffer->GetRenderBuffer(),
- S2_linear, Y2, /*dominant_nearend=*/false, R2);
+ S2_linear, Y2, /*dominant_nearend=*/false, R2,
+ R2_unbounded);
}
}
diff --git a/modules/audio_processing/aec3/reverb_model_estimator.cc b/modules/audio_processing/aec3/reverb_model_estimator.cc
index 717431103f..00ae466409 100644
--- a/modules/audio_processing/aec3/reverb_model_estimator.cc
+++ b/modules/audio_processing/aec3/reverb_model_estimator.cc
@@ -9,6 +9,7 @@
*/
#include "modules/audio_processing/aec3/reverb_model_estimator.h"
+#include <memory>
namespace webrtc {
diff --git a/modules/audio_processing/aec3/reverb_model_estimator.h b/modules/audio_processing/aec3/reverb_model_estimator.h
index 3b9971abae..e4e9540673 100644
--- a/modules/audio_processing/aec3/reverb_model_estimator.h
+++ b/modules/audio_processing/aec3/reverb_model_estimator.h
@@ -12,6 +12,7 @@
#define MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_ESTIMATOR_H_
#include <array>
+#include <memory>
#include <vector>
#include "absl/types/optional.h"
diff --git a/modules/audio_processing/aec3/subband_erle_estimator.cc b/modules/audio_processing/aec3/subband_erle_estimator.cc
index 1e957f23ac..dc7f92fd99 100644
--- a/modules/audio_processing/aec3/subband_erle_estimator.cc
+++ b/modules/audio_processing/aec3/subband_erle_estimator.cc
@@ -49,6 +49,7 @@ SubbandErleEstimator::SubbandErleEstimator(const EchoCanceller3Config& config,
accum_spectra_(num_capture_channels),
erle_(num_capture_channels),
erle_onset_compensated_(num_capture_channels),
+ erle_unbounded_(num_capture_channels),
erle_during_onsets_(num_capture_channels),
coming_onset_(num_capture_channels),
hold_counters_(num_capture_channels) {
@@ -62,6 +63,7 @@ void SubbandErleEstimator::Reset() {
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
erle_[ch].fill(min_erle_);
erle_onset_compensated_[ch].fill(min_erle_);
+ erle_unbounded_[ch].fill(min_erle_);
erle_during_onsets_[ch].fill(min_erle_);
coming_onset_[ch].fill(true);
hold_counters_[ch].fill(0);
@@ -90,6 +92,10 @@ void SubbandErleEstimator::Update(
auto& erle_oc = erle_onset_compensated_[ch];
erle_oc[0] = erle_oc[1];
erle_oc[kFftLengthBy2] = erle_oc[kFftLengthBy2 - 1];
+
+ auto& erle_u = erle_unbounded_[ch];
+ erle_u[0] = erle_u[1];
+ erle_u[kFftLengthBy2] = erle_u[kFftLengthBy2 - 1];
}
}
@@ -163,6 +169,11 @@ void SubbandErleEstimator::UpdateBands(
update_erle_band(erle_onset_compensated_[ch][k], new_erle[k],
low_render_energy, min_erle_, max_erle_[k]);
}
+
+ // Virtually unbounded ERLE.
+ constexpr float kUnboundedErleMax = 100000.0f;
+ update_erle_band(erle_unbounded_[ch][k], new_erle[k], low_render_energy,
+ min_erle_, kUnboundedErleMax);
}
}
}
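A hedged scalar sketch of the banded update with the new "virtually unbounded" ceiling (the smoothing weight here is an assumption; update_erle_band's exact form is elided): the same update runs for each estimate, and only the cap differs between the bounded and unbounded versions.

#include <algorithm>

// Simplified stand-in for SubbandErleEstimator's per-band update.
void UpdateErleBand(float& erle, float new_erle, float min_erle,
                    float max_erle) {
  erle += 0.05f * (new_erle - erle);  // assumed smoothing weight
  erle = std::min(std::max(erle, min_erle), max_erle);
}

int main() {
  float erle_capped = 1.f, erle_unbounded = 1.f;
  constexpr float kUnboundedErleMax = 100000.0f;  // cap from the patch
  for (int i = 0; i < 200; ++i) {
    UpdateErleBand(erle_capped, 500.f, 1.f, 8.f);  // saturates at the cap, 8
    UpdateErleBand(erle_unbounded, 500.f, 1.f, kUnboundedErleMax);  // ~500
  }
}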
diff --git a/modules/audio_processing/aec3/subband_erle_estimator.h b/modules/audio_processing/aec3/subband_erle_estimator.h
index ffed6a57a5..8bf9c4d645 100644
--- a/modules/audio_processing/aec3/subband_erle_estimator.h
+++ b/modules/audio_processing/aec3/subband_erle_estimator.h
@@ -47,6 +47,12 @@ class SubbandErleEstimator {
: erle_;
}
+ // Returns the non-capped ERLE estimate.
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleUnbounded()
+ const {
+ return erle_unbounded_;
+ }
+
// Returns the ERLE estimate at onsets (only used for testing).
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleDuringOnsets()
const {
@@ -88,6 +94,7 @@ class SubbandErleEstimator {
std::vector<std::array<float, kFftLengthBy2Plus1>> erle_;
// ERLE lowered during render onsets.
std::vector<std::array<float, kFftLengthBy2Plus1>> erle_onset_compensated_;
+ std::vector<std::array<float, kFftLengthBy2Plus1>> erle_unbounded_;
// Estimation of ERLE during render onsets.
std::vector<std::array<float, kFftLengthBy2Plus1>> erle_during_onsets_;
std::vector<std::array<bool, kFftLengthBy2Plus1>> coming_onset_;
diff --git a/modules/audio_processing/aec3/subtractor.cc b/modules/audio_processing/aec3/subtractor.cc
index d10e4ffc52..2eae686752 100644
--- a/modules/audio_processing/aec3/subtractor.cc
+++ b/modules/audio_processing/aec3/subtractor.cc
@@ -91,7 +91,20 @@ Subtractor::Subtractor(const EchoCanceller3Config& config,
std::vector<float>(GetTimeDomainLength(std::max(
config_.filter.refined_initial.length_blocks,
config_.filter.refined.length_blocks)),
- 0.f)) {
+ 0.f)),
+ coarse_impulse_responses_(0) {
+ // Set up storage for the coarse impulse responses if data dumping is
+ // available.
+ if (ApmDataDumper::IsAvailable()) {
+ coarse_impulse_responses_.resize(num_capture_channels_);
+ const size_t filter_size = GetTimeDomainLength(
+ std::max(config_.filter.coarse_initial.length_blocks,
+ config_.filter.coarse.length_blocks));
+ for (std::vector<float>& impulse_response : coarse_impulse_responses_) {
+ impulse_response.resize(filter_size, 0.f);
+ }
+ }
+
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
refined_filters_[ch] = std::make_unique<AdaptiveFirFilter>(
config_.filter.refined.length_blocks,
@@ -285,7 +298,14 @@ void Subtractor::Process(const RenderBuffer& render_buffer,
config_.filter.coarse_reset_hangover_blocks;
}
- coarse_filter_[ch]->Adapt(render_buffer, G);
+ if (ApmDataDumper::IsAvailable()) {
+ RTC_DCHECK_LT(ch, coarse_impulse_responses_.size());
+ coarse_filter_[ch]->Adapt(render_buffer, G,
+ &coarse_impulse_responses_[ch]);
+ } else {
+ coarse_filter_[ch]->Adapt(render_buffer, G);
+ }
+
if (ch == 0) {
data_dumper_->DumpRaw("aec3_subtractor_G_coarse", G.re);
data_dumper_->DumpRaw("aec3_subtractor_G_coarse", G.im);
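The gating idiom used above, in sketch form with stand-in types: storage for the coarse impulse responses is sized, filled, and dumped only when data dumping is available, so release builds skip both the allocation and the per-block bookkeeping.

#include <vector>

struct DataDumperSketch {
  static bool IsAvailable() { return false; }  // true in apm_debug_dump builds
};

struct FilterSketch {
  // The dumping variant also writes out the time-domain impulse response.
  void Adapt(std::vector<float>* impulse_response) {
    if (impulse_response)
      impulse_response->assign(impulse_response->size(), 0.f);
  }
  void Adapt() {}  // cheap path: no impulse-response bookkeeping
};

int main() {
  std::vector<std::vector<float>> responses;
  if (DataDumperSketch::IsAvailable())
    responses.resize(2, std::vector<float>(512));  // sized only when dumping
  FilterSketch filter;
  if (DataDumperSketch::IsAvailable())
    filter.Adapt(&responses[0]);
  else
    filter.Adapt();
}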
diff --git a/modules/audio_processing/aec3/subtractor.h b/modules/audio_processing/aec3/subtractor.h
index 560f6568eb..767e4aad46 100644
--- a/modules/audio_processing/aec3/subtractor.h
+++ b/modules/audio_processing/aec3/subtractor.h
@@ -78,6 +78,15 @@ class Subtractor {
refined_impulse_responses_[0].data(),
GetTimeDomainLength(
refined_filters_[0]->max_filter_size_partitions())));
+ if (ApmDataDumper::IsAvailable()) {
+ RTC_DCHECK_GT(coarse_impulse_responses_.size(), 0);
+ data_dumper_->DumpRaw(
+ "aec3_subtractor_h_coarse",
+ rtc::ArrayView<const float>(
+ coarse_impulse_responses_[0].data(),
+ GetTimeDomainLength(
+ coarse_filter_[0]->max_filter_size_partitions())));
+ }
refined_filters_[0]->DumpFilter("aec3_subtractor_H_refined");
coarse_filter_[0]->DumpFilter("aec3_subtractor_H_coarse");
@@ -132,6 +141,7 @@ class Subtractor {
std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>
refined_frequency_responses_;
std::vector<std::vector<float>> refined_impulse_responses_;
+ std::vector<std::vector<float>> coarse_impulse_responses_;
};
} // namespace webrtc
diff --git a/modules/audio_processing/aec3/suppression_gain.cc b/modules/audio_processing/aec3/suppression_gain.cc
index 5b01c52908..6405d71c2d 100644
--- a/modules/audio_processing/aec3/suppression_gain.cc
+++ b/modules/audio_processing/aec3/suppression_gain.cc
@@ -23,10 +23,15 @@
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/atomic_ops.h"
#include "rtc_base/checks.h"
+#include "system_wrappers/include/field_trial.h"
namespace webrtc {
namespace {
+bool UseUnboundedEchoSpectrum() {
+ return field_trial::IsEnabled("WebRTC-Aec3UseUnboundedEchoSpectrum");
+}
+
void LimitLowFrequencyGains(std::array<float, kFftLengthBy2Plus1>* gain) {
// Limit the low frequency gains to avoid the impact of the high-pass filter
// on the lower-frequency gain influencing the overall achieved gain.
@@ -230,16 +235,20 @@ void SuppressionGain::GetMinGain(
min_gain[k] = std::min(min_gain[k], 1.f);
}
- const bool is_nearend_state = dominant_nearend_detector_->IsNearendState();
- for (size_t k = 0; k < 6; ++k) {
- const auto& dec = is_nearend_state ? nearend_params_.max_dec_factor_lf
- : normal_params_.max_dec_factor_lf;
-
- // Make sure the gains of the low frequencies do not decrease too
- // quickly after strong nearend.
- if (last_nearend[k] > last_echo[k]) {
- min_gain[k] = std::max(min_gain[k], last_gain_[k] * dec);
- min_gain[k] = std::min(min_gain[k], 1.f);
+ if (!initial_state_ ||
+ config_.suppressor.lf_smoothing_during_initial_phase) {
+ const float& dec = dominant_nearend_detector_->IsNearendState()
+ ? nearend_params_.max_dec_factor_lf
+ : normal_params_.max_dec_factor_lf;
+
+ for (int k = 0; k <= config_.suppressor.last_lf_smoothing_band; ++k) {
+ // Make sure the gains of the low frequencies do not decrease too
+ // quickly after strong nearend.
+ if (last_nearend[k] > last_echo[k] ||
+ k <= config_.suppressor.last_permanent_lf_smoothing_band) {
+ min_gain[k] = std::max(min_gain[k], last_gain_[k] * dec);
+ min_gain[k] = std::min(min_gain[k], 1.f);
+ }
}
}
} else {
@@ -333,8 +342,13 @@ SuppressionGain::SuppressionGain(const EchoCanceller3Config& config,
num_capture_channels_,
aec3::MovingAverage(kFftLengthBy2Plus1,
config.suppressor.nearend_average_blocks)),
- nearend_params_(config_.suppressor.nearend_tuning),
- normal_params_(config_.suppressor.normal_tuning) {
+ nearend_params_(config_.suppressor.last_lf_band,
+ config_.suppressor.first_hf_band,
+ config_.suppressor.nearend_tuning),
+ normal_params_(config_.suppressor.last_lf_band,
+ config_.suppressor.first_hf_band,
+ config_.suppressor.normal_tuning),
+ use_unbounded_echo_spectrum_(UseUnboundedEchoSpectrum()) {
RTC_DCHECK_LT(0, state_change_duration_blocks_);
last_gain_.fill(1.f);
if (config_.suppressor.use_subband_nearend_detection) {
@@ -356,6 +370,8 @@ void SuppressionGain::GetGain(
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
residual_echo_spectrum,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+ residual_echo_spectrum_unbounded,
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
comfort_noise_spectrum,
const RenderSignalAnalyzer& render_signal_analyzer,
const AecState& aec_state,
@@ -366,8 +382,13 @@ void SuppressionGain::GetGain(
RTC_DCHECK(high_bands_gain);
RTC_DCHECK(low_band_gain);
+ // Choose residual echo spectrum for the dominant nearend detector.
+ const auto echo = use_unbounded_echo_spectrum_
+ ? residual_echo_spectrum_unbounded
+ : residual_echo_spectrum;
+
// Update the nearend state selection.
- dominant_nearend_detector_->Update(nearend_spectrum, residual_echo_spectrum,
+ dominant_nearend_detector_->Update(nearend_spectrum, echo,
comfort_noise_spectrum, initial_state_);
// Compute gain for the lower band.
@@ -383,6 +404,9 @@ void SuppressionGain::GetGain(
*high_bands_gain =
UpperBandsGain(echo_spectrum, comfort_noise_spectrum, narrow_peak_band,
aec_state.SaturatedEcho(), render, *low_band_gain);
+
+ data_dumper_->DumpRaw("aec3_dominant_nearend",
+ dominant_nearend_detector_->IsNearendState());
}
void SuppressionGain::SetInitialState(bool state) {
@@ -419,23 +443,23 @@ bool SuppressionGain::LowNoiseRenderDetector::Detect(
}
SuppressionGain::GainParameters::GainParameters(
+ int last_lf_band,
+ int first_hf_band,
const EchoCanceller3Config::Suppressor::Tuning& tuning)
: max_inc_factor(tuning.max_inc_factor),
max_dec_factor_lf(tuning.max_dec_factor_lf) {
// Compute per-band masking thresholds.
- constexpr size_t kLastLfBand = 5;
- constexpr size_t kFirstHfBand = 8;
- RTC_DCHECK_LT(kLastLfBand, kFirstHfBand);
+ RTC_DCHECK_LT(last_lf_band, first_hf_band);
auto& lf = tuning.mask_lf;
auto& hf = tuning.mask_hf;
RTC_DCHECK_LT(lf.enr_transparent, lf.enr_suppress);
RTC_DCHECK_LT(hf.enr_transparent, hf.enr_suppress);
- for (size_t k = 0; k < kFftLengthBy2Plus1; k++) {
+ for (int k = 0; k < static_cast<int>(kFftLengthBy2Plus1); k++) {
float a;
- if (k <= kLastLfBand) {
+ if (k <= last_lf_band) {
a = 0.f;
- } else if (k < kFirstHfBand) {
- a = (k - kLastLfBand) / static_cast<float>(kFirstHfBand - kLastLfBand);
+ } else if (k < first_hf_band) {
+ a = (k - last_lf_band) / static_cast<float>(first_hf_band - last_lf_band);
} else {
a = 1.f;
}
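The per-band blend that the new last_lf_band/first_hf_band parameters generalize, as a worked example using the previously hard-coded values: bands up to last_lf_band use the low-frequency tuning (a = 0), bands from first_hf_band use the high-frequency tuning (a = 1), and the bands in between interpolate linearly.

#include <cstdio>

int main() {
  const int last_lf_band = 5;   // previously the hard-coded kLastLfBand
  const int first_hf_band = 8;  // previously the hard-coded kFirstHfBand
  for (int k = 0; k < 12; ++k) {
    float a;
    if (k <= last_lf_band) {
      a = 0.f;
    } else if (k < first_hf_band) {
      a = (k - last_lf_band) /
          static_cast<float>(first_hf_band - last_lf_band);
    } else {
      a = 1.f;
    }
    // Bands 0-5 print 0.00, band 6 prints 0.33, band 7 prints 0.67,
    // bands 8+ print 1.00.
    std::printf("band %2d: hf weight a = %.2f\n", k, a);
  }
}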
diff --git a/modules/audio_processing/aec3/suppression_gain.h b/modules/audio_processing/aec3/suppression_gain.h
index d049baeaaf..7c4a1c9f7d 100644
--- a/modules/audio_processing/aec3/suppression_gain.h
+++ b/modules/audio_processing/aec3/suppression_gain.h
@@ -43,6 +43,8 @@ class SuppressionGain {
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
residual_echo_spectrum,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+ residual_echo_spectrum_unbounded,
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
comfort_noise_spectrum,
const RenderSignalAnalyzer& render_signal_analyzer,
const AecState& aec_state,
@@ -103,6 +105,8 @@ class SuppressionGain {
struct GainParameters {
explicit GainParameters(
+ int last_lf_band,
+ int first_hf_band,
const EchoCanceller3Config::Suppressor::Tuning& tuning);
const float max_inc_factor;
const float max_dec_factor_lf;
@@ -126,6 +130,9 @@ class SuppressionGain {
std::vector<aec3::MovingAverage> nearend_smoothers_;
const GainParameters nearend_params_;
const GainParameters normal_params_;
+ // Determines if the dominant nearend detector uses the unbounded residual
+ // echo spectrum.
+ const bool use_unbounded_echo_spectrum_;
std::unique_ptr<NearendDetector> dominant_nearend_detector_;
RTC_DISALLOW_COPY_AND_ASSIGN(SuppressionGain);
diff --git a/modules/audio_processing/aec3/suppression_gain_unittest.cc b/modules/audio_processing/aec3/suppression_gain_unittest.cc
index 26bfc24ebb..999b0f27ab 100644
--- a/modules/audio_processing/aec3/suppression_gain_unittest.cc
+++ b/modules/audio_processing/aec3/suppression_gain_unittest.cc
@@ -26,29 +26,30 @@ namespace aec3 {
// Verifies that the check for non-null output gains works.
TEST(SuppressionGainDeathTest, NullOutputGains) {
- std::vector<std::array<float, kFftLengthBy2Plus1>> E2(1, {0.f});
- std::vector<std::array<float, kFftLengthBy2Plus1>> R2(1, {0.f});
+ std::vector<std::array<float, kFftLengthBy2Plus1>> E2(1, {0.0f});
+ std::vector<std::array<float, kFftLengthBy2Plus1>> R2(1, {0.0f});
+ std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded(1, {0.0f});
std::vector<std::array<float, kFftLengthBy2Plus1>> S2(1);
- std::vector<std::array<float, kFftLengthBy2Plus1>> N2(1, {0.f});
+ std::vector<std::array<float, kFftLengthBy2Plus1>> N2(1, {0.0f});
for (auto& S2_k : S2) {
- S2_k.fill(.1f);
+ S2_k.fill(0.1f);
}
FftData E;
FftData Y;
- E.re.fill(0.f);
- E.im.fill(0.f);
- Y.re.fill(0.f);
- Y.im.fill(0.f);
+ E.re.fill(0.0f);
+ E.im.fill(0.0f);
+ Y.re.fill(0.0f);
+ Y.im.fill(0.0f);
float high_bands_gain;
AecState aec_state(EchoCanceller3Config{}, 1);
EXPECT_DEATH(
SuppressionGain(EchoCanceller3Config{}, DetectOptimization(), 16000, 1)
- .GetGain(E2, S2, R2, N2,
+ .GetGain(E2, S2, R2, R2_unbounded, N2,
RenderSignalAnalyzer((EchoCanceller3Config{})), aec_state,
std::vector<std::vector<std::vector<float>>>(
3, std::vector<std::vector<float>>(
- 1, std::vector<float>(kBlockSize, 0.f))),
+ 1, std::vector<float>(kBlockSize, 0.0f))),
false, &high_bands_gain, nullptr),
"");
}
@@ -67,15 +68,17 @@ TEST(SuppressionGain, BasicGainComputation) {
float high_bands_gain;
std::vector<std::array<float, kFftLengthBy2Plus1>> E2(kNumCaptureChannels);
std::vector<std::array<float, kFftLengthBy2Plus1>> S2(kNumCaptureChannels,
- {0.f});
+ {0.0f});
std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(kNumCaptureChannels);
std::vector<std::array<float, kFftLengthBy2Plus1>> R2(kNumCaptureChannels);
+ std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded(
+ kNumCaptureChannels);
std::vector<std::array<float, kFftLengthBy2Plus1>> N2(kNumCaptureChannels);
std::array<float, kFftLengthBy2Plus1> g;
std::vector<SubtractorOutput> output(kNumCaptureChannels);
std::vector<std::vector<std::vector<float>>> x(
kNumBands, std::vector<std::vector<float>>(
- kNumRenderChannels, std::vector<float>(kBlockSize, 0.f)));
+ kNumRenderChannels, std::vector<float>(kBlockSize, 0.0f)));
EchoCanceller3Config config;
AecState aec_state(config, kNumCaptureChannels);
ApmDataDumper data_dumper(42);
@@ -89,8 +92,9 @@ TEST(SuppressionGain, BasicGainComputation) {
for (size_t ch = 0; ch < kNumCaptureChannels; ++ch) {
E2[ch].fill(10.f);
Y2[ch].fill(10.f);
- R2[ch].fill(.1f);
- N2[ch].fill(100.f);
+ R2[ch].fill(0.1f);
+ R2_unbounded[ch].fill(0.1f);
+ N2[ch].fill(100.0f);
}
for (auto& subtractor_output : output) {
subtractor_output.Reset();
@@ -107,17 +111,18 @@ TEST(SuppressionGain, BasicGainComputation) {
aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponses(),
subtractor.FilterImpulseResponses(),
*render_delay_buffer->GetRenderBuffer(), E2, Y2, output);
- suppression_gain.GetGain(E2, S2, R2, N2, analyzer, aec_state, x, false,
- &high_bands_gain, &g);
+ suppression_gain.GetGain(E2, S2, R2, R2_unbounded, N2, analyzer, aec_state,
+ x, false, &high_bands_gain, &g);
}
std::for_each(g.begin(), g.end(),
- [](float a) { EXPECT_NEAR(1.f, a, 0.001); });
+ [](float a) { EXPECT_NEAR(1.0f, a, 0.001f); });
// Ensure that a strong nearend is detected to mask any echoes.
for (size_t ch = 0; ch < kNumCaptureChannels; ++ch) {
E2[ch].fill(100.f);
Y2[ch].fill(100.f);
R2[ch].fill(0.1f);
+ R2_unbounded[ch].fill(0.1f);
S2[ch].fill(0.1f);
N2[ch].fill(0.f);
}
@@ -126,22 +131,23 @@ TEST(SuppressionGain, BasicGainComputation) {
aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponses(),
subtractor.FilterImpulseResponses(),
*render_delay_buffer->GetRenderBuffer(), E2, Y2, output);
- suppression_gain.GetGain(E2, S2, R2, N2, analyzer, aec_state, x, false,
- &high_bands_gain, &g);
+ suppression_gain.GetGain(E2, S2, R2, R2_unbounded, N2, analyzer, aec_state,
+ x, false, &high_bands_gain, &g);
}
std::for_each(g.begin(), g.end(),
- [](float a) { EXPECT_NEAR(1.f, a, 0.001); });
+ [](float a) { EXPECT_NEAR(1.0f, a, 0.001f); });
// Add a strong echo to one of the channels and ensure that it is suppressed.
- E2[1].fill(1000000000.f);
- R2[1].fill(10000000000000.f);
+ E2[1].fill(1000000000.0f);
+ R2[1].fill(10000000000000.0f);
+ R2_unbounded[1].fill(10000000000000.0f);
for (int k = 0; k < 10; ++k) {
- suppression_gain.GetGain(E2, S2, R2, N2, analyzer, aec_state, x, false,
- &high_bands_gain, &g);
+ suppression_gain.GetGain(E2, S2, R2, R2_unbounded, N2, analyzer, aec_state,
+ x, false, &high_bands_gain, &g);
}
std::for_each(g.begin(), g.end(),
- [](float a) { EXPECT_NEAR(0.f, a, 0.001); });
+ [](float a) { EXPECT_NEAR(0.0f, a, 0.001f); });
}
} // namespace aec3
diff --git a/modules/audio_processing/aec3/transparent_mode.cc b/modules/audio_processing/aec3/transparent_mode.cc
index 7cfa3e8eae..489f53f4f1 100644
--- a/modules/audio_processing/aec3/transparent_mode.cc
+++ b/modules/audio_processing/aec3/transparent_mode.cc
@@ -11,6 +11,7 @@
#include "modules/audio_processing/aec3/transparent_mode.h"
#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
#include "system_wrappers/include/field_trial.h"
namespace webrtc {
@@ -228,11 +229,14 @@ class LegacyTransparentModeImpl : public TransparentMode {
std::unique_ptr<TransparentMode> TransparentMode::Create(
const EchoCanceller3Config& config) {
if (config.ep_strength.bounded_erl || DeactivateTransparentMode()) {
+ RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: Disabled";
return nullptr;
}
if (ActivateTransparentModeHmm()) {
+ RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: HMM";
return std::make_unique<TransparentModeImpl>();
}
+ RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: Legacy";
return std::make_unique<LegacyTransparentModeImpl>(config);
}
diff --git a/modules/audio_processing/agc/BUILD.gn b/modules/audio_processing/agc/BUILD.gn
index 5ad6644323..4bb8c5494b 100644
--- a/modules/audio_processing/agc/BUILD.gn
+++ b/modules/audio_processing/agc/BUILD.gn
@@ -19,11 +19,14 @@ rtc_library("agc") {
]
configs += [ "..:apm_debug_dump" ]
deps = [
+ ":clipping_predictor",
+ ":clipping_predictor_evaluator",
":gain_control_interface",
":gain_map",
":level_estimation",
"..:apm_logging",
"..:audio_buffer",
+ "..:audio_frame_view",
"../../../common_audio",
"../../../common_audio:common_audio_c",
"../../../rtc_base:checks",
@@ -38,6 +41,49 @@ rtc_library("agc") {
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
}
+rtc_library("clipping_predictor") {
+ sources = [
+ "clipping_predictor.cc",
+ "clipping_predictor.h",
+ ]
+ deps = [
+ ":clipping_predictor_level_buffer",
+ ":gain_map",
+ "..:api",
+ "..:audio_frame_view",
+ "../../../common_audio",
+ "../../../rtc_base:checks",
+ "../../../rtc_base:logging",
+ "../../../rtc_base:safe_minmax",
+ ]
+ absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
+}
+
+rtc_library("clipping_predictor_evaluator") {
+ sources = [
+ "clipping_predictor_evaluator.cc",
+ "clipping_predictor_evaluator.h",
+ ]
+ deps = [
+ "../../../rtc_base:checks",
+ "../../../rtc_base:logging",
+ ]
+ absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
+}
+
+rtc_library("clipping_predictor_level_buffer") {
+ sources = [
+ "clipping_predictor_level_buffer.cc",
+ "clipping_predictor_level_buffer.h",
+ ]
+ deps = [
+ "../../../rtc_base:checks",
+ "../../../rtc_base:logging",
+ "../../../rtc_base:rtc_base_approved",
+ ]
+ absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
+}
+
rtc_library("level_estimation") {
sources = [
"agc.cc",
@@ -96,6 +142,9 @@ if (rtc_include_tests) {
testonly = true
sources = [
"agc_manager_direct_unittest.cc",
+ "clipping_predictor_evaluator_unittest.cc",
+ "clipping_predictor_level_buffer_unittest.cc",
+ "clipping_predictor_unittest.cc",
"loudness_histogram_unittest.cc",
"mock_agc.h",
]
@@ -103,13 +152,20 @@ if (rtc_include_tests) {
deps = [
":agc",
+ ":clipping_predictor",
+ ":clipping_predictor_evaluator",
+ ":clipping_predictor_level_buffer",
":gain_control_interface",
":level_estimation",
"..:mocks",
+ "../../../rtc_base:checks",
+ "../../../rtc_base:rtc_base_approved",
+ "../../../rtc_base:safe_conversions",
"../../../test:field_trial",
"../../../test:fileutils",
"../../../test:test_support",
"//testing/gtest",
]
+ absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
}
}
diff --git a/modules/audio_processing/agc/agc_manager_direct.cc b/modules/audio_processing/agc/agc_manager_direct.cc
index 2454d1bbb1..817678801e 100644
--- a/modules/audio_processing/agc/agc_manager_direct.cc
+++ b/modules/audio_processing/agc/agc_manager_direct.cc
@@ -16,6 +16,7 @@
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc/gain_control.h"
#include "modules/audio_processing/agc/gain_map_internal.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
#include "rtc_base/atomic_ops.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
@@ -27,33 +28,33 @@ namespace webrtc {
namespace {
-// Amount the microphone level is lowered with every clipping event.
-const int kClippedLevelStep = 15;
-// Proportion of clipped samples required to declare a clipping event.
-const float kClippedRatioThreshold = 0.1f;
-// Time in frames to wait after a clipping event before checking again.
-const int kClippedWaitFrames = 300;
-
// Amount of error we tolerate in the microphone level (presumably due to OS
// quantization) before we assume the user has manually adjusted the microphone.
-const int kLevelQuantizationSlack = 25;
+constexpr int kLevelQuantizationSlack = 25;
-const int kDefaultCompressionGain = 7;
-const int kMaxCompressionGain = 12;
-const int kMinCompressionGain = 2;
+constexpr int kDefaultCompressionGain = 7;
+constexpr int kMaxCompressionGain = 12;
+constexpr int kMinCompressionGain = 2;
// Controls the rate of compression changes towards the target.
-const float kCompressionGainStep = 0.05f;
+constexpr float kCompressionGainStep = 0.05f;
-const int kMaxMicLevel = 255;
+constexpr int kMaxMicLevel = 255;
static_assert(kGainMapSize > kMaxMicLevel, "gain map too small");
-const int kMinMicLevel = 12;
+constexpr int kMinMicLevel = 12;
// Prevent very large microphone level changes.
-const int kMaxResidualGainChange = 15;
+constexpr int kMaxResidualGainChange = 15;
// Maximum additional gain allowed to compensate for microphone level
// restrictions from clipping events.
-const int kSurplusCompressionGain = 6;
+constexpr int kSurplusCompressionGain = 6;
+
+// History size for the clipping predictor evaluator (unit: number of 10 ms
+// frames).
+constexpr int kClippingPredictorEvaluatorHistorySize = 32;
+
+using ClippingPredictorConfig = AudioProcessing::Config::GainController1::
+ AnalogGainController::ClippingPredictor;
// Returns whether a fall-back solution to choose the maximum level should be
// chosen.
@@ -132,6 +133,33 @@ float ComputeClippedRatio(const float* const* audio,
return static_cast<float>(num_clipped) / (samples_per_channel);
}
+void LogClippingPredictorMetrics(const ClippingPredictorEvaluator& evaluator) {
+ RTC_LOG(LS_INFO) << "Clipping predictor metrics: TP "
+ << evaluator.true_positives() << " TN "
+ << evaluator.true_negatives() << " FP "
+ << evaluator.false_positives() << " FN "
+ << evaluator.false_negatives();
+ const float precision_denominator =
+ evaluator.true_positives() + evaluator.false_positives();
+ const float recall_denominator =
+ evaluator.true_positives() + evaluator.false_negatives();
+ if (precision_denominator > 0 && recall_denominator > 0) {
+ const float precision = evaluator.true_positives() / precision_denominator;
+ const float recall = evaluator.true_positives() / recall_denominator;
+ RTC_LOG(LS_INFO) << "Clipping predictor metrics: P " << precision << " R "
+ << recall;
+ const float f1_score_denominator = precision + recall;
+ if (f1_score_denominator > 0.0f) {
+ const float f1_score = 2 * precision * recall / f1_score_denominator;
+ RTC_LOG(LS_INFO) << "Clipping predictor metrics: F1 " << f1_score;
+ RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc.ClippingPredictor.F1Score",
+ std::round(f1_score * 100.0f), /*min=*/0,
+ /*max=*/100,
+ /*bucket_count=*/50);
+ }
+ }
+}
+
} // namespace
MonoAgc::MonoAgc(ApmDataDumper* data_dumper,
@@ -182,19 +210,19 @@ void MonoAgc::Process(const int16_t* audio,
}
}
-void MonoAgc::HandleClipping() {
+void MonoAgc::HandleClipping(int clipped_level_step) {
// Always decrease the maximum level, even if the current level is below
// threshold.
- SetMaxLevel(std::max(clipped_level_min_, max_level_ - kClippedLevelStep));
+ SetMaxLevel(std::max(clipped_level_min_, max_level_ - clipped_level_step));
if (log_to_histograms_) {
RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.AgcClippingAdjustmentAllowed",
- level_ - kClippedLevelStep >= clipped_level_min_);
+ level_ - clipped_level_step >= clipped_level_min_);
}
if (level_ > clipped_level_min_) {
// Don't try to adjust the level if we're already below the limit. As
// a consequence, if the user has brought the level above the limit, we
// will still not react until the postproc updates the level.
- SetLevel(std::max(clipped_level_min_, level_ - kClippedLevelStep));
+ SetLevel(std::max(clipped_level_min_, level_ - clipped_level_step));
// Reset the AGCs for all channels since the level has changed.
agc_->Reset();
}
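
With a configurable step, a clipping event lowers both the maximum level and
the current level by `clipped_level_step`, floored at `clipped_level_min_`. A
minimal sketch, assuming level 255, a minimum of 70 and the default step of 15:

    #include <algorithm>

    // Sketch only; 70 is an assumed clipped_level_min_.
    int ApplyClippingStep(int level, int clipped_level_min, int step) {
      return std::max(clipped_level_min, level - step);
    }
    // ApplyClippingStep(255, 70, 15) == 240; a second event yields 225.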
@@ -401,35 +429,58 @@ void MonoAgc::UpdateCompressor() {
int AgcManagerDirect::instance_counter_ = 0;
-AgcManagerDirect::AgcManagerDirect(Agc* agc,
- int startup_min_level,
- int clipped_level_min,
- int sample_rate_hz)
+AgcManagerDirect::AgcManagerDirect(
+ Agc* agc,
+ int startup_min_level,
+ int clipped_level_min,
+ int sample_rate_hz,
+ int clipped_level_step,
+ float clipped_ratio_threshold,
+ int clipped_wait_frames,
+ const ClippingPredictorConfig& clipping_config)
: AgcManagerDirect(/*num_capture_channels*/ 1,
startup_min_level,
clipped_level_min,
/*disable_digital_adaptive*/ false,
- sample_rate_hz) {
+ sample_rate_hz,
+ clipped_level_step,
+ clipped_ratio_threshold,
+ clipped_wait_frames,
+ clipping_config) {
RTC_DCHECK(channel_agcs_[0]);
RTC_DCHECK(agc);
channel_agcs_[0]->set_agc(agc);
}
-AgcManagerDirect::AgcManagerDirect(int num_capture_channels,
- int startup_min_level,
- int clipped_level_min,
- bool disable_digital_adaptive,
- int sample_rate_hz)
+AgcManagerDirect::AgcManagerDirect(
+ int num_capture_channels,
+ int startup_min_level,
+ int clipped_level_min,
+ bool disable_digital_adaptive,
+ int sample_rate_hz,
+ int clipped_level_step,
+ float clipped_ratio_threshold,
+ int clipped_wait_frames,
+ const ClippingPredictorConfig& clipping_config)
: data_dumper_(
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_counter_))),
use_min_channel_level_(!UseMaxAnalogChannelLevel()),
sample_rate_hz_(sample_rate_hz),
num_capture_channels_(num_capture_channels),
disable_digital_adaptive_(disable_digital_adaptive),
- frames_since_clipped_(kClippedWaitFrames),
+ frames_since_clipped_(clipped_wait_frames),
capture_output_used_(true),
+ clipped_level_step_(clipped_level_step),
+ clipped_ratio_threshold_(clipped_ratio_threshold),
+ clipped_wait_frames_(clipped_wait_frames),
channel_agcs_(num_capture_channels),
- new_compressions_to_set_(num_capture_channels) {
+ new_compressions_to_set_(num_capture_channels),
+ clipping_predictor_(
+ CreateClippingPredictor(num_capture_channels, clipping_config)),
+ use_clipping_predictor_step_(!!clipping_predictor_ &&
+ clipping_config.use_predicted_step),
+ clipping_predictor_evaluator_(kClippingPredictorEvaluatorHistorySize),
+ clipping_predictor_log_counter_(0) {
const int min_mic_level = GetMinMicLevel();
for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
ApmDataDumper* data_dumper_ch = ch == 0 ? data_dumper_.get() : nullptr;
@@ -438,7 +489,12 @@ AgcManagerDirect::AgcManagerDirect(int num_capture_channels,
data_dumper_ch, startup_min_level, clipped_level_min,
disable_digital_adaptive_, min_mic_level);
}
- RTC_DCHECK_LT(0, channel_agcs_.size());
+ RTC_DCHECK(!channel_agcs_.empty());
+ RTC_DCHECK_GT(clipped_level_step, 0);
+ RTC_DCHECK_LE(clipped_level_step, 255);
+ RTC_DCHECK_GT(clipped_ratio_threshold, 0.f);
+ RTC_DCHECK_LT(clipped_ratio_threshold, 1.f);
+ RTC_DCHECK_GT(clipped_wait_frames, 0);
channel_agcs_[0]->ActivateLogging();
}
@@ -453,6 +509,8 @@ void AgcManagerDirect::Initialize() {
capture_output_used_ = true;
AggregateChannelLevels();
+ clipping_predictor_evaluator_.Reset();
+ clipping_predictor_log_counter_ = 0;
}
void AgcManagerDirect::SetupDigitalGainControl(
@@ -489,7 +547,13 @@ void AgcManagerDirect::AnalyzePreProcess(const float* const* audio,
return;
}
- if (frames_since_clipped_ < kClippedWaitFrames) {
+ if (!!clipping_predictor_) {
+ AudioFrameView<const float> frame = AudioFrameView<const float>(
+ audio, num_capture_channels_, static_cast<int>(samples_per_channel));
+ clipping_predictor_->Analyze(frame);
+ }
+
+ if (frames_since_clipped_ < clipped_wait_frames_) {
++frames_since_clipped_;
return;
}
@@ -505,14 +569,54 @@ void AgcManagerDirect::AnalyzePreProcess(const float* const* audio,
// gain is increased, through SetMaxLevel().
float clipped_ratio =
ComputeClippedRatio(audio, num_capture_channels_, samples_per_channel);
-
- if (clipped_ratio > kClippedRatioThreshold) {
- RTC_DLOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio="
- << clipped_ratio;
+ const bool clipping_detected = clipped_ratio > clipped_ratio_threshold_;
+ bool clipping_predicted = false;
+ int predicted_step = 0;
+ if (!!clipping_predictor_) {
+ for (int channel = 0; channel < num_capture_channels_; ++channel) {
+ const auto step = clipping_predictor_->EstimateClippedLevelStep(
+ channel, stream_analog_level_, clipped_level_step_,
+ channel_agcs_[channel]->min_mic_level(), kMaxMicLevel);
+ if (use_clipping_predictor_step_ && step.has_value()) {
+ predicted_step = std::max(predicted_step, step.value());
+ clipping_predicted = true;
+ }
+ }
+ // Clipping prediction evaluation.
+ absl::optional<int> prediction_interval =
+ clipping_predictor_evaluator_.Observe(clipping_detected,
+ clipping_predicted);
+ if (prediction_interval.has_value()) {
+ RTC_HISTOGRAM_COUNTS_LINEAR(
+ "WebRTC.Audio.Agc.ClippingPredictor.PredictionInterval",
+ prediction_interval.value(), /*min=*/0,
+ /*max=*/49, /*bucket_count=*/50);
+ }
+ constexpr int kNumFramesIn30Seconds = 3000;
+ clipping_predictor_log_counter_++;
+ if (clipping_predictor_log_counter_ == kNumFramesIn30Seconds) {
+ LogClippingPredictorMetrics(clipping_predictor_evaluator_);
+ clipping_predictor_log_counter_ = 0;
+ }
+ }
+ if (clipping_detected || clipping_predicted) {
+ int step = clipped_level_step_;
+ if (clipping_detected) {
+ RTC_DLOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio="
+ << clipped_ratio;
+ }
+ if (clipping_predicted) {
+ step = std::max(predicted_step, clipped_level_step_);
+ RTC_DLOG(LS_INFO) << "[agc] Clipping predicted. step=" << step;
+ }
for (auto& state_ch : channel_agcs_) {
- state_ch->HandleClipping();
+ state_ch->HandleClipping(step);
}
frames_since_clipped_ = 0;
+ if (!!clipping_predictor_) {
+ clipping_predictor_->Reset();
+ clipping_predictor_evaluator_.Reset();
+ }
}
AggregateChannelLevels();
}
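
The applied step resolves to the configured `clipped_level_step_` unless the
predictor recommends a larger one. A minimal sketch of the selection (the
function name is illustrative):

    #include <algorithm>

    int ChooseStep(bool clipping_predicted, int predicted_step,
                   int default_step) {
      return clipping_predicted ? std::max(predicted_step, default_step)
                                : default_step;
    }
    // ChooseStep(/*clipping_predicted=*/true, 20, 15) == 20.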
diff --git a/modules/audio_processing/agc/agc_manager_direct.h b/modules/audio_processing/agc/agc_manager_direct.h
index f9417cffff..7ac96a661c 100644
--- a/modules/audio_processing/agc/agc_manager_direct.h
+++ b/modules/audio_processing/agc/agc_manager_direct.h
@@ -15,6 +15,8 @@
#include "absl/types/optional.h"
#include "modules/audio_processing/agc/agc.h"
+#include "modules/audio_processing/agc/clipping_predictor.h"
+#include "modules/audio_processing/agc/clipping_predictor_evaluator.h"
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/gtest_prod_util.h"
@@ -34,12 +36,23 @@ class AgcManagerDirect final {
// AgcManagerDirect will configure GainControl internally. The user is
// responsible for processing the audio using it after the call to Process.
// The operating range of startup_min_level is [12, 255] and any input value
- // outside that range will be clamped.
- AgcManagerDirect(int num_capture_channels,
- int startup_min_level,
- int clipped_level_min,
- bool disable_digital_adaptive,
- int sample_rate_hz);
+ // outside that range will be clamped. `clipped_level_step` is the amount
+ // the microphone level is lowered with every clipping event, limited to
+ // (0, 255]. `clipped_ratio_threshold` is the proportion of clipped
+ // samples required to declare a clipping event, limited to (0.f, 1.f).
+ // `clipped_wait_frames` is the time in frames to wait after a clipping event
+ // before checking again, limited to values higher than 0.
+ AgcManagerDirect(
+ int num_capture_channels,
+ int startup_min_level,
+ int clipped_level_min,
+ bool disable_digital_adaptive,
+ int sample_rate_hz,
+ int clipped_level_step,
+ float clipped_ratio_threshold,
+ int clipped_wait_frames,
+ const AudioProcessing::Config::GainController1::AnalogGainController::
+ ClippingPredictor& clipping_config);
~AgcManagerDirect();
AgcManagerDirect(const AgcManagerDirect&) = delete;
@@ -64,6 +77,14 @@ class AgcManagerDirect final {
// If available, returns a new compression gain for the digital gain control.
absl::optional<int> GetDigitalComressionGain();
+ // Returns true if clipping prediction is enabled.
+ bool clipping_predictor_enabled() const { return !!clipping_predictor_; }
+
+ // Returns true if clipping prediction is used to adjust the analog gain.
+ bool use_clipping_predictor_step() const {
+ return use_clipping_predictor_step_;
+ }
+
private:
friend class AgcManagerDirectTest;
@@ -81,13 +102,28 @@ class AgcManagerDirect final {
AgcMinMicLevelExperimentEnabled50);
FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectStandaloneTest,
AgcMinMicLevelExperimentEnabledAboveStartupLevel);
+ FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectStandaloneTest,
+ ClippingParametersVerified);
+ FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectStandaloneTest,
+ DisableClippingPredictorDoesNotLowerVolume);
+ FRIEND_TEST_ALL_PREFIXES(
+ AgcManagerDirectStandaloneTest,
+ EnableClippingPredictorWithUnusedPredictedStepDoesNotLowerVolume);
+ FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectStandaloneTest,
+ EnableClippingPredictorLowersVolume);
// Dependency injection for testing. Don't delete |agc| as the memory is owned
// by the manager.
- AgcManagerDirect(Agc* agc,
- int startup_min_level,
- int clipped_level_min,
- int sample_rate_hz);
+ AgcManagerDirect(
+ Agc* agc,
+ int startup_min_level,
+ int clipped_level_min,
+ int sample_rate_hz,
+ int clipped_level_step,
+ float clipped_ratio_threshold,
+ int clipped_wait_frames,
+ const AudioProcessing::Config::GainController1::AnalogGainController::
+ ClippingPredictor& clipping_config);
void AnalyzePreProcess(const float* const* audio, size_t samples_per_channel);
@@ -105,8 +141,17 @@ class AgcManagerDirect final {
bool capture_output_used_;
int channel_controlling_gain_ = 0;
+ const int clipped_level_step_;
+ const float clipped_ratio_threshold_;
+ const int clipped_wait_frames_;
+
std::vector<std::unique_ptr<MonoAgc>> channel_agcs_;
std::vector<absl::optional<int>> new_compressions_to_set_;
+
+ const std::unique_ptr<ClippingPredictor> clipping_predictor_;
+ const bool use_clipping_predictor_step_;
+ ClippingPredictorEvaluator clipping_predictor_evaluator_;
+ int clipping_predictor_log_counter_;
};
class MonoAgc {
@@ -123,7 +168,7 @@ class MonoAgc {
void Initialize();
void HandleCaptureOutputUsedChange(bool capture_output_used);
- void HandleClipping();
+ void HandleClipping(int clipped_level_step);
void Process(const int16_t* audio,
size_t samples_per_channel,
diff --git a/modules/audio_processing/agc/agc_manager_direct_unittest.cc b/modules/audio_processing/agc/agc_manager_direct_unittest.cc
index 1954ed4b21..bb284f9abc 100644
--- a/modules/audio_processing/agc/agc_manager_direct_unittest.cc
+++ b/modules/audio_processing/agc/agc_manager_direct_unittest.cc
@@ -26,13 +26,19 @@ using ::testing::SetArgPointee;
namespace webrtc {
namespace {
-const int kSampleRateHz = 32000;
-const int kNumChannels = 1;
-const int kSamplesPerChannel = kSampleRateHz / 100;
-const int kInitialVolume = 128;
+constexpr int kSampleRateHz = 32000;
+constexpr int kNumChannels = 1;
+constexpr int kSamplesPerChannel = kSampleRateHz / 100;
+constexpr int kInitialVolume = 128;
constexpr int kClippedMin = 165; // Arbitrary, but different from the default.
-const float kAboveClippedThreshold = 0.2f;
-const int kMinMicLevel = 12;
+constexpr float kAboveClippedThreshold = 0.2f;
+constexpr int kMinMicLevel = 12;
+constexpr int kClippedLevelStep = 15;
+constexpr float kClippedRatioThreshold = 0.1f;
+constexpr int kClippedWaitFrames = 300;
+
+using ClippingPredictorConfig = AudioProcessing::Config::GainController1::
+ AnalogGainController::ClippingPredictor;
class MockGainControl : public GainControl {
public:
@@ -57,10 +63,53 @@ class MockGainControl : public GainControl {
};
std::unique_ptr<AgcManagerDirect> CreateAgcManagerDirect(
- int startup_min_level) {
+ int startup_min_level,
+ int clipped_level_step,
+ float clipped_ratio_threshold,
+ int clipped_wait_frames) {
return std::make_unique<AgcManagerDirect>(
/*num_capture_channels=*/1, startup_min_level, kClippedMin,
- /*disable_digital_adaptive=*/true, kSampleRateHz);
+ /*disable_digital_adaptive=*/true, kSampleRateHz, clipped_level_step,
+ clipped_ratio_threshold, clipped_wait_frames, ClippingPredictorConfig());
+}
+
+std::unique_ptr<AgcManagerDirect> CreateAgcManagerDirect(
+ int startup_min_level,
+ int clipped_level_step,
+ float clipped_ratio_threshold,
+ int clipped_wait_frames,
+ const ClippingPredictorConfig& clipping_cfg) {
+ return std::make_unique<AgcManagerDirect>(
+ /*num_capture_channels=*/1, startup_min_level, kClippedMin,
+ /*disable_digital_adaptive=*/true, kSampleRateHz, clipped_level_step,
+ clipped_ratio_threshold, clipped_wait_frames, clipping_cfg);
+}
+
+void CallPreProcessAudioBuffer(int num_calls,
+ float peak_ratio,
+ AgcManagerDirect& manager) {
+ RTC_DCHECK_GE(1.f, peak_ratio);
+ AudioBuffer audio_buffer(kSampleRateHz, 1, kSampleRateHz, 1, kSampleRateHz,
+ 1);
+ const int num_channels = audio_buffer.num_channels();
+ const int num_frames = audio_buffer.num_frames();
+ for (int ch = 0; ch < num_channels; ++ch) {
+ for (int i = 0; i < num_frames; i += 2) {
+ audio_buffer.channels()[ch][i] = peak_ratio * 32767.f;
+ audio_buffer.channels()[ch][i + 1] = 0.0f;
+ }
+ }
+ for (int n = 0; n < num_calls / 2; ++n) {
+ manager.AnalyzePreProcess(&audio_buffer);
+ }
+ for (int ch = 0; ch < num_channels; ++ch) {
+ for (int i = 0; i < num_frames; ++i) {
+ audio_buffer.channels()[ch][i] = peak_ratio * 32767.f;
+ }
+ }
+ for (int n = 0; n < num_calls - num_calls / 2; ++n) {
+ manager.AnalyzePreProcess(&audio_buffer);
+ }
}
} // namespace
@@ -69,7 +118,14 @@ class AgcManagerDirectTest : public ::testing::Test {
protected:
AgcManagerDirectTest()
: agc_(new MockAgc),
- manager_(agc_, kInitialVolume, kClippedMin, kSampleRateHz),
+ manager_(agc_,
+ kInitialVolume,
+ kClippedMin,
+ kSampleRateHz,
+ kClippedLevelStep,
+ kClippedRatioThreshold,
+ kClippedWaitFrames,
+ ClippingPredictorConfig()),
audio(kNumChannels),
audio_data(kNumChannels * kSamplesPerChannel, 0.f) {
ExpectInitialize();
@@ -124,12 +180,32 @@ class AgcManagerDirectTest : public ::testing::Test {
audio[ch][k] = 32767.f;
}
}
-
for (int i = 0; i < num_calls; ++i) {
manager_.AnalyzePreProcess(audio.data(), kSamplesPerChannel);
}
}
+ void CallPreProcForChangingAudio(int num_calls, float peak_ratio) {
+ RTC_DCHECK_GE(1.f, peak_ratio);
+ std::fill(audio_data.begin(), audio_data.end(), 0.f);
+ for (size_t ch = 0; ch < kNumChannels; ++ch) {
+ for (size_t k = 0; k < kSamplesPerChannel; k += 2) {
+ audio[ch][k] = peak_ratio * 32767.f;
+ }
+ }
+ for (int i = 0; i < num_calls / 2; ++i) {
+ manager_.AnalyzePreProcess(audio.data(), kSamplesPerChannel);
+ }
+ for (size_t ch = 0; ch < kNumChannels; ++ch) {
+ for (size_t k = 0; k < kSamplesPerChannel; ++k) {
+ audio[ch][k] = peak_ratio * 32767.f;
+ }
+ }
+ for (int i = 0; i < num_calls - num_calls / 2; ++i) {
+ manager_.AnalyzePreProcess(audio.data(), kSamplesPerChannel);
+ }
+ }
+
MockAgc* agc_;
MockGainControl gctrl_;
AgcManagerDirect manager_;
@@ -696,6 +772,25 @@ TEST_F(AgcManagerDirectTest, TakesNoActionOnZeroMicVolume) {
EXPECT_EQ(0, manager_.stream_analog_level());
}
+TEST_F(AgcManagerDirectTest, ClippingDetectionLowersVolume) {
+ SetVolumeAndProcess(255);
+ EXPECT_EQ(255, manager_.stream_analog_level());
+ CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/0.99f);
+ EXPECT_EQ(255, manager_.stream_analog_level());
+ CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/1.0f);
+ EXPECT_EQ(240, manager_.stream_analog_level());
+}
+
+TEST_F(AgcManagerDirectTest, DisabledClippingPredictorDoesNotLowerVolume) {
+ SetVolumeAndProcess(255);
+ EXPECT_FALSE(manager_.clipping_predictor_enabled());
+ EXPECT_EQ(255, manager_.stream_analog_level());
+ CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/0.99f);
+ EXPECT_EQ(255, manager_.stream_analog_level());
+ CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/0.99f);
+ EXPECT_EQ(255, manager_.stream_analog_level());
+}
+
TEST(AgcManagerDirectStandaloneTest, DisableDigitalDisablesDigital) {
auto agc = std::unique_ptr<Agc>(new ::testing::NiceMock<MockAgc>());
MockGainControl gctrl;
@@ -705,14 +800,16 @@ TEST(AgcManagerDirectStandaloneTest, DisableDigitalDisablesDigital) {
EXPECT_CALL(gctrl, enable_limiter(false));
std::unique_ptr<AgcManagerDirect> manager =
- CreateAgcManagerDirect(kInitialVolume);
+ CreateAgcManagerDirect(kInitialVolume, kClippedLevelStep,
+ kClippedRatioThreshold, kClippedWaitFrames);
manager->Initialize();
manager->SetupDigitalGainControl(&gctrl);
}
TEST(AgcManagerDirectStandaloneTest, AgcMinMicLevelExperiment) {
std::unique_ptr<AgcManagerDirect> manager =
- CreateAgcManagerDirect(kInitialVolume);
+ CreateAgcManagerDirect(kInitialVolume, kClippedLevelStep,
+ kClippedRatioThreshold, kClippedWaitFrames);
EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel);
EXPECT_EQ(manager->channel_agcs_[0]->startup_min_level(), kInitialVolume);
}
@@ -721,7 +818,8 @@ TEST(AgcManagerDirectStandaloneTest, AgcMinMicLevelExperimentDisabled) {
test::ScopedFieldTrials field_trial(
"WebRTC-Audio-AgcMinMicLevelExperiment/Disabled/");
std::unique_ptr<AgcManagerDirect> manager =
- CreateAgcManagerDirect(kInitialVolume);
+ CreateAgcManagerDirect(kInitialVolume, kClippedLevelStep,
+ kClippedRatioThreshold, kClippedWaitFrames);
EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel);
EXPECT_EQ(manager->channel_agcs_[0]->startup_min_level(), kInitialVolume);
}
@@ -732,7 +830,8 @@ TEST(AgcManagerDirectStandaloneTest, AgcMinMicLevelExperimentOutOfRangeAbove) {
test::ScopedFieldTrials field_trial(
"WebRTC-Audio-AgcMinMicLevelExperiment/Enabled-256/");
std::unique_ptr<AgcManagerDirect> manager =
- CreateAgcManagerDirect(kInitialVolume);
+ CreateAgcManagerDirect(kInitialVolume, kClippedLevelStep,
+ kClippedRatioThreshold, kClippedWaitFrames);
EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel);
EXPECT_EQ(manager->channel_agcs_[0]->startup_min_level(), kInitialVolume);
}
@@ -743,7 +842,8 @@ TEST(AgcManagerDirectStandaloneTest, AgcMinMicLevelExperimentOutOfRangeBelow) {
test::ScopedFieldTrials field_trial(
"WebRTC-Audio-AgcMinMicLevelExperiment/Enabled--1/");
std::unique_ptr<AgcManagerDirect> manager =
- CreateAgcManagerDirect(kInitialVolume);
+ CreateAgcManagerDirect(kInitialVolume, kClippedLevelStep,
+ kClippedRatioThreshold, kClippedWaitFrames);
EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel);
EXPECT_EQ(manager->channel_agcs_[0]->startup_min_level(), kInitialVolume);
}
@@ -755,7 +855,8 @@ TEST(AgcManagerDirectStandaloneTest, AgcMinMicLevelExperimentEnabled50) {
test::ScopedFieldTrials field_trial(
"WebRTC-Audio-AgcMinMicLevelExperiment/Enabled-50/");
std::unique_ptr<AgcManagerDirect> manager =
- CreateAgcManagerDirect(kInitialVolume);
+ CreateAgcManagerDirect(kInitialVolume, kClippedLevelStep,
+ kClippedRatioThreshold, kClippedWaitFrames);
EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), 50);
EXPECT_EQ(manager->channel_agcs_[0]->startup_min_level(), kInitialVolume);
}
@@ -768,9 +869,130 @@ TEST(AgcManagerDirectStandaloneTest,
test::ScopedFieldTrials field_trial(
"WebRTC-Audio-AgcMinMicLevelExperiment/Enabled-50/");
std::unique_ptr<AgcManagerDirect> manager =
- CreateAgcManagerDirect(/*startup_min_level=*/30);
+ CreateAgcManagerDirect(/*startup_min_level=*/30, kClippedLevelStep,
+ kClippedRatioThreshold, kClippedWaitFrames);
EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), 50);
EXPECT_EQ(manager->channel_agcs_[0]->startup_min_level(), 50);
}
+// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_level_step`.
+// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_ratio_threshold`.
+// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_wait_frames`.
+// Verifies that configurable clipping parameters are initialized as intended.
+TEST(AgcManagerDirectStandaloneTest, ClippingParametersVerified) {
+ std::unique_ptr<AgcManagerDirect> manager =
+ CreateAgcManagerDirect(kInitialVolume, kClippedLevelStep,
+ kClippedRatioThreshold, kClippedWaitFrames);
+ manager->Initialize();
+ EXPECT_EQ(manager->clipped_level_step_, kClippedLevelStep);
+ EXPECT_EQ(manager->clipped_ratio_threshold_, kClippedRatioThreshold);
+ EXPECT_EQ(manager->clipped_wait_frames_, kClippedWaitFrames);
+ std::unique_ptr<AgcManagerDirect> manager_custom =
+ CreateAgcManagerDirect(kInitialVolume,
+ /*clipped_level_step=*/10,
+ /*clipped_ratio_threshold=*/0.2f,
+ /*clipped_wait_frames=*/50);
+ manager_custom->Initialize();
+ EXPECT_EQ(manager_custom->clipped_level_step_, 10);
+ EXPECT_EQ(manager_custom->clipped_ratio_threshold_, 0.2f);
+ EXPECT_EQ(manager_custom->clipped_wait_frames_, 50);
+}
+
+TEST(AgcManagerDirectStandaloneTest,
+ DisableClippingPredictorDisablesClippingPredictor) {
+ ClippingPredictorConfig default_config;
+ EXPECT_FALSE(default_config.enabled);
+ std::unique_ptr<AgcManagerDirect> manager = CreateAgcManagerDirect(
+ kInitialVolume, kClippedLevelStep, kClippedRatioThreshold,
+ kClippedWaitFrames, default_config);
+ manager->Initialize();
+ EXPECT_FALSE(manager->clipping_predictor_enabled());
+ EXPECT_FALSE(manager->use_clipping_predictor_step());
+}
+
+TEST(AgcManagerDirectStandaloneTest, ClippingPredictorDisabledByDefault) {
+ constexpr ClippingPredictorConfig kDefaultConfig;
+ EXPECT_FALSE(kDefaultConfig.enabled);
+}
+
+TEST(AgcManagerDirectStandaloneTest,
+ EnableClippingPredictorEnablesClippingPredictor) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ ClippingPredictorConfig config;
+ config.enabled = true;
+ config.use_predicted_step = true;
+ std::unique_ptr<AgcManagerDirect> manager = CreateAgcManagerDirect(
+ kInitialVolume, kClippedLevelStep, kClippedRatioThreshold,
+ kClippedWaitFrames, config);
+ manager->Initialize();
+ EXPECT_TRUE(manager->clipping_predictor_enabled());
+ EXPECT_TRUE(manager->use_clipping_predictor_step());
+}
+
+TEST(AgcManagerDirectStandaloneTest,
+ DisableClippingPredictorDoesNotLowerVolume) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ constexpr ClippingPredictorConfig kConfig{/*enabled=*/false};
+ AgcManagerDirect manager(new ::testing::NiceMock<MockAgc>(), kInitialVolume,
+ kClippedMin, kSampleRateHz, kClippedLevelStep,
+ kClippedRatioThreshold, kClippedWaitFrames, kConfig);
+ manager.Initialize();
+ manager.set_stream_analog_level(/*level=*/255);
+ EXPECT_FALSE(manager.clipping_predictor_enabled());
+ EXPECT_FALSE(manager.use_clipping_predictor_step());
+ EXPECT_EQ(manager.stream_analog_level(), 255);
+ manager.Process(nullptr);
+ CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager);
+ EXPECT_EQ(manager.stream_analog_level(), 255);
+ CallPreProcessAudioBuffer(/*num_calls=*/300, /*peak_ratio=*/0.99f, manager);
+ EXPECT_EQ(manager.stream_analog_level(), 255);
+ CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager);
+ EXPECT_EQ(manager.stream_analog_level(), 255);
+}
+
+TEST(AgcManagerDirectStandaloneTest,
+ EnableClippingPredictorWithUnusedPredictedStepDoesNotLowerVolume) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ ClippingPredictorConfig config;
+ config.enabled = true;
+ config.use_predicted_step = false;
+ AgcManagerDirect manager(new ::testing::NiceMock<MockAgc>(), kInitialVolume,
+ kClippedMin, kSampleRateHz, kClippedLevelStep,
+ kClippedRatioThreshold, kClippedWaitFrames, config);
+ manager.Initialize();
+ manager.set_stream_analog_level(/*level=*/255);
+ EXPECT_TRUE(manager.clipping_predictor_enabled());
+ EXPECT_FALSE(manager.use_clipping_predictor_step());
+ EXPECT_EQ(manager.stream_analog_level(), 255);
+ manager.Process(nullptr);
+ CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager);
+ EXPECT_EQ(manager.stream_analog_level(), 255);
+ CallPreProcessAudioBuffer(/*num_calls=*/300, /*peak_ratio=*/0.99f, manager);
+ EXPECT_EQ(manager.stream_analog_level(), 255);
+ CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager);
+ EXPECT_EQ(manager.stream_analog_level(), 255);
+}
+
+TEST(AgcManagerDirectStandaloneTest, EnableClippingPredictorLowersVolume) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ ClippingPredictorConfig config;
+ config.enabled = true;
+ config.use_predicted_step = true;
+ AgcManagerDirect manager(new ::testing::NiceMock<MockAgc>(), kInitialVolume,
+ kClippedMin, kSampleRateHz, kClippedLevelStep,
+ kClippedRatioThreshold, kClippedWaitFrames, config);
+ manager.Initialize();
+ manager.set_stream_analog_level(/*level=*/255);
+ EXPECT_TRUE(manager.clipping_predictor_enabled());
+ EXPECT_TRUE(manager.use_clipping_predictor_step());
+ EXPECT_EQ(manager.stream_analog_level(), 255);
+ manager.Process(nullptr);
+ CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager);
+ EXPECT_EQ(manager.stream_analog_level(), 240);
+ CallPreProcessAudioBuffer(/*num_calls=*/300, /*peak_ratio=*/0.99f, manager);
+ EXPECT_EQ(manager.stream_analog_level(), 240);
+ CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager);
+ EXPECT_EQ(manager.stream_analog_level(), 225);
+}
+
} // namespace webrtc
diff --git a/modules/audio_processing/agc/clipping_predictor.cc b/modules/audio_processing/agc/clipping_predictor.cc
new file mode 100644
index 0000000000..982bbca2ee
--- /dev/null
+++ b/modules/audio_processing/agc/clipping_predictor.cc
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc/clipping_predictor.h"
+
+#include <algorithm>
+#include <memory>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc/clipping_predictor_level_buffer.h"
+#include "modules/audio_processing/agc/gain_map_internal.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kClippingPredictorMaxGainChange = 15;
+
+// Estimates the new level from the gain error; a copy of the function
+// `LevelFromGainError` in agc_manager_direct.cc.
+int LevelFromGainError(int gain_error,
+ int level,
+ int min_mic_level,
+ int max_mic_level) {
+ RTC_DCHECK_GE(level, 0);
+ RTC_DCHECK_LE(level, max_mic_level);
+ if (gain_error == 0) {
+ return level;
+ }
+ int new_level = level;
+ if (gain_error > 0) {
+ while (kGainMap[new_level] - kGainMap[level] < gain_error &&
+ new_level < max_mic_level) {
+ ++new_level;
+ }
+ } else {
+ while (kGainMap[new_level] - kGainMap[level] > gain_error &&
+ new_level > min_mic_level) {
+ --new_level;
+ }
+ }
+ return new_level;
+}
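
The walk above moves the level until the accumulated gain-map difference covers
`gain_error`. A toy analogue for positive errors over a hypothetical four-entry
map (the real `kGainMap` has one entry per mic level):

    // Toy analogue; kToyGainMap is hypothetical.
    constexpr int kToyGainMap[] = {0, 3, 6, 9};  // Gain in dB per level.
    int ToyLevelFromGainError(int gain_error, int level) {
      int new_level = level;
      while (new_level < 3 &&
             kToyGainMap[new_level] - kToyGainMap[level] < gain_error) {
        ++new_level;
      }
      return new_level;
    }
    // ToyLevelFromGainError(/*gain_error=*/5, /*level=*/0) == 2: level 2 is
    // the first whose map gain covers the requested 5 dB increase.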
+
+float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) {
+ const float crest_factor =
+ FloatS16ToDbfs(level.max) - FloatS16ToDbfs(std::sqrt(level.average));
+ return crest_factor;
+}
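
Since `FloatS16ToDbfs(x)` is effectively 20 * log10(x / 32768), the full-scale
reference cancels in the difference and the crest factor reduces to peak-to-RMS
in dB. An equivalent closed form (sketch):

    #include <cmath>

    // level.average holds mean squares, so its square root is the RMS.
    float CrestFactorDb(float peak, float mean_squares) {
      return 20.0f * std::log10(peak / std::sqrt(mean_squares));
    }
    // E.g., for a full-scale sine peak/RMS is sqrt(2), i.e. ~3.01 dB.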
+
+// Crest factor-based clipping prediction and clipped level step estimation.
+class ClippingEventPredictor : public ClippingPredictor {
+ public:
+ // ClippingEventPredictor with `num_channels` channels (limited to values
+ // higher than zero); window size `window_length` and reference window size
+ // `reference_window_length` (both referring to the number of frames in the
+ // respective sliding windows and limited to values higher than zero);
+ // reference window delay `reference_window_delay` (delay in frames, limited
+ // to values zero and higher with an additional requirement of
+  // `window_length` < `reference_window_length` + `reference_window_delay`);
+ // and an estimation peak threshold `clipping_threshold` and a crest factor
+ // drop threshold `crest_factor_margin` (both in dB).
+ ClippingEventPredictor(int num_channels,
+ int window_length,
+ int reference_window_length,
+ int reference_window_delay,
+ float clipping_threshold,
+ float crest_factor_margin)
+ : window_length_(window_length),
+ reference_window_length_(reference_window_length),
+ reference_window_delay_(reference_window_delay),
+ clipping_threshold_(clipping_threshold),
+ crest_factor_margin_(crest_factor_margin) {
+ RTC_DCHECK_GT(num_channels, 0);
+ RTC_DCHECK_GT(window_length, 0);
+ RTC_DCHECK_GT(reference_window_length, 0);
+ RTC_DCHECK_GE(reference_window_delay, 0);
+ RTC_DCHECK_GT(reference_window_length + reference_window_delay,
+ window_length);
+ const int buffer_length = GetMinFramesProcessed();
+ RTC_DCHECK_GT(buffer_length, 0);
+ for (int i = 0; i < num_channels; ++i) {
+ ch_buffers_.push_back(
+ std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
+ }
+ }
+
+ ClippingEventPredictor(const ClippingEventPredictor&) = delete;
+ ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete;
+ ~ClippingEventPredictor() {}
+
+ void Reset() {
+ const int num_channels = ch_buffers_.size();
+ for (int i = 0; i < num_channels; ++i) {
+ ch_buffers_[i]->Reset();
+ }
+ }
+
+ // Analyzes a frame of audio and stores the framewise metrics in
+ // `ch_buffers_`.
+ void Analyze(const AudioFrameView<const float>& frame) {
+ const int num_channels = frame.num_channels();
+ RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
+ const int samples_per_channel = frame.samples_per_channel();
+ RTC_DCHECK_GT(samples_per_channel, 0);
+ for (int channel = 0; channel < num_channels; ++channel) {
+ float sum_squares = 0.0f;
+ float peak = 0.0f;
+ for (const auto& sample : frame.channel(channel)) {
+ sum_squares += sample * sample;
+ peak = std::max(std::fabs(sample), peak);
+ }
+ ch_buffers_[channel]->Push(
+ {sum_squares / static_cast<float>(samples_per_channel), peak});
+ }
+ }
+
+ // Estimates the analog gain adjustment for channel `channel` using a
+ // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
+  // estimate for the clipped level step equal to `default_step`
+ // if at least `GetMinFramesProcessed()` frames have been processed since the
+ // last reset and a clipping event is predicted. `level`, `min_mic_level`, and
+ // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
+ absl::optional<int> EstimateClippedLevelStep(int channel,
+ int level,
+ int default_step,
+ int min_mic_level,
+ int max_mic_level) const {
+ RTC_CHECK_GE(channel, 0);
+ RTC_CHECK_LT(channel, ch_buffers_.size());
+ RTC_DCHECK_GE(level, 0);
+ RTC_DCHECK_LE(level, 255);
+ RTC_DCHECK_GT(default_step, 0);
+ RTC_DCHECK_LE(default_step, 255);
+ RTC_DCHECK_GE(min_mic_level, 0);
+ RTC_DCHECK_LE(min_mic_level, 255);
+ RTC_DCHECK_GE(max_mic_level, 0);
+ RTC_DCHECK_LE(max_mic_level, 255);
+ if (level <= min_mic_level) {
+ return absl::nullopt;
+ }
+ if (PredictClippingEvent(channel)) {
+ const int new_level =
+ rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level);
+ const int step = level - new_level;
+ if (step > 0) {
+ return step;
+ }
+ }
+ return absl::nullopt;
+ }
+
+ private:
+ int GetMinFramesProcessed() const {
+ return reference_window_delay_ + reference_window_length_;
+ }
+
+ // Predicts clipping events based on the processed audio frames. Returns
+ // true if a clipping event is likely.
+ bool PredictClippingEvent(int channel) const {
+ const auto metrics =
+ ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
+ if (!metrics.has_value() ||
+ !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
+ return false;
+ }
+ const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
+ reference_window_delay_, reference_window_length_);
+ if (!reference_metrics.has_value()) {
+ return false;
+ }
+ const float crest_factor = ComputeCrestFactor(metrics.value());
+ const float reference_crest_factor =
+ ComputeCrestFactor(reference_metrics.value());
+ if (crest_factor < reference_crest_factor - crest_factor_margin_) {
+ return true;
+ }
+ return false;
+ }
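
In isolation, the decision rule above reads: predict a clipping event when the
recent window's crest factor drops more than `crest_factor_margin_` dB below
that of the delayed reference window (peaks flatten while energy persists),
provided the recent peak already exceeds `clipping_threshold_`. As a sketch:

    bool CrestFactorDropped(float crest_factor, float reference_crest_factor,
                            float margin_db) {
      return crest_factor < reference_crest_factor - margin_db;
    }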
+
+ std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
+ const int window_length_;
+ const int reference_window_length_;
+ const int reference_window_delay_;
+ const float clipping_threshold_;
+ const float crest_factor_margin_;
+};
+
+// Performs crest factor-based clipping peak prediction.
+class ClippingPeakPredictor : public ClippingPredictor {
+ public:
+ // Ctor. ClippingPeakPredictor with `num_channels` channels (limited to values
+ // higher than zero); window size `window_length` and reference window size
+ // `reference_window_length` (both referring to the number of frames in the
+ // respective sliding windows and limited to values higher than zero);
+ // reference window delay `reference_window_delay` (delay in frames, limited
+ // to values zero and higher with an additional requirement of
+  // `window_length` < `reference_window_length` + `reference_window_delay`);
+ // and a clipping prediction threshold `clipping_threshold` (in dB). Adaptive
+ // clipped level step estimation is used if `adaptive_step_estimation` is
+ // true.
+ explicit ClippingPeakPredictor(int num_channels,
+ int window_length,
+ int reference_window_length,
+ int reference_window_delay,
+ int clipping_threshold,
+ bool adaptive_step_estimation)
+ : window_length_(window_length),
+ reference_window_length_(reference_window_length),
+ reference_window_delay_(reference_window_delay),
+ clipping_threshold_(clipping_threshold),
+ adaptive_step_estimation_(adaptive_step_estimation) {
+ RTC_DCHECK_GT(num_channels, 0);
+ RTC_DCHECK_GT(window_length, 0);
+ RTC_DCHECK_GT(reference_window_length, 0);
+ RTC_DCHECK_GE(reference_window_delay, 0);
+ RTC_DCHECK_GT(reference_window_length + reference_window_delay,
+ window_length);
+ const int buffer_length = GetMinFramesProcessed();
+ RTC_DCHECK_GT(buffer_length, 0);
+ for (int i = 0; i < num_channels; ++i) {
+ ch_buffers_.push_back(
+ std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
+ }
+ }
+
+ ClippingPeakPredictor(const ClippingPeakPredictor&) = delete;
+ ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete;
+ ~ClippingPeakPredictor() {}
+
+ void Reset() {
+ const int num_channels = ch_buffers_.size();
+ for (int i = 0; i < num_channels; ++i) {
+ ch_buffers_[i]->Reset();
+ }
+ }
+
+ // Analyzes a frame of audio and stores the framewise metrics in
+ // `ch_buffers_`.
+ void Analyze(const AudioFrameView<const float>& frame) {
+ const int num_channels = frame.num_channels();
+ RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
+ const int samples_per_channel = frame.samples_per_channel();
+ RTC_DCHECK_GT(samples_per_channel, 0);
+ for (int channel = 0; channel < num_channels; ++channel) {
+ float sum_squares = 0.0f;
+ float peak = 0.0f;
+ for (const auto& sample : frame.channel(channel)) {
+ sum_squares += sample * sample;
+ peak = std::max(std::fabs(sample), peak);
+ }
+ ch_buffers_[channel]->Push(
+ {sum_squares / static_cast<float>(samples_per_channel), peak});
+ }
+ }
+
+ // Estimates the analog gain adjustment for channel `channel` using a
+ // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
+ // estimate for the clipped level step (equal to
+  // `default_step` if `adaptive_step_estimation_` is false) if at
+ // least `GetMinFramesProcessed()` frames have been processed since the last
+ // reset and a clipping event is predicted. `level`, `min_mic_level`, and
+ // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
+ absl::optional<int> EstimateClippedLevelStep(int channel,
+ int level,
+ int default_step,
+ int min_mic_level,
+ int max_mic_level) const {
+ RTC_DCHECK_GE(channel, 0);
+ RTC_DCHECK_LT(channel, ch_buffers_.size());
+ RTC_DCHECK_GE(level, 0);
+ RTC_DCHECK_LE(level, 255);
+ RTC_DCHECK_GT(default_step, 0);
+ RTC_DCHECK_LE(default_step, 255);
+ RTC_DCHECK_GE(min_mic_level, 0);
+ RTC_DCHECK_LE(min_mic_level, 255);
+ RTC_DCHECK_GE(max_mic_level, 0);
+ RTC_DCHECK_LE(max_mic_level, 255);
+ if (level <= min_mic_level) {
+ return absl::nullopt;
+ }
+ absl::optional<float> estimate_db = EstimatePeakValue(channel);
+ if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) {
+ int step = 0;
+ if (!adaptive_step_estimation_) {
+ step = default_step;
+ } else {
+ const int estimated_gain_change =
+ rtc::SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())),
+ -kClippingPredictorMaxGainChange, 0);
+ step =
+ std::max(level - LevelFromGainError(estimated_gain_change, level,
+ min_mic_level, max_mic_level),
+ default_step);
+ }
+ const int new_level =
+ rtc::SafeClamp(level - step, min_mic_level, max_mic_level);
+ if (level > new_level) {
+ return level - new_level;
+ }
+ }
+ return absl::nullopt;
+ }
+
+ private:
+ int GetMinFramesProcessed() {
+ return reference_window_delay_ + reference_window_length_;
+ }
+
+ // Predicts clipping sample peaks based on the processed audio frames.
+ // Returns the estimated peak value if clipping is predicted. Otherwise
+ // returns absl::nullopt.
+ absl::optional<float> EstimatePeakValue(int channel) const {
+ const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
+ reference_window_delay_, reference_window_length_);
+ if (!reference_metrics.has_value()) {
+ return absl::nullopt;
+ }
+ const auto metrics =
+ ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
+ if (!metrics.has_value() ||
+ !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
+ return absl::nullopt;
+ }
+ const float reference_crest_factor =
+ ComputeCrestFactor(reference_metrics.value());
+ const float& mean_squares = metrics.value().average;
+ const float projected_peak =
+ reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares));
+ return projected_peak;
+ }
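
The projection assumes the reference window's crest factor still holds and adds
it to the current RMS level (both in dB). A numeric sketch with illustrative
values:

    // Mirrors the sum above; values are illustrative.
    float ProjectPeakDbfs(float reference_crest_factor_db, float rms_dbfs) {
      return reference_crest_factor_db + rms_dbfs;
    }
    // ProjectPeakDbfs(12.0f, -10.0f) == 2.0f: a projected peak above 0 dBFS,
    // so clipping is predicted for any threshold below 2 dB.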
+
+ std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
+ const int window_length_;
+ const int reference_window_length_;
+ const int reference_window_delay_;
+ const int clipping_threshold_;
+ const bool adaptive_step_estimation_;
+};
+
+} // namespace
+
+std::unique_ptr<ClippingPredictor> CreateClippingPredictor(
+ int num_channels,
+ const AudioProcessing::Config::GainController1::AnalogGainController::
+ ClippingPredictor& config) {
+ if (!config.enabled) {
+ RTC_LOG(LS_INFO) << "[agc] Clipping prediction disabled.";
+ return nullptr;
+ }
+ RTC_LOG(LS_INFO) << "[agc] Clipping prediction enabled.";
+ using ClippingPredictorMode = AudioProcessing::Config::GainController1::
+ AnalogGainController::ClippingPredictor::Mode;
+ switch (config.mode) {
+ case ClippingPredictorMode::kClippingEventPrediction:
+ return std::make_unique<ClippingEventPredictor>(
+ num_channels, config.window_length, config.reference_window_length,
+ config.reference_window_delay, config.clipping_threshold,
+ config.crest_factor_margin);
+ case ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction:
+ return std::make_unique<ClippingPeakPredictor>(
+ num_channels, config.window_length, config.reference_window_length,
+ config.reference_window_delay, config.clipping_threshold,
+ /*adaptive_step_estimation=*/true);
+ case ClippingPredictorMode::kFixedStepClippingPeakPrediction:
+ return std::make_unique<ClippingPeakPredictor>(
+ num_channels, config.window_length, config.reference_window_length,
+ config.reference_window_delay, config.clipping_threshold,
+ /*adaptive_step_estimation=*/false);
+ }
+ RTC_NOTREACHED();
+}
+
+} // namespace webrtc
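
A minimal usage sketch of the factory; `MakeEventPredictor` is a hypothetical
helper and the enabled mode is illustrative, not taken from the patch:

    #include <memory>
    #include "modules/audio_processing/agc/clipping_predictor.h"

    std::unique_ptr<ClippingPredictor> MakeEventPredictor(int num_channels) {
      AudioProcessing::Config::GainController1::AnalogGainController::
          ClippingPredictor config;
      config.enabled = true;
      config.mode = AudioProcessing::Config::GainController1::
          AnalogGainController::ClippingPredictor::Mode::
              kClippingEventPrediction;
      return CreateClippingPredictor(num_channels, config);
    }
    // Call predictor->Analyze(frame) on each 10 ms frame; call
    // predictor->EstimateClippedLevelStep(...) when a level decrease may be
    // needed.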
diff --git a/modules/audio_processing/agc/clipping_predictor.h b/modules/audio_processing/agc/clipping_predictor.h
new file mode 100644
index 0000000000..ee2b6ef1e7
--- /dev/null
+++ b/modules/audio_processing/agc/clipping_predictor.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_H_
+#define MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_H_
+
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+// Frame-wise clipping prediction and clipped level step estimation. Analyzes
+// 10 ms multi-channel frames and estimates an analog mic level decrease step
+// to possibly avoid clipping when predicted. `Analyze()` and
+// `EstimateClippedLevelStep()` can be called in any order.
+class ClippingPredictor {
+ public:
+ virtual ~ClippingPredictor() = default;
+
+ virtual void Reset() = 0;
+
+ // Analyzes a 10 ms multi-channel audio frame.
+ virtual void Analyze(const AudioFrameView<const float>& frame) = 0;
+
+  // Predicts whether clipping is going to occur for the specified `channel`
+  // in the near future and, if so, returns a recommended analog mic level
+  // decrease step. Returns absl::nullopt if clipping is not predicted.
+  // `level` is the current analog mic level, `default_step` is the amount the
+  // mic level is lowered by the analog controller with every clipping event,
+  // and `min_mic_level` and `max_mic_level` define the range of allowed
+  // analog mic levels.
+ virtual absl::optional<int> EstimateClippedLevelStep(
+ int channel,
+ int level,
+ int default_step,
+ int min_mic_level,
+ int max_mic_level) const = 0;
+
+
+// Creates a ClippingPredictor based on the provided `config`. When enabled,
+// the following must hold for `config`:
+// `window_length < reference_window_length + reference_window_delay`.
+// Returns `nullptr` if `config.enabled` is false.
+std::unique_ptr<ClippingPredictor> CreateClippingPredictor(
+ int num_channels,
+ const AudioProcessing::Config::GainController1::AnalogGainController::
+ ClippingPredictor& config);
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_H_
diff --git a/modules/audio_processing/agc/clipping_predictor_evaluator.cc b/modules/audio_processing/agc/clipping_predictor_evaluator.cc
new file mode 100644
index 0000000000..2a4ea922cf
--- /dev/null
+++ b/modules/audio_processing/agc/clipping_predictor_evaluator.cc
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc/clipping_predictor_evaluator.h"
+
+#include <algorithm>
+
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace {
+
+// Returns the index of the oldest item in a non-empty ring buffer with the
+// given `size`, `tail` index and `capacity`.
+int OldestExpectedDetectionIndex(int size, int tail, int capacity) {
+ RTC_DCHECK_GT(size, 0);
+ return tail - size + (tail < size ? capacity : 0);
+}
+
+} // namespace
+
+ClippingPredictorEvaluator::ClippingPredictorEvaluator(int history_size)
+ : history_size_(history_size),
+ ring_buffer_capacity_(history_size + 1),
+ ring_buffer_(ring_buffer_capacity_),
+ true_positives_(0),
+ true_negatives_(0),
+ false_positives_(0),
+ false_negatives_(0) {
+ RTC_DCHECK_GT(history_size_, 0);
+ Reset();
+}
+
+ClippingPredictorEvaluator::~ClippingPredictorEvaluator() = default;
+
+absl::optional<int> ClippingPredictorEvaluator::Observe(
+ bool clipping_detected,
+ bool clipping_predicted) {
+ RTC_DCHECK_GE(ring_buffer_size_, 0);
+ RTC_DCHECK_LE(ring_buffer_size_, ring_buffer_capacity_);
+ RTC_DCHECK_GE(ring_buffer_tail_, 0);
+ RTC_DCHECK_LT(ring_buffer_tail_, ring_buffer_capacity_);
+
+ DecreaseTimesToLive();
+ if (clipping_predicted) {
+    // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ Push(/*expected_detection=*/{/*ttl=*/history_size_, /*detected=*/false});
+ }
+ // Clipping is expected if there are expected detections regardless of
+ // whether all the expected detections have been previously matched - i.e.,
+ // `ExpectedDetection::detected` is true.
+ const bool clipping_expected = ring_buffer_size_ > 0;
+
+ absl::optional<int> prediction_interval;
+ if (clipping_expected && clipping_detected) {
+ prediction_interval = FindEarliestPredictionInterval();
+ // Add a true positive for each unexpired expected detection.
+ const int num_modified_items = MarkExpectedDetectionAsDetected();
+ true_positives_ += num_modified_items;
+ RTC_DCHECK(prediction_interval.has_value() || num_modified_items == 0);
+ RTC_DCHECK(!prediction_interval.has_value() || num_modified_items > 0);
+ } else if (clipping_expected && !clipping_detected) {
+ // Add a false positive if there is one expected detection that has expired
+ // and that has never been matched before. Note that there is at most one
+ // unmatched expired detection.
+ if (HasExpiredUnmatchedExpectedDetection()) {
+ false_positives_++;
+ }
+ } else if (!clipping_expected && clipping_detected) {
+ false_negatives_++;
+ } else {
+ RTC_DCHECK(!clipping_expected && !clipping_detected);
+ true_negatives_++;
+ }
+ return prediction_interval;
+}
+
+void ClippingPredictorEvaluator::Reset() {
+ // Empty the ring buffer of expected detections.
+ ring_buffer_tail_ = 0;
+ ring_buffer_size_ = 0;
+}
+
+// Cost: O(1).
+void ClippingPredictorEvaluator::Push(ExpectedDetection value) {
+ ring_buffer_[ring_buffer_tail_] = value;
+ ring_buffer_tail_++;
+ if (ring_buffer_tail_ == ring_buffer_capacity_) {
+ ring_buffer_tail_ = 0;
+ }
+ ring_buffer_size_ = std::min(ring_buffer_capacity_, ring_buffer_size_ + 1);
+}
+
+// Cost: O(N).
+void ClippingPredictorEvaluator::DecreaseTimesToLive() {
+ bool expired_found = false;
+ for (int i = ring_buffer_tail_ - ring_buffer_size_; i < ring_buffer_tail_;
+ ++i) {
+ int index = i >= 0 ? i : ring_buffer_capacity_ + i;
+ RTC_DCHECK_GE(index, 0);
+ RTC_DCHECK_LT(index, ring_buffer_.size());
+ RTC_DCHECK_GE(ring_buffer_[index].ttl, 0);
+ if (ring_buffer_[index].ttl == 0) {
+ RTC_DCHECK(!expired_found)
+ << "There must be at most one expired item in the ring buffer.";
+ expired_found = true;
+ RTC_DCHECK_EQ(index, OldestExpectedDetectionIndex(ring_buffer_size_,
+ ring_buffer_tail_,
+ ring_buffer_capacity_))
+ << "The expired item must be the oldest in the ring buffer.";
+ }
+ ring_buffer_[index].ttl--;
+ }
+ if (expired_found) {
+ ring_buffer_size_--;
+ }
+}
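
The loop above visits logical positions `tail - size` through `tail - 1`;
negative values wrap into the ring by adding the capacity. A worked example
with a hypothetical state: capacity 4, tail 1, size 3 gives logical indices
-2, -1, 0, which map to physical indices 2, 3, 0.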
+
+// Cost: O(N).
+absl::optional<int> ClippingPredictorEvaluator::FindEarliestPredictionInterval()
+ const {
+ absl::optional<int> prediction_interval;
+ for (int i = ring_buffer_tail_ - ring_buffer_size_; i < ring_buffer_tail_;
+ ++i) {
+ int index = i >= 0 ? i : ring_buffer_capacity_ + i;
+ RTC_DCHECK_GE(index, 0);
+ RTC_DCHECK_LT(index, ring_buffer_.size());
+ if (!ring_buffer_[index].detected) {
+ prediction_interval = std::max(prediction_interval.value_or(0),
+ history_size_ - ring_buffer_[index].ttl);
+ }
+ }
+ return prediction_interval;
+}
+
+// Cost: O(N).
+int ClippingPredictorEvaluator::MarkExpectedDetectionAsDetected() {
+ int num_modified_items = 0;
+ for (int i = ring_buffer_tail_ - ring_buffer_size_; i < ring_buffer_tail_;
+ ++i) {
+ int index = i >= 0 ? i : ring_buffer_capacity_ + i;
+ RTC_DCHECK_GE(index, 0);
+ RTC_DCHECK_LT(index, ring_buffer_.size());
+ if (!ring_buffer_[index].detected) {
+ num_modified_items++;
+ }
+ ring_buffer_[index].detected = true;
+ }
+ return num_modified_items;
+}
+
+// Cost: O(1).
+bool ClippingPredictorEvaluator::HasExpiredUnmatchedExpectedDetection() const {
+ if (ring_buffer_size_ == 0) {
+ return false;
+ }
+  // If an expired item (i.e., one with `ttl` equal to 0) exists, it must be
+  // the oldest.
+ const int oldest_index = OldestExpectedDetectionIndex(
+ ring_buffer_size_, ring_buffer_tail_, ring_buffer_capacity_);
+ RTC_DCHECK_GE(oldest_index, 0);
+ RTC_DCHECK_LT(oldest_index, ring_buffer_.size());
+ return ring_buffer_[oldest_index].ttl == 0 &&
+ !ring_buffer_[oldest_index].detected;
+}
+
+} // namespace webrtc
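
A short timeline of the `Observe()` semantics (sketch; history size 3, inside
a function):

    ClippingPredictorEvaluator evaluator(/*history_size=*/3);
    evaluator.Observe(/*clipping_detected=*/false, /*clipping_predicted=*/true);
    evaluator.Observe(false, false);
    // The detection lands two calls after the prediction: one true positive
    // and a prediction interval of 2.
    absl::optional<int> interval = evaluator.Observe(true, false);
    // interval == 2 and evaluator.true_positives() == 1.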
diff --git a/modules/audio_processing/agc/clipping_predictor_evaluator.h b/modules/audio_processing/agc/clipping_predictor_evaluator.h
new file mode 100644
index 0000000000..e76f25d5e1
--- /dev/null
+++ b/modules/audio_processing/agc/clipping_predictor_evaluator.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_EVALUATOR_H_
+#define MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_EVALUATOR_H_
+
+#include <vector>
+
+#include "absl/types/optional.h"
+
+namespace webrtc {
+
+// Counts true/false positives/negatives while observing sequences of flag pairs
+// that indicate whether clipping has been detected and/or if clipping is
+// predicted. When a true positive is found, it measures the time interval
+// between the prediction and detection events.
+// From the time a prediction is observed and for a period equal to
+// `history_size` calls to `Observe()`, one or more detections are expected. If
+// the expectation is met, a true positive is added and the time interval
+// between the earliest prediction and the detection is recorded; otherwise,
+// when the deadline is reached, a false positive is added. Note that one
+// detection matches all the expected detections that have not expired - i.e.,
+// one detection counts as multiple true positives.
+// If a detection is observed, but no prediction has been observed over the past
+// `history_size` calls to `Observe()`, then a false negative is added;
+// otherwise, a true negative is added.
+class ClippingPredictorEvaluator {
+ public:
+ // Ctor. `history_size` indicates how long to wait for a call to `Observe()`
+ // having `clipping_detected` set to true from the time clipping is predicted.
+ explicit ClippingPredictorEvaluator(int history_size);
+ ClippingPredictorEvaluator(const ClippingPredictorEvaluator&) = delete;
+ ClippingPredictorEvaluator& operator=(const ClippingPredictorEvaluator&) =
+ delete;
+ ~ClippingPredictorEvaluator();
+
+ // Observes whether clipping has been detected and/or if clipping is
+  // predicted. When predicted, one or more detections are expected in the
+  // next `history_size_` calls to `Observe()`. When true positives are found,
+  // returns the prediction interval between the earliest prediction and the
+  // detection.
+ absl::optional<int> Observe(bool clipping_detected, bool clipping_predicted);
+
+ // Removes any expectation recently set after a call to `Observe()` having
+ // `clipping_predicted` set to true.
+ void Reset();
+
+ // Metrics getters.
+ int true_positives() const { return true_positives_; }
+ int true_negatives() const { return true_negatives_; }
+ int false_positives() const { return false_positives_; }
+ int false_negatives() const { return false_negatives_; }
+
+ private:
+ const int history_size_;
+
+ // State of a detection expected to be observed after a prediction.
+ struct ExpectedDetection {
+ // Time to live (TTL); remaining number of `Observe()` calls to match a call
+ // having `clipping_detected` set to true.
+ int ttl;
+ // True if an `Observe()` call having `clipping_detected` set to true has
+ // been observed.
+ bool detected;
+ };
+ // Ring buffer of expected detections.
+ const int ring_buffer_capacity_;
+ std::vector<ExpectedDetection> ring_buffer_;
+ int ring_buffer_tail_;
+ int ring_buffer_size_;
+
+  // Pushes `expected_detection` into `ring_buffer_`.
+ void Push(ExpectedDetection expected_detection);
+  // Decreases the TTLs in `ring_buffer_` and removes expired items.
+ void DecreaseTimesToLive();
+ // Returns the prediction interval for the earliest unexpired expected
+ // detection if any.
+ absl::optional<int> FindEarliestPredictionInterval() const;
+  // Marks all the items in `ring_buffer_` as `detected` and
+ // returns the number of updated items.
+ int MarkExpectedDetectionAsDetected();
+  // Returns true if `ring_buffer_` has an item having `ttl`
+ // equal to 0 (expired) and `detected` equal to false (unmatched).
+ bool HasExpiredUnmatchedExpectedDetection() const;
+
+ // Metrics.
+ int true_positives_;
+ int true_negatives_;
+ int false_positives_;
+ int false_negatives_;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_EVALUATOR_H_
diff --git a/modules/audio_processing/agc/clipping_predictor_evaluator_unittest.cc b/modules/audio_processing/agc/clipping_predictor_evaluator_unittest.cc
new file mode 100644
index 0000000000..1eb83eae61
--- /dev/null
+++ b/modules/audio_processing/agc/clipping_predictor_evaluator_unittest.cc
@@ -0,0 +1,568 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc/clipping_predictor_evaluator.h"
+
+#include <cstdint>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/random.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+using testing::Eq;
+using testing::Optional;
+
+constexpr bool kDetected = true;
+constexpr bool kNotDetected = false;
+
+constexpr bool kPredicted = true;
+constexpr bool kNotPredicted = false;
+
+int SumTrueFalsePositivesNegatives(
+ const ClippingPredictorEvaluator& evaluator) {
+ return evaluator.true_positives() + evaluator.true_negatives() +
+ evaluator.false_positives() + evaluator.false_negatives();
+}
+
+// Checks the metrics after init - i.e., no call to `Observe()`.
+TEST(ClippingPredictorEvaluatorTest, Init) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ EXPECT_EQ(evaluator.true_positives(), 0);
+ EXPECT_EQ(evaluator.true_negatives(), 0);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ EXPECT_EQ(evaluator.false_negatives(), 0);
+}
+
+class ClippingPredictorEvaluatorParameterization
+ : public ::testing::TestWithParam<std::tuple<int, int>> {
+ protected:
+ uint64_t seed() const {
+ return rtc::checked_cast<uint64_t>(std::get<0>(GetParam()));
+ }
+ int history_size() const { return std::get<1>(GetParam()); }
+};
+
+// Checks that after each call to `Observe()` at most one metric changes.
+TEST_P(ClippingPredictorEvaluatorParameterization, AtMostOneMetricChanges) {
+ constexpr int kNumCalls = 123;
+ Random random_generator(seed());
+ ClippingPredictorEvaluator evaluator(history_size());
+
+ for (int i = 0; i < kNumCalls; ++i) {
+ SCOPED_TRACE(i);
+ // Read metrics before `Observe()` is called.
+ const int last_tp = evaluator.true_positives();
+ const int last_tn = evaluator.true_negatives();
+ const int last_fp = evaluator.false_positives();
+ const int last_fn = evaluator.false_negatives();
+ // `Observe()` a random observation.
+ bool clipping_detected = random_generator.Rand<bool>();
+ bool clipping_predicted = random_generator.Rand<bool>();
+ evaluator.Observe(clipping_detected, clipping_predicted);
+
+ // Check that at most one metric has changed.
+ int num_changes = 0;
+ num_changes += last_tp == evaluator.true_positives() ? 0 : 1;
+ num_changes += last_tn == evaluator.true_negatives() ? 0 : 1;
+ num_changes += last_fp == evaluator.false_positives() ? 0 : 1;
+ num_changes += last_fn == evaluator.false_negatives() ? 0 : 1;
+ EXPECT_GE(num_changes, 0);
+ EXPECT_LE(num_changes, 1);
+ }
+}
+
+// Checks that after each call to `Observe()` each metric either remains
+// unchanged or grows.
+TEST_P(ClippingPredictorEvaluatorParameterization, MetricsAreWeaklyMonotonic) {
+ constexpr int kNumCalls = 123;
+ Random random_generator(seed());
+ ClippingPredictorEvaluator evaluator(history_size());
+
+ for (int i = 0; i < kNumCalls; ++i) {
+ SCOPED_TRACE(i);
+ // Read metrics before `Observe()` is called.
+ const int last_tp = evaluator.true_positives();
+ const int last_tn = evaluator.true_negatives();
+ const int last_fp = evaluator.false_positives();
+ const int last_fn = evaluator.false_negatives();
+ // `Observe()` a random observation.
+ bool clipping_detected = random_generator.Rand<bool>();
+ bool clipping_predicted = random_generator.Rand<bool>();
+ evaluator.Observe(clipping_detected, clipping_predicted);
+
+ // Check that metrics are weakly monotonic.
+ EXPECT_GE(evaluator.true_positives(), last_tp);
+ EXPECT_GE(evaluator.true_negatives(), last_tn);
+ EXPECT_GE(evaluator.false_positives(), last_fp);
+ EXPECT_GE(evaluator.false_negatives(), last_fn);
+ }
+}
+
+// Checks that after each call to `Observe()` the growth of each metric is
+// bounded.
+TEST_P(ClippingPredictorEvaluatorParameterization, BoundedMetricsGrowth) {
+ constexpr int kNumCalls = 123;
+ Random random_generator(seed());
+ ClippingPredictorEvaluator evaluator(history_size());
+
+ for (int i = 0; i < kNumCalls; ++i) {
+ SCOPED_TRACE(i);
+ // Read metrics before `Observe()` is called.
+ const int last_tp = evaluator.true_positives();
+ const int last_tn = evaluator.true_negatives();
+ const int last_fp = evaluator.false_positives();
+ const int last_fn = evaluator.false_negatives();
+ // `Observe()` a random observation.
+ bool clipping_detected = random_generator.Rand<bool>();
+ bool clipping_predicted = random_generator.Rand<bool>();
+ evaluator.Observe(clipping_detected, clipping_predicted);
+
+ // Check that TPs grow by at most `history_size() + 1`. Such an upper bound
+ // is reached when multiple predictions are matched by a single detection.
+ EXPECT_LE(evaluator.true_positives() - last_tp, history_size() + 1);
+    // Check that TNs, FPs and FNs grow by at most one.
+ EXPECT_LE(evaluator.true_negatives() - last_tn, 1);
+ EXPECT_LE(evaluator.false_positives() - last_fp, 1);
+ EXPECT_LE(evaluator.false_negatives() - last_fn, 1);
+ }
+}
+
+// Checks that `Observe()` returns a prediction interval if and only if one or
+// more true positives are found.
+TEST_P(ClippingPredictorEvaluatorParameterization,
+ PredictionIntervalIfAndOnlyIfTruePositives) {
+ constexpr int kNumCalls = 123;
+ Random random_generator(seed());
+ ClippingPredictorEvaluator evaluator(history_size());
+
+ for (int i = 0; i < kNumCalls; ++i) {
+ SCOPED_TRACE(i);
+ // Read true positives before `Observe()` is called.
+ const int last_tp = evaluator.true_positives();
+ // `Observe()` a random observation.
+ bool clipping_detected = random_generator.Rand<bool>();
+ bool clipping_predicted = random_generator.Rand<bool>();
+ absl::optional<int> prediction_interval =
+ evaluator.Observe(clipping_detected, clipping_predicted);
+
+ // Check that the prediction interval is returned when a true positive is
+ // found.
+ if (evaluator.true_positives() == last_tp) {
+ EXPECT_FALSE(prediction_interval.has_value());
+ } else {
+ EXPECT_TRUE(prediction_interval.has_value());
+ }
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ ClippingPredictorEvaluatorTest,
+ ClippingPredictorEvaluatorParameterization,
+ ::testing::Combine(::testing::Values(4, 8, 15, 16, 23, 42),
+ ::testing::Values(1, 10, 21)));
+
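[Editor's note: `::testing::Combine` instantiates each `TEST_P` above once per element of the Cartesian product of its value lists - here 6 seeds x 3 history sizes = 18 parameterizations. A hypothetical equivalent without `Combine` would enumerate the `(seed, history_size)` tuples explicitly; the `Explicit` suite name below is illustrative only.]

    INSTANTIATE_TEST_SUITE_P(
        ClippingPredictorEvaluatorTestExplicit,  // Hypothetical prefix.
        ClippingPredictorEvaluatorParameterization,
        ::testing::Values(std::make_tuple(4, 1), std::make_tuple(4, 10),
                          std::make_tuple(4, 21), std::make_tuple(8, 1),
                          /* ... the remaining 13 (seed, history_size) pairs ... */
                          std::make_tuple(42, 21)));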
+// Checks that observing a detection and a prediction right after init
+// produces a true positive.
+TEST(ClippingPredictorEvaluatorTest, OneTruePositiveAfterInit) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ evaluator.Observe(kDetected, kPredicted);
+ EXPECT_EQ(evaluator.true_positives(), 1);
+
+ EXPECT_EQ(evaluator.true_negatives(), 0);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ EXPECT_EQ(evaluator.false_negatives(), 0);
+}
+
+// Checks that observing a detection but no prediction right after init
+// produces a false negative.
+TEST(ClippingPredictorEvaluatorTest, OneFalseNegativeAfterInit) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ evaluator.Observe(kDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.false_negatives(), 1);
+
+ EXPECT_EQ(evaluator.true_positives(), 0);
+ EXPECT_EQ(evaluator.true_negatives(), 0);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+}
+
+// Checks that observing a prediction but no detection right after init
+// produces a false positive once the observation period expires.
+TEST(ClippingPredictorEvaluatorTest, OneFalsePositiveAfterInit) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ evaluator.Observe(kNotDetected, kPredicted);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.false_positives(), 1);
+
+ EXPECT_EQ(evaluator.true_positives(), 0);
+ EXPECT_EQ(evaluator.true_negatives(), 0);
+ EXPECT_EQ(evaluator.false_negatives(), 0);
+}
+
+// Checks that observing neither a detection nor a prediction right after init
+// produces a true negative.
+TEST(ClippingPredictorEvaluatorTest, OneTrueNegativeAfterInit) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.true_negatives(), 1);
+
+ EXPECT_EQ(evaluator.true_positives(), 0);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ EXPECT_EQ(evaluator.false_negatives(), 0);
+}
+
+// Checks that the evaluator detects true negatives when clipping is neither
+// predicted nor detected.
+TEST(ClippingPredictorEvaluatorTest, NeverDetectedAndNotPredicted) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.true_negatives(), 4);
+
+ EXPECT_EQ(evaluator.true_positives(), 0);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ EXPECT_EQ(evaluator.false_negatives(), 0);
+}
+
+// Checks that the evaluator detects a false negative when clipping is detected
+// but not predicted.
+TEST(ClippingPredictorEvaluatorTest, DetectedButNotPredicted) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ evaluator.Observe(kDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.false_negatives(), 1);
+
+ EXPECT_EQ(evaluator.true_positives(), 0);
+ EXPECT_EQ(evaluator.true_negatives(), 3);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+}
+
+// Checks that, when clipping is predicted but never detected, a false
+// positive is not counted before the observation period expires.
+TEST(ClippingPredictorEvaluatorTest,
+ PredictedOnceAndNeverDetectedBeforeDeadline) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ evaluator.Observe(kNotDetected, kPredicted);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.false_positives(), 1);
+
+ EXPECT_EQ(evaluator.true_positives(), 0);
+ EXPECT_EQ(evaluator.true_negatives(), 0);
+ EXPECT_EQ(evaluator.false_negatives(), 0);
+}
+
+// Checks that the evaluator detects a false positive when clipping is
+// predicted but only detected after the observation period expires.
+TEST(ClippingPredictorEvaluatorTest, PredictedOnceButDetectedAfterDeadline) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ evaluator.Observe(kNotDetected, kPredicted);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ evaluator.Observe(kDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.false_positives(), 1);
+
+ EXPECT_EQ(evaluator.true_positives(), 0);
+ EXPECT_EQ(evaluator.true_negatives(), 0);
+ EXPECT_EQ(evaluator.false_negatives(), 1);
+}
+
+// Checks that a prediction followed by a detection counts as true positive.
+TEST(ClippingPredictorEvaluatorTest, PredictedOnceAndThenImmediatelyDetected) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ evaluator.Observe(kNotDetected, kPredicted);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ evaluator.Observe(kDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.true_positives(), 1);
+
+ EXPECT_EQ(evaluator.true_negatives(), 0);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ EXPECT_EQ(evaluator.false_negatives(), 0);
+}
+
+// Checks that a prediction followed by a delayed detection counts as true
+// positive if the delay is within the observation period.
+TEST(ClippingPredictorEvaluatorTest, PredictedOnceAndDetectedBeforeDeadline) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ evaluator.Observe(kNotDetected, kPredicted);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ evaluator.Observe(kDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.true_positives(), 1);
+
+ EXPECT_EQ(evaluator.true_negatives(), 0);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ EXPECT_EQ(evaluator.false_negatives(), 0);
+}
+
+// Checks that a prediction followed by a delayed detection counts as true
+// positive if the delay equals the observation period.
+TEST(ClippingPredictorEvaluatorTest, PredictedOnceAndDetectedAtDeadline) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ evaluator.Observe(kNotDetected, kPredicted);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ evaluator.Observe(kDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.true_positives(), 1);
+
+ EXPECT_EQ(evaluator.true_negatives(), 0);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ EXPECT_EQ(evaluator.false_negatives(), 0);
+}
+
+// Checks that a prediction followed by multiple adjacent detections within
+// the deadline counts as a single true positive and that, after the deadline,
+// a detection counts as a false negative.
+TEST(ClippingPredictorEvaluatorTest, PredictedOnceAndDetectedMultipleTimes) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ evaluator.Observe(kNotDetected, kPredicted);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ // Multiple detections.
+ evaluator.Observe(kDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.true_positives(), 1);
+ evaluator.Observe(kDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.true_positives(), 1);
+  // A detection outside of the observation period counts as a false negative.
+ evaluator.Observe(kDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.false_negatives(), 1);
+ EXPECT_EQ(SumTrueFalsePositivesNegatives(evaluator), 2);
+
+ EXPECT_EQ(evaluator.true_negatives(), 0);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+}
+
+// Checks that a false positive is counted when clipping is only detected
+// after an early prediction has already expired.
+TEST(ClippingPredictorEvaluatorTest,
+ PredictedMultipleTimesAndDetectedOnceAfterDeadline) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ evaluator.Observe(kNotDetected, kPredicted); // ---+
+ evaluator.Observe(kNotDetected, kPredicted); // |
+ evaluator.Observe(kNotDetected, kPredicted); // |
+ evaluator.Observe(kNotDetected, kPredicted); // <--+ Not matched.
+  // The deadline to match a detection to the first prediction has expired.
+ EXPECT_EQ(evaluator.false_positives(), 1);
+ evaluator.Observe(kDetected, kNotPredicted);
+  // The detection above arrives after the first prediction's deadline, hence
+  // it does not match the first prediction; it matches the three most recent
+  // predictions instead (see `true_positives()` below).
+ EXPECT_EQ(evaluator.false_positives(), 1);
+
+ EXPECT_EQ(evaluator.true_positives(), 3);
+ EXPECT_EQ(evaluator.true_negatives(), 0);
+ EXPECT_EQ(evaluator.false_negatives(), 0);
+}
+
+// Checks that multiple consecutive predictions match the first detection
+// observed before the expected detection deadline expires.
+TEST(ClippingPredictorEvaluatorTest, PredictedMultipleTimesAndDetectedOnce) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ evaluator.Observe(kNotDetected, kPredicted); // --+
+ evaluator.Observe(kNotDetected, kPredicted); // | --+
+ evaluator.Observe(kNotDetected, kPredicted); // | | --+
+ evaluator.Observe(kDetected, kNotPredicted); // <-+ <-+ <-+
+ EXPECT_EQ(evaluator.true_positives(), 3);
+ // The following observations do not generate any true negatives as they
+ // belong to the observation period of the last prediction - for which a
+ // detection has already been matched.
+ const int true_negatives = evaluator.true_negatives();
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.true_negatives(), true_negatives);
+
+ EXPECT_EQ(evaluator.true_negatives(), 0);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ EXPECT_EQ(evaluator.false_negatives(), 0);
+}
+
+// Checks that multiple consecutive predictions match the multiple detections
+// observed before the expected detection deadline expires.
+TEST(ClippingPredictorEvaluatorTest,
+ PredictedMultipleTimesAndDetectedMultipleTimes) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ evaluator.Observe(kNotDetected, kPredicted); // --+
+ evaluator.Observe(kNotDetected, kPredicted); // | --+
+ evaluator.Observe(kNotDetected, kPredicted); // | | --+
+ evaluator.Observe(kDetected, kNotPredicted); // <-+ <-+ <-+
+ evaluator.Observe(kDetected, kNotPredicted); // <-+ <-+
+ EXPECT_EQ(evaluator.true_positives(), 3);
+ // The following observation does not generate a true negative as it belongs
+ // to the observation period of the last prediction - for which two detections
+ // have already been matched.
+ const int true_negatives = evaluator.true_negatives();
+ evaluator.Observe(kNotDetected, kNotPredicted);
+ EXPECT_EQ(evaluator.true_negatives(), true_negatives);
+
+ EXPECT_EQ(evaluator.true_negatives(), 0);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ EXPECT_EQ(evaluator.false_negatives(), 0);
+}
+
+// Checks that multiple consecutive predictions match all the detections
+// observed before the expected detection deadline expires.
+TEST(ClippingPredictorEvaluatorTest, PredictedMultipleTimesAndAllDetected) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ evaluator.Observe(kNotDetected, kPredicted); // --+
+ evaluator.Observe(kNotDetected, kPredicted); // | --+
+ evaluator.Observe(kNotDetected, kPredicted); // | | --+
+ evaluator.Observe(kDetected, kNotPredicted); // <-+ <-+ <-+
+ evaluator.Observe(kDetected, kNotPredicted); // <-+ <-+
+ evaluator.Observe(kDetected, kNotPredicted); // <-+
+ EXPECT_EQ(evaluator.true_positives(), 3);
+ EXPECT_EQ(evaluator.true_negatives(), 0);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ EXPECT_EQ(evaluator.false_negatives(), 0);
+}
+
+// Checks that multiple non-consecutive predictions match all the detections
+// observed before the expected detection deadline expires.
+TEST(ClippingPredictorEvaluatorTest,
+ PredictedMultipleTimesWithGapAndAllDetected) {
+ ClippingPredictorEvaluator evaluator(/*history_size=*/3);
+ evaluator.Observe(kNotDetected, kPredicted); // --+
+ evaluator.Observe(kNotDetected, kNotPredicted); // |
+ evaluator.Observe(kNotDetected, kPredicted); // | --+
+ evaluator.Observe(kDetected, kNotPredicted); // <-+ <-+
+ evaluator.Observe(kDetected, kNotPredicted); // <-+
+ evaluator.Observe(kDetected, kNotPredicted); // <-+
+ EXPECT_EQ(evaluator.true_positives(), 2);
+ EXPECT_EQ(evaluator.true_negatives(), 0);
+ EXPECT_EQ(evaluator.false_positives(), 0);
+ EXPECT_EQ(evaluator.false_negatives(), 0);
+}
+
+class ClippingPredictorEvaluatorPredictionIntervalParameterization
+ : public ::testing::TestWithParam<std::tuple<int, int>> {
+ protected:
+ int num_extra_observe_calls() const { return std::get<0>(GetParam()); }
+ int history_size() const { return std::get<1>(GetParam()); }
+};
+
+// Checks that the minimum prediction interval is returned if clipping is
+// predicted in the same call in which it is detected - i.e., no anticipation.
+TEST_P(ClippingPredictorEvaluatorPredictionIntervalParameterization,
+ MinimumPredictionInterval) {
+ ClippingPredictorEvaluator evaluator(history_size());
+ for (int i = 0; i < num_extra_observe_calls(); ++i) {
+ EXPECT_EQ(evaluator.Observe(kNotDetected, kNotPredicted), absl::nullopt);
+ }
+ absl::optional<int> prediction_interval =
+ evaluator.Observe(kDetected, kPredicted);
+ EXPECT_THAT(prediction_interval, Optional(Eq(0)));
+}
+
+// Checks that a prediction interval between the minimum and the maximum is
+// returned if clipping is correctly predicted before it is detected but not as
+// early as possible.
+TEST_P(ClippingPredictorEvaluatorPredictionIntervalParameterization,
+ IntermediatePredictionInterval) {
+ ClippingPredictorEvaluator evaluator(history_size());
+ for (int i = 0; i < num_extra_observe_calls(); ++i) {
+ EXPECT_EQ(evaluator.Observe(kNotDetected, kNotPredicted), absl::nullopt);
+ }
+ EXPECT_EQ(evaluator.Observe(kNotDetected, kPredicted), absl::nullopt);
+ EXPECT_EQ(evaluator.Observe(kNotDetected, kPredicted), absl::nullopt);
+ EXPECT_EQ(evaluator.Observe(kNotDetected, kPredicted), absl::nullopt);
+ absl::optional<int> prediction_interval =
+ evaluator.Observe(kDetected, kPredicted);
+ EXPECT_THAT(prediction_interval, Optional(Eq(3)));
+}
+
+// Checks that the maximum prediction interval is returned if clipping is
+// correctly predicted as early as possible.
+TEST_P(ClippingPredictorEvaluatorPredictionIntervalParameterization,
+ MaximumPredictionInterval) {
+ ClippingPredictorEvaluator evaluator(history_size());
+ for (int i = 0; i < num_extra_observe_calls(); ++i) {
+ EXPECT_EQ(evaluator.Observe(kNotDetected, kNotPredicted), absl::nullopt);
+ }
+ for (int i = 0; i < history_size(); ++i) {
+ EXPECT_EQ(evaluator.Observe(kNotDetected, kPredicted), absl::nullopt);
+ }
+ absl::optional<int> prediction_interval =
+ evaluator.Observe(kDetected, kPredicted);
+ EXPECT_THAT(prediction_interval, Optional(Eq(history_size())));
+}
+
+// Checks that `Observe()` returns the prediction interval as soon as a true
+// positive is found and never again while ongoing detections are matched to a
+// previously observed prediction.
+TEST_P(ClippingPredictorEvaluatorPredictionIntervalParameterization,
+ PredictionIntervalReturnedOnce) {
+ ASSERT_LT(num_extra_observe_calls(), history_size());
+ ClippingPredictorEvaluator evaluator(history_size());
+ // Observe predictions before detection.
+ for (int i = 0; i < num_extra_observe_calls(); ++i) {
+ EXPECT_EQ(evaluator.Observe(kNotDetected, kPredicted), absl::nullopt);
+ }
+ // Observe a detection.
+ absl::optional<int> prediction_interval =
+ evaluator.Observe(kDetected, kPredicted);
+ EXPECT_TRUE(prediction_interval.has_value());
+  // `Observe()` does not return a prediction interval again while ongoing
+  // detections are matched to the previously observed predictions.
+ for (int i = 0; i < history_size(); ++i) {
+ EXPECT_EQ(evaluator.Observe(kDetected, kNotPredicted), absl::nullopt);
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ ClippingPredictorEvaluatorTest,
+ ClippingPredictorEvaluatorPredictionIntervalParameterization,
+ ::testing::Combine(::testing::Values(0, 3, 5), ::testing::Values(7, 11)));
+
+// Checks that a pending detection expectation is cleared if and only if
+// `Reset()` is called after the prediction is observed.
+TEST(ClippingPredictorEvaluatorTest, NoFalsePositivesAfterReset) {
+ constexpr int kHistorySize = 2;
+
+ ClippingPredictorEvaluator with_reset(kHistorySize);
+ with_reset.Observe(kNotDetected, kPredicted);
+ with_reset.Reset();
+ with_reset.Observe(kNotDetected, kNotPredicted);
+ with_reset.Observe(kNotDetected, kNotPredicted);
+ EXPECT_EQ(with_reset.true_positives(), 0);
+ EXPECT_EQ(with_reset.true_negatives(), 2);
+ EXPECT_EQ(with_reset.false_positives(), 0);
+ EXPECT_EQ(with_reset.false_negatives(), 0);
+
+ ClippingPredictorEvaluator no_reset(kHistorySize);
+ no_reset.Observe(kNotDetected, kPredicted);
+ no_reset.Observe(kNotDetected, kNotPredicted);
+ no_reset.Observe(kNotDetected, kNotPredicted);
+ EXPECT_EQ(no_reset.true_positives(), 0);
+ EXPECT_EQ(no_reset.true_negatives(), 0);
+ EXPECT_EQ(no_reset.false_positives(), 1);
+ EXPECT_EQ(no_reset.false_negatives(), 0);
+}
+
+} // namespace
+} // namespace webrtc
diff --git a/modules/audio_processing/agc/clipping_predictor_level_buffer.cc b/modules/audio_processing/agc/clipping_predictor_level_buffer.cc
new file mode 100644
index 0000000000..bc33cda040
--- /dev/null
+++ b/modules/audio_processing/agc/clipping_predictor_level_buffer.cc
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc/clipping_predictor_level_buffer.h"
+
+#include <algorithm>
+#include <cmath>
+
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+bool ClippingPredictorLevelBuffer::Level::operator==(const Level& level) const {
+ constexpr float kEpsilon = 1e-6f;
+ return std::fabs(average - level.average) < kEpsilon &&
+ std::fabs(max - level.max) < kEpsilon;
+}
+
+ClippingPredictorLevelBuffer::ClippingPredictorLevelBuffer(int capacity)
+ : tail_(-1), size_(0), data_(std::max(1, capacity)) {
+ if (capacity > kMaxCapacity) {
+    RTC_LOG(LS_WARNING) << "[agc]: ClippingPredictorLevelBuffer exceeds the "
+                        << "recommended maximum capacity. Capacity: "
+                        << capacity;
+ }
+ RTC_DCHECK(!data_.empty());
+}
+
+void ClippingPredictorLevelBuffer::Reset() {
+ tail_ = -1;
+ size_ = 0;
+}
+
+void ClippingPredictorLevelBuffer::Push(Level level) {
+ ++tail_;
+ if (tail_ == Capacity()) {
+ tail_ = 0;
+ }
+ if (size_ < Capacity()) {
+ size_++;
+ }
+ data_[tail_] = level;
+}
+
+// TODO(bugs.webrtc.org/12774): Optimize partial computation for long buffers.
+absl::optional<ClippingPredictorLevelBuffer::Level>
+ClippingPredictorLevelBuffer::ComputePartialMetrics(int delay,
+ int num_items) const {
+ RTC_DCHECK_GE(delay, 0);
+ RTC_DCHECK_LT(delay, Capacity());
+ RTC_DCHECK_GT(num_items, 0);
+ RTC_DCHECK_LE(num_items, Capacity());
+ RTC_DCHECK_LE(delay + num_items, Capacity());
+ if (delay + num_items > Size()) {
+ return absl::nullopt;
+ }
+ float sum = 0.0f;
+ float max = 0.0f;
+ for (int i = 0; i < num_items && i < Size(); ++i) {
+ int idx = tail_ - delay - i;
+ if (idx < 0) {
+ idx += Capacity();
+ }
+ sum += data_[idx].average;
+ max = std::fmax(data_[idx].max, max);
+ }
+ return absl::optional<Level>({sum / static_cast<float>(num_items), max});
+}
+
+} // namespace webrtc
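[Editor's note: the wrap-around arithmetic in `Push()` and `ComputePartialMetrics()` above implements a fixed-capacity ring buffer indexed backwards from the most recent write. A standalone sketch of the index mapping, assuming the same `tail_`/`Capacity()` conventions; `PhysicalIndex()` is a hypothetical name, not part of this change.]

    // Maps a logical age (0 = most recent item, up to capacity - 1) to a
    // physical index in the circular storage. Mirrors the `tail_ - delay - i`
    // computation in `ComputePartialMetrics()` above.
    int PhysicalIndex(int tail, int capacity, int age) {
      int idx = tail - age;
      if (idx < 0) {
        idx += capacity;  // Wrap around past the beginning of the storage.
      }
      return idx;
    }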
diff --git a/modules/audio_processing/agc/clipping_predictor_level_buffer.h b/modules/audio_processing/agc/clipping_predictor_level_buffer.h
new file mode 100644
index 0000000000..f3e8368194
--- /dev/null
+++ b/modules/audio_processing/agc/clipping_predictor_level_buffer.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_LEVEL_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_LEVEL_BUFFER_H_
+
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+
+namespace webrtc {
+
+// A circular buffer to store frame-wise `Level` items for clipping prediction.
+// The current implementation is not optimized for large buffer lengths.
+class ClippingPredictorLevelBuffer {
+ public:
+ struct Level {
+ float average;
+ float max;
+ bool operator==(const Level& level) const;
+ };
+
+ // Recommended maximum capacity. It is possible to create a buffer with a
+ // larger capacity, but the implementation is not optimized for large values.
+ static constexpr int kMaxCapacity = 100;
+
+ // Ctor. Sets the buffer capacity to max(1, `capacity`) and logs a warning
+ // message if the capacity is greater than `kMaxCapacity`.
+ explicit ClippingPredictorLevelBuffer(int capacity);
+ ~ClippingPredictorLevelBuffer() {}
+ ClippingPredictorLevelBuffer(const ClippingPredictorLevelBuffer&) = delete;
+ ClippingPredictorLevelBuffer& operator=(const ClippingPredictorLevelBuffer&) =
+ delete;
+
+ void Reset();
+
+ // Returns the current number of items stored in the buffer.
+ int Size() const { return size_; }
+
+ // Returns the capacity of the buffer.
+ int Capacity() const { return data_.size(); }
+
+ // Adds a `level` item into the circular buffer `data_`. Stores at most
+ // `Capacity()` items. If more items are pushed, the new item replaces the
+ // least recently pushed item.
+ void Push(Level level);
+
+  // If at least `delay` + `num_items` items have been pushed, returns the
+  // average and the maximum value over the `num_items` items pushed between
+  // `delay` and `delay` + `num_items` - 1 observations ago (a delay equal to
+  // zero corresponds to the most recently pushed item). `delay` must be in
+  // [0, `Capacity()` - 1], `num_items` in [1, `Capacity()`] and their sum
+  // must not exceed `Capacity()`.
+ absl::optional<Level> ComputePartialMetrics(int delay, int num_items) const;
+
+ private:
+ int tail_;
+ int size_;
+ std::vector<Level> data_;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_LEVEL_BUFFER_H_
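[Editor's note: a minimal usage sketch of the `delay`/`num_items` window semantics documented above; the values are illustrative and follow the documented behavior, complementing the unit tests below.]

    ClippingPredictorLevelBuffer buffer(/*capacity=*/4);
    buffer.Push({0.1f, 0.2f});  // Age 2 after the pushes below.
    buffer.Push({0.3f, 0.6f});  // Age 1.
    buffer.Push({0.5f, 1.0f});  // Age 0 (most recent).
    // Average and max over the two items that are 1 and 2 observations old,
    // i.e., {0.3, 0.6} and {0.1, 0.2}: returns {0.2, 0.6}.
    auto metrics = buffer.ComputePartialMetrics(/*delay=*/1, /*num_items=*/2);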
diff --git a/modules/audio_processing/agc/clipping_predictor_level_buffer_unittest.cc b/modules/audio_processing/agc/clipping_predictor_level_buffer_unittest.cc
new file mode 100644
index 0000000000..7e594a1eca
--- /dev/null
+++ b/modules/audio_processing/agc/clipping_predictor_level_buffer_unittest.cc
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc/clipping_predictor_level_buffer.h"
+
+#include <algorithm>
+
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::Eq;
+using ::testing::Optional;
+
+class ClippingPredictorLevelBufferParametrization
+ : public ::testing::TestWithParam<int> {
+ protected:
+ int capacity() const { return GetParam(); }
+};
+
+TEST_P(ClippingPredictorLevelBufferParametrization, CheckEmptyBufferSize) {
+ ClippingPredictorLevelBuffer buffer(capacity());
+ EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+ EXPECT_EQ(buffer.Size(), 0);
+}
+
+TEST_P(ClippingPredictorLevelBufferParametrization, CheckHalfEmptyBufferSize) {
+ ClippingPredictorLevelBuffer buffer(capacity());
+ for (int i = 0; i < buffer.Capacity() / 2; ++i) {
+ buffer.Push({2, 4});
+ }
+ EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+ EXPECT_EQ(buffer.Size(), std::max(capacity(), 1) / 2);
+}
+
+TEST_P(ClippingPredictorLevelBufferParametrization, CheckFullBufferSize) {
+ ClippingPredictorLevelBuffer buffer(capacity());
+ for (int i = 0; i < buffer.Capacity(); ++i) {
+ buffer.Push({2, 4});
+ }
+ EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+ EXPECT_EQ(buffer.Size(), std::max(capacity(), 1));
+}
+
+TEST_P(ClippingPredictorLevelBufferParametrization, CheckLargeBufferSize) {
+ ClippingPredictorLevelBuffer buffer(capacity());
+ for (int i = 0; i < 2 * buffer.Capacity(); ++i) {
+ buffer.Push({2, 4});
+ }
+ EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+ EXPECT_EQ(buffer.Size(), std::max(capacity(), 1));
+}
+
+TEST_P(ClippingPredictorLevelBufferParametrization, CheckSizeAfterReset) {
+ ClippingPredictorLevelBuffer buffer(capacity());
+ buffer.Push({1, 1});
+ buffer.Push({1, 1});
+ buffer.Reset();
+ EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+ EXPECT_EQ(buffer.Size(), 0);
+ buffer.Push({1, 1});
+ EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+ EXPECT_EQ(buffer.Size(), 1);
+}
+
+INSTANTIATE_TEST_SUITE_P(ClippingPredictorLevelBufferTest,
+ ClippingPredictorLevelBufferParametrization,
+ ::testing::Values(-1, 0, 1, 123));
+
+TEST(ClippingPredictorLevelBufferTest, CheckMetricsAfterFullBuffer) {
+ ClippingPredictorLevelBuffer buffer(/*capacity=*/2);
+ buffer.Push({1, 2});
+ buffer.Push({3, 6});
+ EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/1),
+ Optional(Eq(ClippingPredictorLevelBuffer::Level{3, 6})));
+ EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/1, /*num_items=*/1),
+ Optional(Eq(ClippingPredictorLevelBuffer::Level{1, 2})));
+ EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/2),
+ Optional(Eq(ClippingPredictorLevelBuffer::Level{2, 6})));
+}
+
+TEST(ClippingPredictorLevelBufferTest, CheckMetricsAfterPushBeyondCapacity) {
+ ClippingPredictorLevelBuffer buffer(/*capacity=*/2);
+ buffer.Push({1, 1});
+ buffer.Push({3, 6});
+ buffer.Push({5, 10});
+ buffer.Push({7, 14});
+ buffer.Push({6, 12});
+ EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/1),
+ Optional(Eq(ClippingPredictorLevelBuffer::Level{6, 12})));
+ EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/1, /*num_items=*/1),
+ Optional(Eq(ClippingPredictorLevelBuffer::Level{7, 14})));
+ EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/2),
+ Optional(Eq(ClippingPredictorLevelBuffer::Level{6.5f, 14})));
+}
+
+TEST(ClippingPredictorLevelBufferTest, CheckMetricsAfterTooFewItems) {
+ ClippingPredictorLevelBuffer buffer(/*capacity=*/4);
+ buffer.Push({1, 2});
+ buffer.Push({3, 6});
+ EXPECT_EQ(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/3),
+ absl::nullopt);
+ EXPECT_EQ(buffer.ComputePartialMetrics(/*delay=*/2, /*num_items=*/1),
+ absl::nullopt);
+}
+
+TEST(ClippingPredictorLevelBufferTest, CheckMetricsAfterReset) {
+ ClippingPredictorLevelBuffer buffer(/*capacity=*/2);
+ buffer.Push({1, 2});
+ buffer.Reset();
+ buffer.Push({5, 10});
+ buffer.Push({7, 14});
+ EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/1),
+ Optional(Eq(ClippingPredictorLevelBuffer::Level{7, 14})));
+ EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/2),
+ Optional(Eq(ClippingPredictorLevelBuffer::Level{6, 14})));
+ EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/1, /*num_items=*/1),
+ Optional(Eq(ClippingPredictorLevelBuffer::Level{5, 10})));
+}
+
+} // namespace
+} // namespace webrtc
diff --git a/modules/audio_processing/agc/clipping_predictor_unittest.cc b/modules/audio_processing/agc/clipping_predictor_unittest.cc
new file mode 100644
index 0000000000..e848e1a724
--- /dev/null
+++ b/modules/audio_processing/agc/clipping_predictor_unittest.cc
@@ -0,0 +1,491 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc/clipping_predictor.h"
+
+#include <cstdint>
+#include <limits>
+#include <tuple>
+
+#include "rtc_base/checks.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::Eq;
+using ::testing::Optional;
+using ClippingPredictorConfig = AudioProcessing::Config::GainController1::
+ AnalogGainController::ClippingPredictor;
+using ClippingPredictorMode = AudioProcessing::Config::GainController1::
+ AnalogGainController::ClippingPredictor::Mode;
+
+constexpr int kSampleRateHz = 32000;
+constexpr int kNumChannels = 1;
+constexpr int kSamplesPerChannel = kSampleRateHz / 100;
+constexpr int kMaxMicLevel = 255;
+constexpr int kMinMicLevel = 12;
+constexpr int kDefaultClippedLevelStep = 15;
+constexpr float kMaxSampleS16 =
+ static_cast<float>(std::numeric_limits<int16_t>::max());
+
+// Threshold in dBFS corresponding to a signal with an amplitude equal to 99%
+// of full scale - i.e., computed as `20*log10(0.99)`.
+constexpr float kClippingThresholdDb = -0.08729610804900176f;
+
+void CallAnalyze(int num_calls,
+ const AudioFrameView<const float>& frame,
+ ClippingPredictor& predictor) {
+ for (int i = 0; i < num_calls; ++i) {
+ predictor.Analyze(frame);
+ }
+}
+
+// Creates and analyzes an audio frame with a non-zero crest factor (approx.
+// 4.15 dB; see the derivation sketched after this helper).
+void AnalyzeNonZeroCrestFactorAudio(int num_calls,
+ int num_channels,
+ float peak_ratio,
+ ClippingPredictor& predictor) {
+ RTC_DCHECK_GT(num_calls, 0);
+ RTC_DCHECK_GT(num_channels, 0);
+ RTC_DCHECK_LE(peak_ratio, 1.0f);
+ std::vector<float*> audio(num_channels);
+ std::vector<float> audio_data(num_channels * kSamplesPerChannel, 0.0f);
+ for (int channel = 0; channel < num_channels; ++channel) {
+ audio[channel] = &audio_data[channel * kSamplesPerChannel];
+ for (int sample = 0; sample < kSamplesPerChannel; sample += 10) {
+ audio[channel][sample] = 0.1f * peak_ratio * kMaxSampleS16;
+ audio[channel][sample + 1] = 0.2f * peak_ratio * kMaxSampleS16;
+ audio[channel][sample + 2] = 0.3f * peak_ratio * kMaxSampleS16;
+ audio[channel][sample + 3] = 0.4f * peak_ratio * kMaxSampleS16;
+ audio[channel][sample + 4] = 0.5f * peak_ratio * kMaxSampleS16;
+ audio[channel][sample + 5] = 0.6f * peak_ratio * kMaxSampleS16;
+ audio[channel][sample + 6] = 0.7f * peak_ratio * kMaxSampleS16;
+ audio[channel][sample + 7] = 0.8f * peak_ratio * kMaxSampleS16;
+ audio[channel][sample + 8] = 0.9f * peak_ratio * kMaxSampleS16;
+ audio[channel][sample + 9] = 1.0f * peak_ratio * kMaxSampleS16;
+ }
+ }
+ AudioFrameView<const float> frame(audio.data(), num_channels,
+ kSamplesPerChannel);
+ CallAnalyze(num_calls, frame, predictor);
+}
+
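[Editor's note: the 4.15 dB figure follows from the repeating 0.1..1.0 ramp above. The peak is `peak_ratio` while the RMS is `peak_ratio * sqrt((0.01 + 0.04 + ... + 1.0) / 10) = peak_ratio * sqrt(0.385)`, so the crest factor is `20 * log10(1 / sqrt(0.385)) ≈ 4.147 dB`, independent of `peak_ratio`. A sketch verifying this numerically; `RampCrestFactorDb()` is a hypothetical helper assuming `<cmath>` is included.]

    float RampCrestFactorDb() {
      float sum_squares = 0.0f;
      for (int k = 1; k <= 10; ++k) {
        const float sample = 0.1f * k;  // The repeating ramp 0.1, 0.2, ..., 1.0.
        sum_squares += sample * sample;
      }
      const float rms = std::sqrt(sum_squares / 10.0f);  // sqrt(0.385) ~ 0.62.
      return 20.0f * std::log10(1.0f / rms);             // ~4.15 dB.
    }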
+void CheckChannelEstimatesWithValue(int num_channels,
+ int level,
+ int default_step,
+ int min_mic_level,
+ int max_mic_level,
+ const ClippingPredictor& predictor,
+ int expected) {
+ for (int i = 0; i < num_channels; ++i) {
+ SCOPED_TRACE(i);
+ EXPECT_THAT(predictor.EstimateClippedLevelStep(
+ i, level, default_step, min_mic_level, max_mic_level),
+ Optional(Eq(expected)));
+ }
+}
+
+void CheckChannelEstimatesWithoutValue(int num_channels,
+ int level,
+ int default_step,
+ int min_mic_level,
+ int max_mic_level,
+ const ClippingPredictor& predictor) {
+ for (int i = 0; i < num_channels; ++i) {
+ SCOPED_TRACE(i);
+ EXPECT_EQ(predictor.EstimateClippedLevelStep(i, level, default_step,
+ min_mic_level, max_mic_level),
+ absl::nullopt);
+ }
+}
+
+// Creates and analyzes an audio frame with a zero crest factor.
+void AnalyzeZeroCrestFactorAudio(int num_calls,
+ int num_channels,
+ float peak_ratio,
+ ClippingPredictor& predictor) {
+ RTC_DCHECK_GT(num_calls, 0);
+ RTC_DCHECK_GT(num_channels, 0);
+  RTC_DCHECK_LE(peak_ratio, 1.0f);
+  std::vector<float*> audio(num_channels);
+  std::vector<float> audio_data(num_channels * kSamplesPerChannel, 0.0f);
+ for (int channel = 0; channel < num_channels; ++channel) {
+ audio[channel] = &audio_data[channel * kSamplesPerChannel];
+ for (int sample = 0; sample < kSamplesPerChannel; ++sample) {
+ audio[channel][sample] = peak_ratio * kMaxSampleS16;
+ }
+ }
+ auto frame = AudioFrameView<const float>(audio.data(), num_channels,
+ kSamplesPerChannel);
+ CallAnalyze(num_calls, frame, predictor);
+}
+
+TEST(ClippingPeakPredictorTest, NoPredictorCreated) {
+ auto predictor =
+ CreateClippingPredictor(kNumChannels, /*config=*/{/*enabled=*/false});
+ EXPECT_FALSE(predictor);
+}
+
+TEST(ClippingPeakPredictorTest, ClippingEventPredictionCreated) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ auto predictor = CreateClippingPredictor(
+ kNumChannels,
+ /*config=*/{/*enabled=*/true,
+ /*mode=*/ClippingPredictorMode::kClippingEventPrediction});
+ EXPECT_TRUE(predictor);
+}
+
+TEST(ClippingPeakPredictorTest, AdaptiveStepClippingPeakPredictionCreated) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ auto predictor = CreateClippingPredictor(
+ kNumChannels, /*config=*/{
+ /*enabled=*/true,
+ /*mode=*/ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction});
+ EXPECT_TRUE(predictor);
+}
+
+TEST(ClippingPeakPredictorTest, FixedStepClippingPeakPredictionCreated) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ auto predictor = CreateClippingPredictor(
+ kNumChannels, /*config=*/{
+ /*enabled=*/true,
+ /*mode=*/ClippingPredictorMode::kFixedStepClippingPeakPrediction});
+ EXPECT_TRUE(predictor);
+}
+
+class ClippingPredictorParameterization
+ : public ::testing::TestWithParam<std::tuple<int, int, int, int>> {
+ protected:
+ int num_channels() const { return std::get<0>(GetParam()); }
+ ClippingPredictorConfig GetConfig(ClippingPredictorMode mode) const {
+    // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ return {/*enabled=*/true,
+ /*mode=*/mode,
+ /*window_length=*/std::get<1>(GetParam()),
+ /*reference_window_length=*/std::get<2>(GetParam()),
+ /*reference_window_delay=*/std::get<3>(GetParam()),
+ /*clipping_threshold=*/-1.0f,
+ /*crest_factor_margin=*/0.5f};
+ }
+};
+
+TEST_P(ClippingPredictorParameterization,
+ CheckClippingEventPredictorEstimateAfterCrestFactorDrop) {
+ const ClippingPredictorConfig config =
+ GetConfig(ClippingPredictorMode::kClippingEventPrediction);
+ if (config.reference_window_length + config.reference_window_delay <=
+ config.window_length) {
+ return;
+ }
+ auto predictor = CreateClippingPredictor(num_channels(), config);
+ AnalyzeNonZeroCrestFactorAudio(
+ /*num_calls=*/config.reference_window_length +
+ config.reference_window_delay - config.window_length,
+ num_channels(), /*peak_ratio=*/0.99f, *predictor);
+ CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor);
+ AnalyzeZeroCrestFactorAudio(config.window_length, num_channels(),
+ /*peak_ratio=*/0.99f, *predictor);
+ CheckChannelEstimatesWithValue(
+ num_channels(), /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor, kDefaultClippedLevelStep);
+}
+
+TEST_P(ClippingPredictorParameterization,
+ CheckClippingEventPredictorNoEstimateAfterConstantCrestFactor) {
+ const ClippingPredictorConfig config =
+ GetConfig(ClippingPredictorMode::kClippingEventPrediction);
+ if (config.reference_window_length + config.reference_window_delay <=
+ config.window_length) {
+ return;
+ }
+ auto predictor = CreateClippingPredictor(num_channels(), config);
+ AnalyzeNonZeroCrestFactorAudio(
+ /*num_calls=*/config.reference_window_length +
+ config.reference_window_delay - config.window_length,
+ num_channels(), /*peak_ratio=*/0.99f, *predictor);
+ CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor);
+ AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.window_length,
+ num_channels(),
+ /*peak_ratio=*/0.99f, *predictor);
+ CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor);
+}
+
+TEST_P(ClippingPredictorParameterization,
+ CheckClippingPeakPredictorEstimateAfterHighCrestFactor) {
+ const ClippingPredictorConfig config =
+ GetConfig(ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction);
+ if (config.reference_window_length + config.reference_window_delay <=
+ config.window_length) {
+ return;
+ }
+ auto predictor = CreateClippingPredictor(num_channels(), config);
+ AnalyzeNonZeroCrestFactorAudio(
+ /*num_calls=*/config.reference_window_length +
+ config.reference_window_delay - config.window_length,
+ num_channels(), /*peak_ratio=*/0.99f, *predictor);
+ CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor);
+ AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.window_length,
+ num_channels(),
+ /*peak_ratio=*/0.99f, *predictor);
+ CheckChannelEstimatesWithValue(
+ num_channels(), /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor, kDefaultClippedLevelStep);
+}
+
+TEST_P(ClippingPredictorParameterization,
+ CheckClippingPeakPredictorNoEstimateAfterLowCrestFactor) {
+ const ClippingPredictorConfig config =
+ GetConfig(ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction);
+ if (config.reference_window_length + config.reference_window_delay <=
+ config.window_length) {
+ return;
+ }
+ auto predictor = CreateClippingPredictor(num_channels(), config);
+ AnalyzeZeroCrestFactorAudio(
+ /*num_calls=*/config.reference_window_length +
+ config.reference_window_delay - config.window_length,
+ num_channels(), /*peak_ratio=*/0.99f, *predictor);
+ CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor);
+ AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.window_length,
+ num_channels(),
+ /*peak_ratio=*/0.99f, *predictor);
+ CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor);
+}
+
+INSTANTIATE_TEST_SUITE_P(GainController1ClippingPredictor,
+ ClippingPredictorParameterization,
+ ::testing::Combine(::testing::Values(1, 5),
+ ::testing::Values(1, 5, 10),
+ ::testing::Values(1, 5),
+ ::testing::Values(0, 1, 5)));
+
+class ClippingEventPredictorParameterization
+ : public ::testing::TestWithParam<std::tuple<float, float>> {
+ protected:
+ ClippingPredictorConfig GetConfig() const {
+    // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ return {/*enabled=*/true,
+ /*mode=*/ClippingPredictorMode::kClippingEventPrediction,
+ /*window_length=*/5,
+ /*reference_window_length=*/5,
+ /*reference_window_delay=*/5,
+ /*clipping_threshold=*/std::get<0>(GetParam()),
+ /*crest_factor_margin=*/std::get<1>(GetParam())};
+ }
+};
+
+TEST_P(ClippingEventPredictorParameterization,
+ CheckEstimateAfterCrestFactorDrop) {
+ const ClippingPredictorConfig config = GetConfig();
+ auto predictor = CreateClippingPredictor(kNumChannels, config);
+ AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.reference_window_length,
+ kNumChannels, /*peak_ratio=*/0.99f,
+ *predictor);
+ CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor);
+ AnalyzeZeroCrestFactorAudio(config.window_length, kNumChannels,
+ /*peak_ratio=*/0.99f, *predictor);
+ // TODO(bugs.webrtc.org/12774): Add clarifying comment.
+ // TODO(bugs.webrtc.org/12774): Remove 4.15f threshold and split tests.
+ if (config.clipping_threshold < kClippingThresholdDb &&
+ config.crest_factor_margin < 4.15f) {
+ CheckChannelEstimatesWithValue(
+ kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor, kDefaultClippedLevelStep);
+ } else {
+ CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor);
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(GainController1ClippingPredictor,
+ ClippingEventPredictorParameterization,
+ ::testing::Combine(::testing::Values(-1.0f, 0.0f),
+ ::testing::Values(3.0f, 4.16f)));
+
+class ClippingPredictorModeParameterization
+ : public ::testing::TestWithParam<ClippingPredictorMode> {
+ protected:
+ ClippingPredictorConfig GetConfig(float clipping_threshold_dbfs) const {
+    // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ return {/*enabled=*/true,
+ /*mode=*/GetParam(),
+ /*window_length=*/5,
+ /*reference_window_length=*/5,
+ /*reference_window_delay=*/5,
+ /*clipping_threshold=*/clipping_threshold_dbfs,
+ /*crest_factor_margin=*/3.0f};
+ }
+};
+
+TEST_P(ClippingPredictorModeParameterization,
+ CheckEstimateAfterHighCrestFactorWithNoClippingMargin) {
+ const ClippingPredictorConfig config = GetConfig(
+ /*clipping_threshold_dbfs=*/0.0f);
+ auto predictor = CreateClippingPredictor(kNumChannels, config);
+ AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.reference_window_length,
+ kNumChannels, /*peak_ratio=*/0.99f,
+ *predictor);
+ CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor);
+ AnalyzeZeroCrestFactorAudio(config.window_length, kNumChannels,
+ /*peak_ratio=*/0.99f, *predictor);
+ // Since the clipping threshold is set to 0 dBFS, `EstimateClippedLevelStep()`
+ // is expected to return an unavailable value.
+ CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor);
+}
+
+TEST_P(ClippingPredictorModeParameterization,
+ CheckEstimateAfterHighCrestFactorWithClippingMargin) {
+ const ClippingPredictorConfig config =
+ GetConfig(/*clipping_threshold_dbfs=*/-1.0f);
+ auto predictor = CreateClippingPredictor(kNumChannels, config);
+ AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.reference_window_length,
+ kNumChannels,
+ /*peak_ratio=*/0.99f, *predictor);
+ CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor);
+ AnalyzeZeroCrestFactorAudio(config.window_length, kNumChannels,
+ /*peak_ratio=*/0.99f, *predictor);
+ // TODO(bugs.webrtc.org/12774): Add clarifying comment.
+ const float expected_step =
+ config.mode == ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction
+ ? 17
+ : kDefaultClippedLevelStep;
+ CheckChannelEstimatesWithValue(kNumChannels, /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor, expected_step);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ GainController1ClippingPredictor,
+ ClippingPredictorModeParameterization,
+ ::testing::Values(
+ ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction,
+ ClippingPredictorMode::kFixedStepClippingPeakPrediction));
+
+TEST(ClippingEventPredictorTest, CheckEstimateAfterReset) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ constexpr ClippingPredictorConfig kConfig{
+ /*enabled=*/true,
+ /*mode=*/ClippingPredictorMode::kClippingEventPrediction,
+ /*window_length=*/5,
+ /*reference_window_length=*/5,
+ /*reference_window_delay=*/5,
+ /*clipping_threshold=*/-1.0f,
+ /*crest_factor_margin=*/3.0f};
+ auto predictor = CreateClippingPredictor(kNumChannels, kConfig);
+ AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/kConfig.reference_window_length,
+ kNumChannels,
+ /*peak_ratio=*/0.99f, *predictor);
+ CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor);
+ predictor->Reset();
+ AnalyzeZeroCrestFactorAudio(kConfig.window_length, kNumChannels,
+ /*peak_ratio=*/0.99f, *predictor);
+ CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor);
+}
+
+TEST(ClippingPeakPredictorTest, CheckNoEstimateAfterReset) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ constexpr ClippingPredictorConfig kConfig{
+ /*enabled=*/true,
+ /*mode=*/ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction,
+ /*window_length=*/5,
+ /*reference_window_length=*/5,
+ /*reference_window_delay=*/5,
+ /*clipping_threshold=*/-1.0f};
+ auto predictor = CreateClippingPredictor(kNumChannels, kConfig);
+ AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/kConfig.reference_window_length,
+ kNumChannels,
+ /*peak_ratio=*/0.99f, *predictor);
+ CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor);
+ predictor->Reset();
+ AnalyzeZeroCrestFactorAudio(kConfig.window_length, kNumChannels,
+ /*peak_ratio=*/0.99f, *predictor);
+ CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor);
+}
+
+TEST(ClippingPeakPredictorTest, CheckAdaptiveStepEstimate) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ constexpr ClippingPredictorConfig kConfig{
+ /*enabled=*/true,
+ /*mode=*/ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction,
+ /*window_length=*/5,
+ /*reference_window_length=*/5,
+ /*reference_window_delay=*/5,
+ /*clipping_threshold=*/-1.0f};
+ auto predictor = CreateClippingPredictor(kNumChannels, kConfig);
+ AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/kConfig.reference_window_length,
+ kNumChannels, /*peak_ratio=*/0.99f,
+ *predictor);
+ CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor);
+ AnalyzeZeroCrestFactorAudio(kConfig.window_length, kNumChannels,
+ /*peak_ratio=*/0.99f, *predictor);
+ CheckChannelEstimatesWithValue(kNumChannels, /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor, /*expected=*/17);
+}
+
+TEST(ClippingPeakPredictorTest, CheckFixedStepEstimate) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ constexpr ClippingPredictorConfig kConfig{
+ /*enabled=*/true,
+ /*mode=*/ClippingPredictorMode::kFixedStepClippingPeakPrediction,
+ /*window_length=*/5,
+ /*reference_window_length=*/5,
+ /*reference_window_delay=*/5,
+ /*clipping_threshold=*/-1.0f};
+ auto predictor = CreateClippingPredictor(kNumChannels, kConfig);
+ AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/kConfig.reference_window_length,
+ kNumChannels, /*peak_ratio=*/0.99f,
+ *predictor);
+ CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+ kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor);
+ AnalyzeZeroCrestFactorAudio(kConfig.window_length, kNumChannels,
+ /*peak_ratio=*/0.99f, *predictor);
+ CheckChannelEstimatesWithValue(
+ kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+ kMaxMicLevel, *predictor, kDefaultClippedLevelStep);
+}
+
+} // namespace
+} // namespace webrtc
diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc
index 8bf192e77f..3fc9008db1 100644
--- a/modules/audio_processing/agc2/adaptive_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_agc.cc
@@ -25,10 +25,6 @@ using AdaptiveDigitalConfig =
using NoiseEstimatorType =
AudioProcessing::Config::GainController2::NoiseEstimator;
-constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1;
-constexpr float kMaxGainChangePerSecondDb = 3.0f;
-constexpr float kMaxOutputNoiseLevelDbfs = -50.0f;
-
// Detects the available CPU features and applies any kill-switches.
AvailableCpuFeatures GetAllowedCpuFeatures(
const AdaptiveDigitalConfig& config) {
@@ -56,29 +52,8 @@ std::unique_ptr<NoiseLevelEstimator> CreateNoiseLevelEstimator(
}
}
-constexpr NoiseEstimatorType kDefaultNoiseLevelEstimatorType =
- NoiseEstimatorType::kNoiseFloor;
-
} // namespace
-AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
- : speech_level_estimator_(apm_data_dumper),
- gain_controller_(apm_data_dumper,
- kGainApplierAdjacentSpeechFramesThreshold,
- kMaxGainChangePerSecondDb,
- kMaxOutputNoiseLevelDbfs),
- apm_data_dumper_(apm_data_dumper),
- noise_level_estimator_(
- CreateNoiseLevelEstimator(kDefaultNoiseLevelEstimatorType,
- apm_data_dumper)),
- saturation_protector_(
- CreateSaturationProtector(kSaturationProtectorInitialHeadroomDb,
- kSaturationProtectorExtraHeadroomDb,
- kGainApplierAdjacentSpeechFramesThreshold,
- apm_data_dumper)) {
- RTC_DCHECK(apm_data_dumper);
-}
-
AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
const AdaptiveDigitalConfig& config)
: speech_level_estimator_(apm_data_dumper,
@@ -87,7 +62,8 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
gain_controller_(apm_data_dumper,
config.adjacent_speech_frames_threshold,
config.max_gain_change_db_per_second,
- config.max_output_noise_level_dbfs),
+ config.max_output_noise_level_dbfs,
+ config.dry_run),
apm_data_dumper_(apm_data_dumper),
noise_level_estimator_(
CreateNoiseLevelEstimator(config.noise_estimator, apm_data_dumper)),
@@ -106,6 +82,10 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
AdaptiveAgc::~AdaptiveAgc() = default;
+void AdaptiveAgc::Initialize(int sample_rate_hz, int num_channels) {
+ gain_controller_.Initialize(sample_rate_hz, num_channels);
+}
+
void AdaptiveAgc::Process(AudioFrameView<float> frame, float limiter_envelope) {
AdaptiveDigitalGainApplier::FrameInfo info;
diff --git a/modules/audio_processing/agc2/adaptive_agc.h b/modules/audio_processing/agc2/adaptive_agc.h
index fe814446ff..43c7787e36 100644
--- a/modules/audio_processing/agc2/adaptive_agc.h
+++ b/modules/audio_processing/agc2/adaptive_agc.h
@@ -25,19 +25,21 @@ namespace webrtc {
class ApmDataDumper;
// Adaptive digital gain controller.
-// TODO(crbug.com/webrtc/7494): Unify with `AdaptiveDigitalGainApplier`.
+// TODO(crbug.com/webrtc/7494): Rename to `AdaptiveDigitalGainController`.
class AdaptiveAgc {
public:
- explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper);
- // TODO(crbug.com/webrtc/7494): Remove ctor above.
AdaptiveAgc(
ApmDataDumper* apm_data_dumper,
const AudioProcessing::Config::GainController2::AdaptiveDigital& config);
~AdaptiveAgc();
+ void Initialize(int sample_rate_hz, int num_channels);
+
+ // TODO(crbug.com/webrtc/7494): Add `SetLimiterEnvelope()`.
+
// Analyzes `frame` and applies a digital adaptive gain to it. Takes into
// account the envelope measured by the limiter.
- // TODO(crbug.com/webrtc/7494): Make the class depend on the limiter.
+ // TODO(crbug.com/webrtc/7494): Remove `limiter_envelope`.
void Process(AudioFrameView<float> frame, float limiter_envelope);
// Handles a gain change applied to the input signal (e.g., analog gain).
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
index 8a8a7fdc9b..e59b110efe 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
@@ -92,13 +92,28 @@ float ComputeGainChangeThisFrameDb(float target_gain_db,
max_gain_increase_db);
}
+// Copies the (multichannel) audio samples from `src` into `dst`.
+void CopyAudio(AudioFrameView<const float> src,
+ std::vector<std::vector<float>>& dst) {
+ RTC_DCHECK_GT(src.num_channels(), 0);
+ RTC_DCHECK_GT(src.samples_per_channel(), 0);
+ RTC_DCHECK_EQ(dst.size(), src.num_channels());
+ for (size_t c = 0; c < src.num_channels(); ++c) {
+ rtc::ArrayView<const float> channel_view = src.channel(c);
+ RTC_DCHECK_EQ(channel_view.size(), src.samples_per_channel());
+ RTC_DCHECK_EQ(dst[c].size(), src.samples_per_channel());
+ std::copy(channel_view.begin(), channel_view.end(), dst[c].begin());
+ }
+}
+
} // namespace
AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
ApmDataDumper* apm_data_dumper,
int adjacent_speech_frames_threshold,
float max_gain_change_db_per_second,
- float max_output_noise_level_dbfs)
+ float max_output_noise_level_dbfs,
+ bool dry_run)
: apm_data_dumper_(apm_data_dumper),
gain_applier_(
/*hard_clip_samples=*/false,
@@ -107,13 +122,39 @@ AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
max_gain_change_db_per_10ms_(max_gain_change_db_per_second *
kFrameDurationMs / 1000.f),
max_output_noise_level_dbfs_(max_output_noise_level_dbfs),
+ dry_run_(dry_run),
calls_since_last_gain_log_(0),
frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_),
last_gain_db_(kInitialAdaptiveDigitalGainDb) {
- RTC_DCHECK_GT(max_gain_change_db_per_second, 0.f);
+ RTC_DCHECK_GT(max_gain_change_db_per_second, 0.0f);
RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
- RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.f);
- RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.f);
+ RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.0f);
+ RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.0f);
+ Initialize(/*sample_rate_hz=*/48000, /*num_channels=*/1);
+}
+
+void AdaptiveDigitalGainApplier::Initialize(int sample_rate_hz,
+ int num_channels) {
+ if (!dry_run_) {
+ return;
+ }
+ RTC_DCHECK_GT(sample_rate_hz, 0);
+ RTC_DCHECK_GT(num_channels, 0);
+ int frame_size = rtc::CheckedDivExact(sample_rate_hz, 100);
+ bool sample_rate_changed =
+ dry_run_frame_.empty() || // Handle initialization.
+ dry_run_frame_[0].size() != static_cast<size_t>(frame_size);
+ bool num_channels_changed =
+ dry_run_channels_.size() != static_cast<size_t>(num_channels);
+ if (sample_rate_changed || num_channels_changed) {
+ // Resize the multichannel audio vector and update the channel pointers.
+ dry_run_frame_.resize(num_channels);
+ dry_run_channels_.resize(num_channels);
+ for (int c = 0; c < num_channels; ++c) {
+ dry_run_frame_[c].resize(frame_size);
+ dry_run_channels_[c] = dry_run_frame_[c].data();
+ }
+ }
}
void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
@@ -174,7 +215,19 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
gain_applier_.SetGainFactor(
DbToRatio(last_gain_db_ + gain_change_this_frame_db));
}
- gain_applier_.ApplyGain(frame);
+
+ // Modify `frame` only if not running in "dry run" mode.
+ if (!dry_run_) {
+ gain_applier_.ApplyGain(frame);
+ } else {
+    // Copy `frame` so that the gain is applied to a scratch copy while the
+    // input signal is left untouched.
+ CopyAudio(frame, dry_run_frame_);
+ RTC_DCHECK(!dry_run_channels_.empty());
+ AudioFrameView<float> frame_copy(&dry_run_channels_[0],
+ frame.num_channels(),
+ frame.samples_per_channel());
+ gain_applier_.ApplyGain(frame_copy);
+ }
// Remember that the gain has changed for the next iteration.
last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;
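The dry-run branch above computes and applies the adaptive gain to a scratch
copy, so the adaptation state and data dumps stay identical to live mode while
the capture signal passes through untouched. Below is a minimal standalone
sketch of the resize-on-change buffer management behind `Initialize()`; the
`DryRunBuffers` name is hypothetical, for illustration only:

    #include <cstddef>
    #include <vector>

    // Hypothetical standalone version of the dry-run members owned by
    // AdaptiveDigitalGainApplier: owned samples plus the raw channel pointers
    // that Process() wraps in an AudioFrameView<float>.
    struct DryRunBuffers {
      std::vector<std::vector<float>> frame;  // One sample vector per channel.
      std::vector<float*> channels;           // Raw pointers into `frame`.

      void Resize(int sample_rate_hz, int num_channels) {
        const size_t frame_size =
            static_cast<size_t>(sample_rate_hz / 100);  // One 10 ms frame.
        const bool changed =
            frame.empty() || frame[0].size() != frame_size ||
            channels.size() != static_cast<size_t>(num_channels);
        if (!changed) {
          return;  // Same geometry: repeated Initialize() calls are free.
        }
        frame.resize(num_channels);
        channels.resize(num_channels);
        for (int c = 0; c < num_channels; ++c) {
          frame[c].resize(frame_size);
          channels[c] = frame[c].data();  // Refresh after every reallocation.
        }
      }
    };

Reallocating only on a geometry change keeps `Process()` allocation-free on
the real-time audio path.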
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
index 74220fa861..8b58ea00b2 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
@@ -11,6 +11,8 @@
#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
+#include <vector>
+
#include "modules/audio_processing/agc2/gain_applier.h"
#include "modules/audio_processing/include/audio_frame_view.h"
@@ -37,15 +39,18 @@ class AdaptiveDigitalGainApplier {
// frames must be observed in order to consider the sequence as speech.
// `max_gain_change_db_per_second` limits the adaptation speed (uniformly
// operated across frames). `max_output_noise_level_dbfs` limits the output
- // noise level.
+ // noise level. If `dry_run` is true, `Process()` will not modify the audio.
AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper,
int adjacent_speech_frames_threshold,
float max_gain_change_db_per_second,
- float max_output_noise_level_dbfs);
+ float max_output_noise_level_dbfs,
+ bool dry_run);
AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete;
AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) =
delete;
+ void Initialize(int sample_rate_hz, int num_channels);
+
// Analyzes `info`, updates the digital gain and applies it to a 10 ms
// `frame`. Supports any sample rate supported by APM.
void Process(const FrameInfo& info, AudioFrameView<float> frame);
@@ -57,10 +62,14 @@ class AdaptiveDigitalGainApplier {
const int adjacent_speech_frames_threshold_;
const float max_gain_change_db_per_10ms_;
const float max_output_noise_level_dbfs_;
+ const bool dry_run_;
int calls_since_last_gain_log_;
int frames_to_gain_increase_allowed_;
float last_gain_db_;
+
+ std::vector<std::vector<float>> dry_run_frame_;
+ std::vector<float*> dry_run_channels_;
};
} // namespace webrtc
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
index ee9cb02ed6..f4a23a92b9 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
@@ -48,7 +48,8 @@ struct GainApplierHelper {
&apm_data_dumper,
adjacent_speech_frames_threshold,
kMaxGainChangePerSecondDb,
- kMaxOutputNoiseLevelDbfs)) {}
+ kMaxOutputNoiseLevelDbfs,
+ /*dry_run=*/false)) {}
ApmDataDumper apm_data_dumper;
std::unique_ptr<AdaptiveDigitalGainApplier> gain_applier;
};
@@ -67,6 +68,7 @@ constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{
TEST(GainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
GainApplierHelper helper;
+ helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo);
// Make one call with reasonable audio level values and settings.
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
@@ -80,6 +82,7 @@ TEST(GainController2AdaptiveGainApplier, MaxGainApplied) {
static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;
GainApplierHelper helper;
+ helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.speech_level_dbfs = -60.0f;
float applied_gain;
@@ -94,6 +97,7 @@ TEST(GainController2AdaptiveGainApplier, MaxGainApplied) {
TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
GainApplierHelper helper;
+ helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
constexpr float initial_level_dbfs = -25.0f;
// A few extra frames for safety.
@@ -131,6 +135,7 @@ TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
TEST(GainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
GainApplierHelper helper;
+ helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
constexpr float initial_level_dbfs = -25.0f;
@@ -155,6 +160,7 @@ TEST(GainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) {
GainApplierHelper helper;
+ helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
constexpr float initial_level_dbfs = -25.0f;
constexpr int num_initial_frames =
@@ -184,6 +190,7 @@ TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) {
TEST(GainController2GainApplier, CanHandlePositiveSpeechLevels) {
GainApplierHelper helper;
+ helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo);
// Make one call with positive audio level values and settings.
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
@@ -194,6 +201,7 @@ TEST(GainController2GainApplier, CanHandlePositiveSpeechLevels) {
TEST(GainController2GainApplier, AudioLevelLimitsGain) {
GainApplierHelper helper;
+ helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
constexpr float initial_level_dbfs = -25.0f;
constexpr int num_initial_frames =
@@ -231,6 +239,7 @@ TEST_P(AdaptiveDigitalGainApplierTest,
DoNotIncreaseGainWithTooFewSpeechFrames) {
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
GainApplierHelper helper(adjacent_speech_frames_threshold);
+ helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
float prev_gain = 0.0f;
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
@@ -248,6 +257,7 @@ TEST_P(AdaptiveDigitalGainApplierTest,
TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
GainApplierHelper helper(adjacent_speech_frames_threshold);
+ helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
float prev_gain = 0.0f;
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
@@ -269,5 +279,68 @@ INSTANTIATE_TEST_SUITE_P(GainController2,
AdaptiveDigitalGainApplierTest,
::testing::Values(1, 7, 31));
+// Checks that the input is never modified when running in dry run mode.
+TEST(GainController2GainApplier, DryRunDoesNotChangeInput) {
+ ApmDataDumper apm_data_dumper(0);
+ AdaptiveDigitalGainApplier gain_applier(
+ &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
+ kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);
+  // Simulate an input signal with a low speech level.
+ AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+ info.speech_level_dbfs = -60.0f;
+ // Allow enough time to reach the maximum gain.
+ constexpr int kNumFramesToAdapt =
+ static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;
+ constexpr float kPcmSamples = 123.456f;
+ // Run the gain applier and check that the PCM samples are not modified.
+ gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);
+ for (int i = 0; i < kNumFramesToAdapt; ++i) {
+ SCOPED_TRACE(i);
+ VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, kPcmSamples);
+ gain_applier.Process(info, fake_audio.float_frame_view());
+ EXPECT_FLOAT_EQ(fake_audio.float_frame_view().channel(0)[0], kPcmSamples);
+ }
+}
+
+// Checks that no sample is modified before or after a sample rate change.
+TEST(GainController2GainApplier, DryRunHandlesSampleRateChange) {
+ ApmDataDumper apm_data_dumper(0);
+ AdaptiveDigitalGainApplier gain_applier(
+ &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
+ kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);
+ AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+ info.speech_level_dbfs = -60.0f;
+ constexpr float kPcmSamples = 123.456f;
+ VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
+ gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);
+ gain_applier.Process(info, fake_audio_8k.float_frame_view());
+ EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
+ gain_applier.Initialize(/*sample_rate_hz=*/48000, kMono);
+ VectorFloatFrame fake_audio_48k(kMono, kFrameLen10ms48kHz, kPcmSamples);
+ gain_applier.Process(info, fake_audio_48k.float_frame_view());
+ EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
+}
+
+// Checks that no sample is modified before or after a change in the number
+// of channels.
+TEST(GainController2GainApplier, DryRunHandlesNumChannelsChange) {
+ ApmDataDumper apm_data_dumper(0);
+ AdaptiveDigitalGainApplier gain_applier(
+ &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
+ kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);
+ AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+ info.speech_level_dbfs = -60.0f;
+ constexpr float kPcmSamples = 123.456f;
+ VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
+ gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);
+ gain_applier.Process(info, fake_audio_8k.float_frame_view());
+ EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
+  VectorFloatFrame fake_audio_stereo(kStereo, kFrameLen10ms8kHz, kPcmSamples);
+  gain_applier.Initialize(/*sample_rate_hz=*/8000, kStereo);
+  gain_applier.Process(info, fake_audio_stereo.float_frame_view());
+  EXPECT_FLOAT_EQ(fake_audio_stereo.float_frame_view().channel(0)[0],
+                  kPcmSamples);
+  EXPECT_FLOAT_EQ(fake_audio_stereo.float_frame_view().channel(1)[0],
+                  kPcmSamples);
+}
+
} // namespace
} // namespace webrtc
diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h
index 0f806d3938..adb1614926 100644
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h
@@ -44,8 +44,6 @@ constexpr float kLevelEstimatorLeakFactor =
1.0f - 1.0f / kLevelEstimatorTimeToConfidenceMs;
// Robust VAD probability and speech decisions.
-constexpr int kDefaultVadRnnResetPeriodMs = 1500;
-static_assert(kDefaultVadRnnResetPeriodMs % kFrameDurationMs == 0, "");
constexpr int kDefaultLevelEstimatorAdjacentSpeechFramesThreshold = 12;
// Saturation Protector settings.
diff --git a/modules/audio_processing/agc2/vad_with_level.cc b/modules/audio_processing/agc2/vad_with_level.cc
index 034f2b6ac0..9747ca2370 100644
--- a/modules/audio_processing/agc2/vad_with_level.cc
+++ b/modules/audio_processing/agc2/vad_with_level.cc
@@ -67,10 +67,6 @@ class Vad : public VoiceActivityDetector {
} // namespace
-VadLevelAnalyzer::VadLevelAnalyzer()
- : VadLevelAnalyzer(kDefaultVadRnnResetPeriodMs, GetAvailableCpuFeatures()) {
-}
-
VadLevelAnalyzer::VadLevelAnalyzer(int vad_reset_period_ms,
const AvailableCpuFeatures& cpu_features)
: VadLevelAnalyzer(vad_reset_period_ms,
diff --git a/modules/audio_processing/agc2/vad_with_level.h b/modules/audio_processing/agc2/vad_with_level.h
index 7cd93d6f2b..8d2ae45762 100644
--- a/modules/audio_processing/agc2/vad_with_level.h
+++ b/modules/audio_processing/agc2/vad_with_level.h
@@ -37,8 +37,6 @@ class VadLevelAnalyzer {
virtual float ComputeProbability(AudioFrameView<const float> frame) = 0;
};
- // Ctor. Uses the default VAD with the default settings.
- VadLevelAnalyzer();
// Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call
// `VadLevelAnalyzer::Reset()`; it must be equal to or greater than the
// duration of two frames. Uses `cpu_features` to instantiate the default VAD.
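With the default constructor gone, every call site now spells out the VAD
reset period and the CPU features. A sketch of the resulting construction
pattern, assuming the `cpu_features.h` header that declares
`GetAvailableCpuFeatures()` (the function used in the .cc hunk above):

    #include <memory>

    #include "modules/audio_processing/agc2/cpu_features.h"
    #include "modules/audio_processing/agc2/vad_with_level.h"

    std::unique_ptr<webrtc::VadLevelAnalyzer> CreateDefaultAnalyzer() {
      // 1500 ms matches the former kDefaultVadRnnResetPeriodMs constant that
      // this change removes from agc2_common.h.
      return std::make_unique<webrtc::VadLevelAnalyzer>(
          /*vad_reset_period_ms=*/1500, webrtc::GetAvailableCpuFeatures());
    }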
diff --git a/modules/audio_processing/agc2/vad_with_level_unittest.cc b/modules/audio_processing/agc2/vad_with_level_unittest.cc
index 99b0136376..ec8e476965 100644
--- a/modules/audio_processing/agc2/vad_with_level_unittest.cc
+++ b/modules/audio_processing/agc2/vad_with_level_unittest.cc
@@ -71,16 +71,16 @@ struct FrameWithView {
const AudioFrameView<const float> view;
};
-TEST(GainController2VadLevelAnalyzer, PeakLevelGreaterThanRmsLevel) {
+TEST(GainController2VadLevelAnalyzer, RmsLessThanPeakLevel) {
+ auto analyzer = CreateVadLevelAnalyzerWithMockVad(
+ /*vad_reset_period_ms=*/1500,
+ /*speech_probabilities=*/{1.0f},
+ /*expected_vad_reset_calls=*/0);
// Handcrafted frame so that the average is lower than the peak value.
FrameWithView frame(1000.0f); // Constant frame.
frame.samples[10] = 2000.0f; // Except for one peak value.
-
- // Compute audio frame levels (the VAD result is ignored).
- VadLevelAnalyzer analyzer;
- auto levels_and_vad_prob = analyzer.AnalyzeFrame(frame.view);
-
- // Compare peak and RMS levels.
+ // Compute audio frame levels.
+ auto levels_and_vad_prob = analyzer->AnalyzeFrame(frame.view);
EXPECT_LT(levels_and_vad_prob.rms_dbfs, levels_and_vad_prob.peak_dbfs);
}
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index 12646fd243..4a1985545f 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -271,7 +271,8 @@ AudioProcessingImpl::AudioProcessingImpl(
!field_trial::IsEnabled(
"WebRTC-ApmExperimentalMultiChannelCaptureKillSwitch"),
EnforceSplitBandHpf(),
- MinimizeProcessingForUnusedOutput()),
+ MinimizeProcessingForUnusedOutput(),
+ field_trial::IsEnabled("WebRTC-TransientSuppressorForcedOff")),
capture_(),
capture_nonlocked_() {
RTC_LOG(LS_INFO) << "Injected APM submodules:"
@@ -290,8 +291,7 @@ AudioProcessingImpl::AudioProcessingImpl(
// If no echo detector is injected, use the ResidualEchoDetector.
if (!submodules_.echo_detector) {
- submodules_.echo_detector =
- new rtc::RefCountedObject<ResidualEchoDetector>();
+ submodules_.echo_detector = rtc::make_ref_counted<ResidualEchoDetector>();
}
#if !(defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS))
@@ -1733,7 +1733,8 @@ bool AudioProcessingImpl::UpdateActiveSubmoduleStates() {
}
void AudioProcessingImpl::InitializeTransientSuppressor() {
- if (config_.transient_suppression.enabled) {
+ if (config_.transient_suppression.enabled &&
+ !constants_.transient_suppressor_forced_off) {
// Attempt to create a transient suppressor, if one is not already created.
if (!submodules_.transient_suppressor) {
submodules_.transient_suppressor =
@@ -1917,7 +1918,11 @@ void AudioProcessingImpl::InitializeGainController1() {
config_.gain_controller1.analog_gain_controller.clipped_level_min,
!config_.gain_controller1.analog_gain_controller
.enable_digital_adaptive,
- capture_nonlocked_.split_rate));
+ capture_nonlocked_.split_rate,
+ config_.gain_controller1.analog_gain_controller.clipped_level_step,
+ config_.gain_controller1.analog_gain_controller.clipped_ratio_threshold,
+ config_.gain_controller1.analog_gain_controller.clipped_wait_frames,
+ config_.gain_controller1.analog_gain_controller.clipping_predictor));
if (re_creation) {
submodules_.agc_manager->set_stream_analog_level(stream_analog_level);
}
@@ -1937,7 +1942,8 @@ void AudioProcessingImpl::InitializeGainController2() {
submodules_.gain_controller2.reset(new GainController2());
}
- submodules_.gain_controller2->Initialize(proc_fullband_sample_rate_hz());
+ submodules_.gain_controller2->Initialize(proc_fullband_sample_rate_hz(),
+ num_input_channels());
submodules_.gain_controller2->ApplyConfig(config_.gain_controller2);
} else {
submodules_.gain_controller2.reset();
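The transient suppressor can now be force-disabled through the
`WebRTC-TransientSuppressorForcedOff` field trial. The trial is read once at
construction and cached in `ApmConstants`, so the capture path never queries
the field-trial registry. A reduced sketch of that pattern; the
`IsFieldTrialEnabled` stub stands in for `webrtc::field_trial::IsEnabled`:

    #include <string>

    // Stand-in for webrtc::field_trial::IsEnabled(), stubbed out so the
    // sketch is self-contained.
    bool IsFieldTrialEnabled(const std::string& /*name*/) { return false; }

    class TransientSuppressorGate {
     public:
      TransientSuppressorGate()
          : forced_off_(
                IsFieldTrialEnabled("WebRTC-TransientSuppressorForcedOff")) {}

      // Mirrors the check in InitializeTransientSuppressor(): the submodule
      // is created only if the config enables it and the kill switch is off.
      bool ShouldCreate(bool enabled_in_config) const {
        return enabled_in_config && !forced_off_;
      }

     private:
      const bool forced_off_;  // Immutable after construction.
    };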
diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h
index e08abd5797..c88cfcde92 100644
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h
@@ -422,16 +422,19 @@ class AudioProcessingImpl : public AudioProcessing {
ApmConstants(bool multi_channel_render_support,
bool multi_channel_capture_support,
bool enforce_split_band_hpf,
- bool minimize_processing_for_unused_output)
+ bool minimize_processing_for_unused_output,
+ bool transient_suppressor_forced_off)
: multi_channel_render_support(multi_channel_render_support),
multi_channel_capture_support(multi_channel_capture_support),
enforce_split_band_hpf(enforce_split_band_hpf),
minimize_processing_for_unused_output(
- minimize_processing_for_unused_output) {}
+ minimize_processing_for_unused_output),
+ transient_suppressor_forced_off(transient_suppressor_forced_off) {}
bool multi_channel_render_support;
bool multi_channel_capture_support;
bool enforce_split_band_hpf;
bool minimize_processing_for_unused_output;
+ bool transient_suppressor_forced_off;
} constants_;
struct ApmCaptureState {
diff --git a/modules/audio_processing/audio_processing_impl_locking_unittest.cc b/modules/audio_processing/audio_processing_impl_locking_unittest.cc
index ec165aa146..66c1251d4c 100644
--- a/modules/audio_processing/audio_processing_impl_locking_unittest.cc
+++ b/modules/audio_processing/audio_processing_impl_locking_unittest.cc
@@ -387,33 +387,6 @@ class AudioProcessingImplLockTest
void SetUp() override;
void TearDown() override;
- // Thread callback for the render thread
- static void RenderProcessorThreadFunc(void* context) {
- AudioProcessingImplLockTest* impl =
- reinterpret_cast<AudioProcessingImplLockTest*>(context);
- while (!impl->MaybeEndTest()) {
- impl->render_thread_state_.Process();
- }
- }
-
- // Thread callback for the capture thread
- static void CaptureProcessorThreadFunc(void* context) {
- AudioProcessingImplLockTest* impl =
- reinterpret_cast<AudioProcessingImplLockTest*>(context);
- while (!impl->MaybeEndTest()) {
- impl->capture_thread_state_.Process();
- }
- }
-
- // Thread callback for the stats thread
- static void StatsProcessorThreadFunc(void* context) {
- AudioProcessingImplLockTest* impl =
- reinterpret_cast<AudioProcessingImplLockTest*>(context);
- while (!impl->MaybeEndTest()) {
- impl->stats_thread_state_.Process();
- }
- }
-
// Tests whether all the required render and capture side calls have been
// done.
bool TestDone() {
@@ -423,9 +396,28 @@ class AudioProcessingImplLockTest
// Start the threads used in the test.
void StartThreads() {
- render_thread_.Start();
- capture_thread_.Start();
- stats_thread_.Start();
+ const auto attributes =
+ rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime);
+ render_thread_ = rtc::PlatformThread::SpawnJoinable(
+ [this] {
+ while (!MaybeEndTest())
+ render_thread_state_.Process();
+ },
+ "render", attributes);
+ capture_thread_ = rtc::PlatformThread::SpawnJoinable(
+ [this] {
+ while (!MaybeEndTest()) {
+ capture_thread_state_.Process();
+ }
+ },
+ "capture", attributes);
+
+ stats_thread_ = rtc::PlatformThread::SpawnJoinable(
+ [this] {
+ while (!MaybeEndTest())
+ stats_thread_state_.Process();
+ },
+ "stats", attributes);
}
// Event handlers for the test.
@@ -434,9 +426,6 @@ class AudioProcessingImplLockTest
rtc::Event capture_call_event_;
// Thread related variables.
- rtc::PlatformThread render_thread_;
- rtc::PlatformThread capture_thread_;
- rtc::PlatformThread stats_thread_;
mutable RandomGenerator rand_gen_;
std::unique_ptr<AudioProcessing> apm_;
@@ -445,6 +434,9 @@ class AudioProcessingImplLockTest
RenderProcessor render_thread_state_;
CaptureProcessor capture_thread_state_;
StatsProcessor stats_thread_state_;
+ rtc::PlatformThread render_thread_;
+ rtc::PlatformThread capture_thread_;
+ rtc::PlatformThread stats_thread_;
};
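This test migrates from the deprecated rtc::PlatformThread constructor (static
callback plus explicit Start()/Stop()) to the SpawnJoinable() factory, which
takes a lambda and joins in the handle's destructor; that is also why the
thread members move below the processor state their lambdas capture. A minimal
sketch of the new API as used in this change:

    #include <atomic>

    #include "rtc_base/platform_thread.h"

    void RunWorkerOnce() {
      // Declared before the thread handle so it outlives the join.
      std::atomic<bool> done{false};
      rtc::PlatformThread worker = rtc::PlatformThread::SpawnJoinable(
          [&done] {
            while (!done.load()) {
              // Process one unit of work per iteration.
            }
          },
          "worker",
          rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime));
      done.store(true);
      // `worker` joins automatically when it goes out of scope; Finalize()
      // joins earlier, as StopThreads() in the performance test below does.
    }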
// Sleeps a random time between 0 and max_sleep milliseconds.
@@ -485,19 +477,7 @@ void PopulateAudioFrame(float amplitude,
}
AudioProcessingImplLockTest::AudioProcessingImplLockTest()
- : render_thread_(RenderProcessorThreadFunc,
- this,
- "render",
- rtc::kRealtimePriority),
- capture_thread_(CaptureProcessorThreadFunc,
- this,
- "capture",
- rtc::kRealtimePriority),
- stats_thread_(StatsProcessorThreadFunc,
- this,
- "stats",
- rtc::kNormalPriority),
- apm_(AudioProcessingBuilderForTesting().Create()),
+ : apm_(AudioProcessingBuilderForTesting().Create()),
render_thread_state_(kMaxFrameSize,
&rand_gen_,
&render_call_event_,
@@ -549,9 +529,6 @@ void AudioProcessingImplLockTest::SetUp() {
void AudioProcessingImplLockTest::TearDown() {
render_call_event_.Set();
capture_call_event_.Set();
- render_thread_.Stop();
- capture_thread_.Stop();
- stats_thread_.Stop();
}
StatsProcessor::StatsProcessor(RandomGenerator* rand_gen,
diff --git a/modules/audio_processing/audio_processing_impl_unittest.cc b/modules/audio_processing/audio_processing_impl_unittest.cc
index ef1830357a..ca8b8b4c25 100644
--- a/modules/audio_processing/audio_processing_impl_unittest.cc
+++ b/modules/audio_processing/audio_processing_impl_unittest.cc
@@ -544,8 +544,7 @@ TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
TEST(AudioProcessingImplTest, RenderPreProcessorBeforeEchoDetector) {
// Make sure that signal changes caused by a render pre-processing sub-module
// take place before any echo detector analysis.
- rtc::scoped_refptr<TestEchoDetector> test_echo_detector(
- new rtc::RefCountedObject<TestEchoDetector>());
+ auto test_echo_detector = rtc::make_ref_counted<TestEchoDetector>();
std::unique_ptr<CustomProcessing> test_render_pre_processor(
new TestRenderPreProcessor());
// Create APM injecting the test echo detector and render pre-processor.
@@ -605,8 +604,7 @@ TEST(AudioProcessingImplTest, RenderPreProcessorBeforeEchoDetector) {
// config should be bit-exact with running APM with said submodules disabled.
// This mainly tests that SetCreateOptionalSubmodulesForTesting has an effect.
TEST(ApmWithSubmodulesExcludedTest, BitexactWithDisabledModules) {
- rtc::scoped_refptr<AudioProcessingImpl> apm =
- new rtc::RefCountedObject<AudioProcessingImpl>(webrtc::Config());
+ auto apm = rtc::make_ref_counted<AudioProcessingImpl>(webrtc::Config());
ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError);
ApmSubmoduleCreationOverrides overrides;
@@ -654,8 +652,7 @@ TEST(ApmWithSubmodulesExcludedTest, BitexactWithDisabledModules) {
// Disable transient suppressor creation and run APM in ways that should trigger
// calls to the transient suppressor API.
TEST(ApmWithSubmodulesExcludedTest, ReinitializeTransientSuppressor) {
- rtc::scoped_refptr<AudioProcessingImpl> apm =
- new rtc::RefCountedObject<AudioProcessingImpl>(webrtc::Config());
+ auto apm = rtc::make_ref_counted<AudioProcessingImpl>(webrtc::Config());
ASSERT_EQ(apm->Initialize(), kNoErr);
ApmSubmoduleCreationOverrides overrides;
@@ -716,8 +713,7 @@ TEST(ApmWithSubmodulesExcludedTest, ReinitializeTransientSuppressor) {
// Disable transient suppressor creation and run APM in ways that should trigger
// calls to the transient suppressor API.
TEST(ApmWithSubmodulesExcludedTest, ToggleTransientSuppressor) {
- rtc::scoped_refptr<AudioProcessingImpl> apm =
- new rtc::RefCountedObject<AudioProcessingImpl>(webrtc::Config());
+ auto apm = rtc::make_ref_counted<AudioProcessingImpl>(webrtc::Config());
ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError);
ApmSubmoduleCreationOverrides overrides;
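`rtc::make_ref_counted<T>(...)` replaces the spelled-out
`new rtc::RefCountedObject<T>(...)`: it hides the wrapper type and returns a
`scoped_refptr<T>` directly. A sketch of the equivalence, with header paths
assumed per the layout at this revision:

    #include "api/scoped_refptr.h"
    #include "rtc_base/ref_count.h"
    #include "rtc_base/ref_counted_object.h"

    namespace {

    struct Thing : public rtc::RefCountInterface {
      explicit Thing(int v) : value(v) {}
      int value;
    };

    }  // namespace

    int Demo() {
      // Before: the caller names the ref-counting wrapper and uses raw `new`.
      rtc::scoped_refptr<Thing> a(new rtc::RefCountedObject<Thing>(1));
      // After: the factory deduces the wrapper and returns scoped_refptr<Thing>.
      auto b = rtc::make_ref_counted<Thing>(2);
      return a->value + b->value;  // Both pointers behave identically from here.
    }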
diff --git a/modules/audio_processing/audio_processing_performance_unittest.cc b/modules/audio_processing/audio_processing_performance_unittest.cc
index 86ff0e8bfe..9585850296 100644
--- a/modules/audio_processing/audio_processing_performance_unittest.cc
+++ b/modules/audio_processing/audio_processing_performance_unittest.cc
@@ -391,15 +391,7 @@ class TimedThreadApiProcessor {
class CallSimulator : public ::testing::TestWithParam<SimulationConfig> {
public:
CallSimulator()
- : render_thread_(new rtc::PlatformThread(RenderProcessorThreadFunc,
- this,
- "render",
- rtc::kRealtimePriority)),
- capture_thread_(new rtc::PlatformThread(CaptureProcessorThreadFunc,
- this,
- "capture",
- rtc::kRealtimePriority)),
- rand_gen_(42U),
+ : rand_gen_(42U),
simulation_config_(static_cast<SimulationConfig>(GetParam())) {}
// Run the call simulation with a timeout.
@@ -434,13 +426,10 @@ class CallSimulator : public ::testing::TestWithParam<SimulationConfig> {
static const int kMinNumFramesToProcess = 150;
static const int32_t kTestTimeout = 3 * 10 * kMinNumFramesToProcess;
- // ::testing::TestWithParam<> implementation.
- void TearDown() override { StopThreads(); }
-
// Stop all running threads.
void StopThreads() {
- render_thread_->Stop();
- capture_thread_->Stop();
+ render_thread_.Finalize();
+ capture_thread_.Finalize();
}
// Simulator and APM setup.
@@ -531,32 +520,28 @@ class CallSimulator : public ::testing::TestWithParam<SimulationConfig> {
kMinNumFramesToProcess, kCaptureInputFloatLevel, num_capture_channels));
}
- // Thread callback for the render thread.
- static void RenderProcessorThreadFunc(void* context) {
- CallSimulator* call_simulator = reinterpret_cast<CallSimulator*>(context);
- while (call_simulator->render_thread_state_->Process()) {
- }
- }
-
- // Thread callback for the capture thread.
- static void CaptureProcessorThreadFunc(void* context) {
- CallSimulator* call_simulator = reinterpret_cast<CallSimulator*>(context);
- while (call_simulator->capture_thread_state_->Process()) {
- }
- }
-
// Start the threads used in the test.
void StartThreads() {
- ASSERT_NO_FATAL_FAILURE(render_thread_->Start());
- ASSERT_NO_FATAL_FAILURE(capture_thread_->Start());
+ const auto attributes =
+ rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime);
+ render_thread_ = rtc::PlatformThread::SpawnJoinable(
+ [this] {
+ while (render_thread_state_->Process()) {
+ }
+ },
+ "render", attributes);
+ capture_thread_ = rtc::PlatformThread::SpawnJoinable(
+ [this] {
+ while (capture_thread_state_->Process()) {
+ }
+ },
+ "capture", attributes);
}
// Event handler for the test.
rtc::Event test_complete_;
// Thread related variables.
- std::unique_ptr<rtc::PlatformThread> render_thread_;
- std::unique_ptr<rtc::PlatformThread> capture_thread_;
Random rand_gen_;
std::unique_ptr<AudioProcessing> apm_;
@@ -565,6 +550,8 @@ class CallSimulator : public ::testing::TestWithParam<SimulationConfig> {
LockedFlag capture_call_checker_;
std::unique_ptr<TimedThreadApiProcessor> render_thread_state_;
std::unique_ptr<TimedThreadApiProcessor> capture_thread_state_;
+ rtc::PlatformThread render_thread_;
+ rtc::PlatformThread capture_thread_;
};
// Implements the callback functionality for the threads.
diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc
index 3d562dffcd..4d30a348f6 100644
--- a/modules/audio_processing/audio_processing_unittest.cc
+++ b/modules/audio_processing/audio_processing_unittest.cc
@@ -3039,50 +3039,50 @@ TEST(AudioProcessing, GainController1ConfigNotEqual) {
Toggle(a.enabled);
EXPECT_NE(a, b);
- a.enabled = b.enabled;
+ a = b;
a.mode = AudioProcessing::Config::GainController1::Mode::kAdaptiveDigital;
EXPECT_NE(a, b);
- a.mode = b.mode;
+ a = b;
a.target_level_dbfs++;
EXPECT_NE(a, b);
- a.target_level_dbfs = b.target_level_dbfs;
+ a = b;
a.compression_gain_db++;
EXPECT_NE(a, b);
- a.compression_gain_db = b.compression_gain_db;
+ a = b;
Toggle(a.enable_limiter);
EXPECT_NE(a, b);
- a.enable_limiter = b.enable_limiter;
+ a = b;
a.analog_level_minimum++;
EXPECT_NE(a, b);
- a.analog_level_minimum = b.analog_level_minimum;
+ a = b;
a.analog_level_maximum--;
EXPECT_NE(a, b);
- a.analog_level_maximum = b.analog_level_maximum;
+ a = b;
auto& a_analog = a.analog_gain_controller;
const auto& b_analog = b.analog_gain_controller;
Toggle(a_analog.enabled);
EXPECT_NE(a, b);
- a_analog.enabled = b_analog.enabled;
+ a_analog = b_analog;
a_analog.startup_min_volume++;
EXPECT_NE(a, b);
- a_analog.startup_min_volume = b_analog.startup_min_volume;
+ a_analog = b_analog;
a_analog.clipped_level_min++;
EXPECT_NE(a, b);
- a_analog.clipped_level_min = b_analog.clipped_level_min;
+ a_analog = b_analog;
Toggle(a_analog.enable_digital_adaptive);
EXPECT_NE(a, b);
- a_analog.enable_digital_adaptive = b_analog.enable_digital_adaptive;
+ a_analog = b_analog;
}
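The restore step between perturbations now assigns the whole struct (`a = b;`)
instead of copying back only the field that was changed, so the test keeps
working when new config fields are added and a forgotten per-field restore can
no longer leak into later comparisons. A generic sketch of the pattern:

    struct Config {
      bool enabled = false;
      int level = 0;
      bool operator==(const Config& rhs) const {
        return enabled == rhs.enabled && level == rhs.level;
      }
    };

    void PerturbAndRestore() {
      Config a, b;
      a.enabled = !a.enabled;  // Perturb one field, then compare (omitted).
      a = b;  // Restores every field, present and future, not just the one
              // perturbed above.
    }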
TEST(AudioProcessing, GainController2ConfigEqual) {
@@ -3094,7 +3094,7 @@ TEST(AudioProcessing, GainController2ConfigEqual) {
b.enabled = a.enabled;
EXPECT_EQ(a, b);
- a.fixed_digital.gain_db += 1.f;
+ a.fixed_digital.gain_db += 1.0f;
b.fixed_digital.gain_db = a.fixed_digital.gain_db;
EXPECT_EQ(a, b);
@@ -3105,46 +3105,44 @@ TEST(AudioProcessing, GainController2ConfigEqual) {
b_adaptive.enabled = a_adaptive.enabled;
EXPECT_EQ(a, b);
- a_adaptive.vad_probability_attack += 1.f;
- b_adaptive.vad_probability_attack = a_adaptive.vad_probability_attack;
+ Toggle(a_adaptive.dry_run);
+ b_adaptive.dry_run = a_adaptive.dry_run;
EXPECT_EQ(a, b);
- a_adaptive.level_estimator =
- AudioProcessing::Config::GainController2::LevelEstimator::kPeak;
- b_adaptive.level_estimator = a_adaptive.level_estimator;
+ a_adaptive.noise_estimator = AudioProcessing::Config::GainController2::
+ NoiseEstimator::kStationaryNoise;
+ b_adaptive.noise_estimator = a_adaptive.noise_estimator;
EXPECT_EQ(a, b);
- a_adaptive.level_estimator_adjacent_speech_frames_threshold++;
- b_adaptive.level_estimator_adjacent_speech_frames_threshold =
- a_adaptive.level_estimator_adjacent_speech_frames_threshold;
+ a_adaptive.vad_reset_period_ms++;
+ b_adaptive.vad_reset_period_ms = a_adaptive.vad_reset_period_ms;
EXPECT_EQ(a, b);
- Toggle(a_adaptive.use_saturation_protector);
- b_adaptive.use_saturation_protector = a_adaptive.use_saturation_protector;
+ a_adaptive.adjacent_speech_frames_threshold++;
+ b_adaptive.adjacent_speech_frames_threshold =
+ a_adaptive.adjacent_speech_frames_threshold;
EXPECT_EQ(a, b);
- a_adaptive.initial_saturation_margin_db += 1.f;
- b_adaptive.initial_saturation_margin_db =
- a_adaptive.initial_saturation_margin_db;
+ a_adaptive.max_gain_change_db_per_second += 1.0f;
+ b_adaptive.max_gain_change_db_per_second =
+ a_adaptive.max_gain_change_db_per_second;
EXPECT_EQ(a, b);
- a_adaptive.extra_saturation_margin_db += 1.f;
- b_adaptive.extra_saturation_margin_db = a_adaptive.extra_saturation_margin_db;
+ a_adaptive.max_output_noise_level_dbfs += 1.0f;
+ b_adaptive.max_output_noise_level_dbfs =
+ a_adaptive.max_output_noise_level_dbfs;
EXPECT_EQ(a, b);
- a_adaptive.gain_applier_adjacent_speech_frames_threshold++;
- b_adaptive.gain_applier_adjacent_speech_frames_threshold =
- a_adaptive.gain_applier_adjacent_speech_frames_threshold;
+ Toggle(a_adaptive.sse2_allowed);
+ b_adaptive.sse2_allowed = a_adaptive.sse2_allowed;
EXPECT_EQ(a, b);
- a_adaptive.max_gain_change_db_per_second += 1.f;
- b_adaptive.max_gain_change_db_per_second =
- a_adaptive.max_gain_change_db_per_second;
+ Toggle(a_adaptive.avx2_allowed);
+ b_adaptive.avx2_allowed = a_adaptive.avx2_allowed;
EXPECT_EQ(a, b);
- a_adaptive.max_output_noise_level_dbfs -= 1.f;
- b_adaptive.max_output_noise_level_dbfs =
- a_adaptive.max_output_noise_level_dbfs;
+ Toggle(a_adaptive.neon_allowed);
+ b_adaptive.neon_allowed = a_adaptive.neon_allowed;
EXPECT_EQ(a, b);
}
@@ -3156,60 +3154,55 @@ TEST(AudioProcessing, GainController2ConfigNotEqual) {
Toggle(a.enabled);
EXPECT_NE(a, b);
- a.enabled = b.enabled;
+ a = b;
- a.fixed_digital.gain_db += 1.f;
+ a.fixed_digital.gain_db += 1.0f;
EXPECT_NE(a, b);
- a.fixed_digital.gain_db = b.fixed_digital.gain_db;
+ a.fixed_digital = b.fixed_digital;
auto& a_adaptive = a.adaptive_digital;
const auto& b_adaptive = b.adaptive_digital;
Toggle(a_adaptive.enabled);
EXPECT_NE(a, b);
- a_adaptive.enabled = b_adaptive.enabled;
+ a_adaptive = b_adaptive;
- a_adaptive.vad_probability_attack += 1.f;
+ Toggle(a_adaptive.dry_run);
EXPECT_NE(a, b);
- a_adaptive.vad_probability_attack = b_adaptive.vad_probability_attack;
+ a_adaptive = b_adaptive;
- a_adaptive.level_estimator =
- AudioProcessing::Config::GainController2::LevelEstimator::kPeak;
+ a_adaptive.noise_estimator = AudioProcessing::Config::GainController2::
+ NoiseEstimator::kStationaryNoise;
EXPECT_NE(a, b);
- a_adaptive.level_estimator = b_adaptive.level_estimator;
+ a_adaptive = b_adaptive;
- a_adaptive.level_estimator_adjacent_speech_frames_threshold++;
+ a_adaptive.vad_reset_period_ms++;
EXPECT_NE(a, b);
- a_adaptive.level_estimator_adjacent_speech_frames_threshold =
- b_adaptive.level_estimator_adjacent_speech_frames_threshold;
+ a_adaptive = b_adaptive;
- Toggle(a_adaptive.use_saturation_protector);
+ a_adaptive.adjacent_speech_frames_threshold++;
EXPECT_NE(a, b);
- a_adaptive.use_saturation_protector = b_adaptive.use_saturation_protector;
+ a_adaptive = b_adaptive;
- a_adaptive.initial_saturation_margin_db += 1.f;
+ a_adaptive.max_gain_change_db_per_second += 1.0f;
EXPECT_NE(a, b);
- a_adaptive.initial_saturation_margin_db =
- b_adaptive.initial_saturation_margin_db;
+ a_adaptive = b_adaptive;
- a_adaptive.extra_saturation_margin_db += 1.f;
+ a_adaptive.max_output_noise_level_dbfs += 1.0f;
EXPECT_NE(a, b);
- a_adaptive.extra_saturation_margin_db = b_adaptive.extra_saturation_margin_db;
+ a_adaptive = b_adaptive;
- a_adaptive.gain_applier_adjacent_speech_frames_threshold++;
+ Toggle(a_adaptive.sse2_allowed);
EXPECT_NE(a, b);
- a_adaptive.gain_applier_adjacent_speech_frames_threshold =
- b_adaptive.gain_applier_adjacent_speech_frames_threshold;
+ a_adaptive = b_adaptive;
- a_adaptive.max_gain_change_db_per_second += 1.f;
+ Toggle(a_adaptive.avx2_allowed);
EXPECT_NE(a, b);
- a_adaptive.max_gain_change_db_per_second =
- b_adaptive.max_gain_change_db_per_second;
+ a_adaptive = b_adaptive;
- a_adaptive.max_output_noise_level_dbfs -= 1.f;
+ Toggle(a_adaptive.neon_allowed);
EXPECT_NE(a, b);
- a_adaptive.max_output_noise_level_dbfs =
- b_adaptive.max_output_noise_level_dbfs;
+ a_adaptive = b_adaptive;
}
} // namespace webrtc
diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc
index 9e3e8e7cae..74b63c9432 100644
--- a/modules/audio_processing/gain_controller2.cc
+++ b/modules/audio_processing/gain_controller2.cc
@@ -26,22 +26,26 @@ int GainController2::instance_count_ = 0;
GainController2::GainController2()
: data_dumper_(rtc::AtomicOps::Increment(&instance_count_)),
gain_applier_(/*hard_clip_samples=*/false,
- /*initial_gain_factor=*/0.f),
+ /*initial_gain_factor=*/0.0f),
limiter_(static_cast<size_t>(48000), &data_dumper_, "Agc2"),
calls_since_last_limiter_log_(0) {
if (config_.adaptive_digital.enabled) {
- adaptive_agc_ = std::make_unique<AdaptiveAgc>(&data_dumper_);
+ adaptive_agc_ =
+ std::make_unique<AdaptiveAgc>(&data_dumper_, config_.adaptive_digital);
}
}
GainController2::~GainController2() = default;
-void GainController2::Initialize(int sample_rate_hz) {
+void GainController2::Initialize(int sample_rate_hz, int num_channels) {
RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
sample_rate_hz == AudioProcessing::kSampleRate48kHz);
limiter_.SetSampleRate(sample_rate_hz);
+ if (adaptive_agc_) {
+ adaptive_agc_->Initialize(sample_rate_hz, num_channels);
+ }
data_dumper_.InitiateNewSetOfRecordings();
data_dumper_.DumpRaw("sample_rate_hz", sample_rate_hz);
calls_since_last_limiter_log_ = 0;
diff --git a/modules/audio_processing/gain_controller2.h b/modules/audio_processing/gain_controller2.h
index b62890d721..ce758c7834 100644
--- a/modules/audio_processing/gain_controller2.h
+++ b/modules/audio_processing/gain_controller2.h
@@ -34,7 +34,7 @@ class GainController2 {
GainController2& operator=(const GainController2&) = delete;
~GainController2();
- void Initialize(int sample_rate_hz);
+ void Initialize(int sample_rate_hz, int num_channels);
void Process(AudioBuffer* audio);
void NotifyAnalogLevel(int level);
diff --git a/modules/audio_processing/gain_controller2_unittest.cc b/modules/audio_processing/gain_controller2_unittest.cc
index 815d58efe7..85c08bb750 100644
--- a/modules/audio_processing/gain_controller2_unittest.cc
+++ b/modules/audio_processing/gain_controller2_unittest.cc
@@ -65,7 +65,7 @@ std::unique_ptr<GainController2> CreateAgc2FixedDigitalMode(
size_t sample_rate_hz) {
auto agc2 = std::make_unique<GainController2>();
agc2->ApplyConfig(CreateAgc2FixedDigitalModeConfig(fixed_gain_db));
- agc2->Initialize(sample_rate_hz);
+ agc2->Initialize(sample_rate_hz, /*num_channels=*/1);
return agc2;
}
@@ -337,9 +337,10 @@ TEST(GainController2, CheckGainAdaptiveDigital) {
constexpr float kExpectedGainDb = 4.3f;
constexpr float kToleranceDb = 0.5f;
GainController2 gain_controller2;
- gain_controller2.Initialize(AudioProcessing::kSampleRate48kHz);
+ gain_controller2.Initialize(AudioProcessing::kSampleRate48kHz,
+ /*num_channels=*/1);
AudioProcessing::Config::GainController2 config;
- config.fixed_digital.gain_db = 0.f;
+ config.fixed_digital.gain_db = 0.0f;
config.adaptive_digital.enabled = true;
gain_controller2.ApplyConfig(config);
EXPECT_NEAR(
diff --git a/modules/audio_processing/include/audio_processing.cc b/modules/audio_processing/include/audio_processing.cc
index fa45230c6b..44a90d6e76 100644
--- a/modules/audio_processing/include/audio_processing.cc
+++ b/modules/audio_processing/include/audio_processing.cc
@@ -77,33 +77,42 @@ bool Agc1Config::operator==(const Agc1Config& rhs) const {
analog_lhs.startup_min_volume == analog_rhs.startup_min_volume &&
analog_lhs.clipped_level_min == analog_rhs.clipped_level_min &&
analog_lhs.enable_digital_adaptive ==
- analog_rhs.enable_digital_adaptive;
+ analog_rhs.enable_digital_adaptive &&
+ analog_lhs.clipped_level_step == analog_rhs.clipped_level_step &&
+ analog_lhs.clipped_ratio_threshold ==
+ analog_rhs.clipped_ratio_threshold &&
+ analog_lhs.clipped_wait_frames == analog_rhs.clipped_wait_frames &&
+ analog_lhs.clipping_predictor.mode ==
+ analog_rhs.clipping_predictor.mode &&
+ analog_lhs.clipping_predictor.window_length ==
+ analog_rhs.clipping_predictor.window_length &&
+ analog_lhs.clipping_predictor.reference_window_length ==
+ analog_rhs.clipping_predictor.reference_window_length &&
+ analog_lhs.clipping_predictor.reference_window_delay ==
+ analog_rhs.clipping_predictor.reference_window_delay &&
+ analog_lhs.clipping_predictor.clipping_threshold ==
+ analog_rhs.clipping_predictor.clipping_threshold &&
+ analog_lhs.clipping_predictor.crest_factor_margin ==
+ analog_rhs.clipping_predictor.crest_factor_margin;
}
-bool Agc2Config::operator==(const Agc2Config& rhs) const {
- const auto& adaptive_lhs = adaptive_digital;
- const auto& adaptive_rhs = rhs.adaptive_digital;
+bool Agc2Config::AdaptiveDigital::operator==(
+ const Agc2Config::AdaptiveDigital& rhs) const {
+ return enabled == rhs.enabled && dry_run == rhs.dry_run &&
+ noise_estimator == rhs.noise_estimator &&
+ vad_reset_period_ms == rhs.vad_reset_period_ms &&
+ adjacent_speech_frames_threshold ==
+ rhs.adjacent_speech_frames_threshold &&
+ max_gain_change_db_per_second == rhs.max_gain_change_db_per_second &&
+ max_output_noise_level_dbfs == rhs.max_output_noise_level_dbfs &&
+ sse2_allowed == rhs.sse2_allowed && avx2_allowed == rhs.avx2_allowed &&
+ neon_allowed == rhs.neon_allowed;
+}
+bool Agc2Config::operator==(const Agc2Config& rhs) const {
return enabled == rhs.enabled &&
fixed_digital.gain_db == rhs.fixed_digital.gain_db &&
- adaptive_lhs.enabled == adaptive_rhs.enabled &&
- adaptive_lhs.vad_probability_attack ==
- adaptive_rhs.vad_probability_attack &&
- adaptive_lhs.level_estimator == adaptive_rhs.level_estimator &&
- adaptive_lhs.level_estimator_adjacent_speech_frames_threshold ==
- adaptive_rhs.level_estimator_adjacent_speech_frames_threshold &&
- adaptive_lhs.use_saturation_protector ==
- adaptive_rhs.use_saturation_protector &&
- adaptive_lhs.initial_saturation_margin_db ==
- adaptive_rhs.initial_saturation_margin_db &&
- adaptive_lhs.extra_saturation_margin_db ==
- adaptive_rhs.extra_saturation_margin_db &&
- adaptive_lhs.gain_applier_adjacent_speech_frames_threshold ==
- adaptive_rhs.gain_applier_adjacent_speech_frames_threshold &&
- adaptive_lhs.max_gain_change_db_per_second ==
- adaptive_rhs.max_gain_change_db_per_second &&
- adaptive_lhs.max_output_noise_level_dbfs ==
- adaptive_rhs.max_output_noise_level_dbfs;
+ adaptive_digital == rhs.adaptive_digital;
}
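Giving `Agc2Config::AdaptiveDigital` its own `operator==` lets the outer
comparison delegate per member instead of enumerating nested fields inline, so
a new adaptive-digital field only has to be added in one place. A generic
sketch of the composition pattern:

    struct Inner {
      int threshold = 12;
      bool flag = false;
      bool operator==(const Inner& rhs) const {
        return threshold == rhs.threshold && flag == rhs.flag;
      }
    };

    struct Outer {
      bool enabled = false;
      Inner inner;
      bool operator==(const Outer& rhs) const {
        // Delegates the nested comparison to Inner::operator==.
        return enabled == rhs.enabled && inner == rhs.inner;
      }
    };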
bool AudioProcessing::Config::CaptureLevelAdjustment::operator==(
@@ -156,11 +165,46 @@ std::string AudioProcessing::Config::ToString() const {
<< ", enable_limiter: " << gain_controller1.enable_limiter
<< ", analog_level_minimum: " << gain_controller1.analog_level_minimum
<< ", analog_level_maximum: " << gain_controller1.analog_level_maximum
- << " }, gain_controller2: { enabled: " << gain_controller2.enabled
+ << ", analog_gain_controller { enabled: "
+ << gain_controller1.analog_gain_controller.enabled
+ << ", startup_min_volume: "
+ << gain_controller1.analog_gain_controller.startup_min_volume
+ << ", clipped_level_min: "
+ << gain_controller1.analog_gain_controller.clipped_level_min
+ << ", enable_digital_adaptive: "
+ << gain_controller1.analog_gain_controller.enable_digital_adaptive
+ << ", clipped_level_step: "
+ << gain_controller1.analog_gain_controller.clipped_level_step
+ << ", clipped_ratio_threshold: "
+ << gain_controller1.analog_gain_controller.clipped_ratio_threshold
+ << ", clipped_wait_frames: "
+ << gain_controller1.analog_gain_controller.clipped_wait_frames
+ << ", clipping_predictor: { enabled: "
+ << gain_controller1.analog_gain_controller.clipping_predictor.enabled
+ << ", mode: "
+ << gain_controller1.analog_gain_controller.clipping_predictor.mode
+ << ", window_length: "
+ << gain_controller1.analog_gain_controller.clipping_predictor
+ .window_length
+ << ", reference_window_length: "
+ << gain_controller1.analog_gain_controller.clipping_predictor
+ .reference_window_length
+ << ", reference_window_delay: "
+ << gain_controller1.analog_gain_controller.clipping_predictor
+ .reference_window_delay
+ << ", clipping_threshold: "
+ << gain_controller1.analog_gain_controller.clipping_predictor
+ .clipping_threshold
+ << ", crest_factor_margin: "
+ << gain_controller1.analog_gain_controller.clipping_predictor
+ .crest_factor_margin
+ << " }}}, gain_controller2: { enabled: " << gain_controller2.enabled
<< ", fixed_digital: { gain_db: "
<< gain_controller2.fixed_digital.gain_db
<< " }, adaptive_digital: { enabled: "
- << gain_controller2.adaptive_digital.enabled << ", noise_estimator: "
+ << gain_controller2.adaptive_digital.enabled
+ << ", dry_run: " << gain_controller2.adaptive_digital.dry_run
+ << ", noise_estimator: "
<< GainController2NoiseEstimatorToString(
gain_controller2.adaptive_digital.noise_estimator)
<< ", vad_reset_period_ms: "
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index 01bb7c33c7..64b1b5d107 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -59,9 +59,9 @@ class CustomProcessing;
//
// Must be provided through AudioProcessingBuilder().Create(config).
#if defined(WEBRTC_CHROMIUM_BUILD)
-static const int kAgcStartupMinVolume = 85;
+static constexpr int kAgcStartupMinVolume = 85;
#else
-static const int kAgcStartupMinVolume = 0;
+static constexpr int kAgcStartupMinVolume = 0;
#endif // defined(WEBRTC_CHROMIUM_BUILD)
static constexpr int kClippedLevelMin = 70;
@@ -275,7 +275,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
// in the analog mode, prescribing an analog gain to be applied at the audio
// HAL.
// Recommended to be enabled on the client-side.
- struct GainController1 {
+ struct RTC_EXPORT GainController1 {
bool operator==(const GainController1& rhs) const;
bool operator!=(const GainController1& rhs) const {
return !(*this == rhs);
@@ -334,6 +334,43 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
// clipping.
int clipped_level_min = kClippedLevelMin;
bool enable_digital_adaptive = true;
+ // Amount the microphone level is lowered with every clipping event.
+ // Limited to (0, 255].
+ int clipped_level_step = 15;
+ // Proportion of clipped samples required to declare a clipping event.
+ // Limited to (0.f, 1.f).
+ float clipped_ratio_threshold = 0.1f;
+ // Time in frames to wait after a clipping event before checking again.
+ // Limited to values higher than 0.
+ int clipped_wait_frames = 300;
+
+ // Enables clipping prediction functionality.
+ struct ClippingPredictor {
+ bool enabled = false;
+ enum Mode {
+ // Clipping event prediction mode with fixed step estimation.
+ kClippingEventPrediction,
+ // Clipped peak estimation mode with adaptive step estimation.
+ kAdaptiveStepClippingPeakPrediction,
+ // Clipped peak estimation mode with fixed step estimation.
+ kFixedStepClippingPeakPrediction,
+ };
+ Mode mode = kClippingEventPrediction;
+ // Number of frames in the sliding analysis window.
+ int window_length = 5;
+ // Number of frames in the sliding reference window.
+ int reference_window_length = 5;
+ // Reference window delay (unit: number of frames).
+ int reference_window_delay = 5;
+ // Clipping prediction threshold (dBFS).
+ float clipping_threshold = -1.0f;
+ // Crest factor drop threshold (dB).
+ float crest_factor_margin = 3.0f;
+ // If true, the recommended clipped level step is used to modify the
+ // analog gain. Otherwise, the predictor runs without affecting the
+ // analog gain.
+ bool use_predicted_step = true;
+ } clipping_predictor;
} analog_gain_controller;
} gain_controller1;
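A usage sketch for the new clipping-control knobs. The values repeat the
defaults above purely for illustration, except `mode`, which selects one of
the new prediction modes; the `decltype` alias avoids assuming the name of the
enclosing analog-controller struct:

    #include "modules/audio_processing/include/audio_processing.h"

    webrtc::AudioProcessing::Config MakeConfigWithClippingPrediction() {
      webrtc::AudioProcessing::Config config;
      auto& analog = config.gain_controller1.analog_gain_controller;
      analog.clipped_level_step = 15;         // Mic level drop per clipping event.
      analog.clipped_ratio_threshold = 0.1f;  // Fraction of clipped samples.
      analog.clipped_wait_frames = 300;       // Cool-down between checks.
      using ClippingPredictor = decltype(analog.clipping_predictor);
      analog.clipping_predictor.enabled = true;
      analog.clipping_predictor.mode =
          ClippingPredictor::kAdaptiveStepClippingPeakPrediction;
      analog.clipping_predictor.use_predicted_step = true;
      return config;
    }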
@@ -343,7 +380,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
// setting |fixed_gain_db|, the limiter can be turned into a compressor that
// first applies a fixed gain. The adaptive digital AGC can be turned off by
// setting |adaptive_digital.enabled| to false.
- struct GainController2 {
+ struct RTC_EXPORT GainController2 {
bool operator==(const GainController2& rhs) const;
bool operator!=(const GainController2& rhs) const {
return !(*this == rhs);
@@ -356,8 +393,15 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
struct FixedDigital {
float gain_db = 0.0f;
} fixed_digital;
- struct AdaptiveDigital {
+ struct RTC_EXPORT AdaptiveDigital {
+ bool operator==(const AdaptiveDigital& rhs) const;
+ bool operator!=(const AdaptiveDigital& rhs) const {
+ return !(*this == rhs);
+ }
+
bool enabled = false;
+      // If true, the adaptive digital controller runs, but the signal is not
+      // modified.
+ bool dry_run = false;
NoiseEstimator noise_estimator = kNoiseFloor;
int vad_reset_period_ms = 1500;
int adjacent_speech_frames_threshold = 12;
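A companion sketch for the new `dry_run` flag: when set, the adaptive digital
controller adapts and logs exactly as in live mode but the capture signal
passes through unmodified, which enables side-by-side data collection. Only
`dry_run` is the point here; the other fields keep their defaults:

    #include "modules/audio_processing/include/audio_processing.h"

    webrtc::AudioProcessing::Config MakeDryRunAgc2Config() {
      webrtc::AudioProcessing::Config config;
      config.gain_controller2.enabled = true;
      config.gain_controller2.adaptive_digital.enabled = true;
      // Adapt and dump internal state, but leave the audio untouched.
      config.gain_controller2.adaptive_digital.dry_run = true;
      return config;
    }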
diff --git a/modules/audio_processing/logging/apm_data_dumper.h b/modules/audio_processing/logging/apm_data_dumper.h
index 6d32b32ab5..9c2ac3be5d 100644
--- a/modules/audio_processing/logging/apm_data_dumper.h
+++ b/modules/audio_processing/logging/apm_data_dumper.h
@@ -65,6 +65,15 @@ class ApmDataDumper {
#endif
}
+ // Returns whether dumping functionality is enabled/available.
+ static bool IsAvailable() {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+ return true;
+#else
+ return false;
+#endif
+ }
+
// Default dump set.
static constexpr size_t kDefaultDumpSet = 0;
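`IsAvailable()` exposes the `WEBRTC_APM_DEBUG_DUMP` compile-time switch to
callers, so they can skip computations whose only consumer is the dumper. A
hypothetical call site; the `MaybeDumpEnergy` helper is illustrative and not
part of this change:

    #include <cstddef>

    #include "modules/audio_processing/logging/apm_data_dumper.h"

    void MaybeDumpEnergy(webrtc::ApmDataDumper& dumper,
                         const float* samples,
                         size_t size) {
      if (!webrtc::ApmDataDumper::IsAvailable()) {
        return;  // Dumping is compiled out; skip the reduction entirely.
      }
      float energy = 0.0f;
      for (size_t i = 0; i < size; ++i) {
        energy += samples[i] * samples[i];
      }
      dumper.DumpRaw("energy", energy);
    }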
diff --git a/modules/audio_processing/residual_echo_detector_unittest.cc b/modules/audio_processing/residual_echo_detector_unittest.cc
index 6697cf009d..a5f1409516 100644
--- a/modules/audio_processing/residual_echo_detector_unittest.cc
+++ b/modules/audio_processing/residual_echo_detector_unittest.cc
@@ -18,8 +18,7 @@
namespace webrtc {
TEST(ResidualEchoDetectorTests, Echo) {
- rtc::scoped_refptr<ResidualEchoDetector> echo_detector =
- new rtc::RefCountedObject<ResidualEchoDetector>();
+ auto echo_detector = rtc::make_ref_counted<ResidualEchoDetector>();
echo_detector->SetReliabilityForTest(1.0f);
std::vector<float> ones(160, 1.f);
std::vector<float> zeros(160, 0.f);
@@ -46,8 +45,7 @@ TEST(ResidualEchoDetectorTests, Echo) {
}
TEST(ResidualEchoDetectorTests, NoEcho) {
- rtc::scoped_refptr<ResidualEchoDetector> echo_detector =
- new rtc::RefCountedObject<ResidualEchoDetector>();
+ auto echo_detector = rtc::make_ref_counted<ResidualEchoDetector>();
echo_detector->SetReliabilityForTest(1.0f);
std::vector<float> ones(160, 1.f);
std::vector<float> zeros(160, 0.f);
@@ -69,8 +67,7 @@ TEST(ResidualEchoDetectorTests, NoEcho) {
}
TEST(ResidualEchoDetectorTests, EchoWithRenderClockDrift) {
- rtc::scoped_refptr<ResidualEchoDetector> echo_detector =
- new rtc::RefCountedObject<ResidualEchoDetector>();
+ auto echo_detector = rtc::make_ref_counted<ResidualEchoDetector>();
echo_detector->SetReliabilityForTest(1.0f);
std::vector<float> ones(160, 1.f);
std::vector<float> zeros(160, 0.f);
@@ -107,8 +104,7 @@ TEST(ResidualEchoDetectorTests, EchoWithRenderClockDrift) {
}
TEST(ResidualEchoDetectorTests, EchoWithCaptureClockDrift) {
- rtc::scoped_refptr<ResidualEchoDetector> echo_detector =
- new rtc::RefCountedObject<ResidualEchoDetector>();
+ auto echo_detector = rtc::make_ref_counted<ResidualEchoDetector>();
echo_detector->SetReliabilityForTest(1.0f);
std::vector<float> ones(160, 1.f);
std::vector<float> zeros(160, 0.f);