diff options
Diffstat (limited to 'modules/audio_processing/agc/agc_manager_direct.cc')
-rw-r--r-- | modules/audio_processing/agc/agc_manager_direct.cc | 182 |
1 files changed, 143 insertions, 39 deletions
diff --git a/modules/audio_processing/agc/agc_manager_direct.cc b/modules/audio_processing/agc/agc_manager_direct.cc index 2454d1bbb1..817678801e 100644 --- a/modules/audio_processing/agc/agc_manager_direct.cc +++ b/modules/audio_processing/agc/agc_manager_direct.cc @@ -16,6 +16,7 @@ #include "common_audio/include/audio_util.h" #include "modules/audio_processing/agc/gain_control.h" #include "modules/audio_processing/agc/gain_map_internal.h" +#include "modules/audio_processing/include/audio_frame_view.h" #include "rtc_base/atomic_ops.h" #include "rtc_base/checks.h" #include "rtc_base/logging.h" @@ -27,33 +28,33 @@ namespace webrtc { namespace { -// Amount the microphone level is lowered with every clipping event. -const int kClippedLevelStep = 15; -// Proportion of clipped samples required to declare a clipping event. -const float kClippedRatioThreshold = 0.1f; -// Time in frames to wait after a clipping event before checking again. -const int kClippedWaitFrames = 300; - // Amount of error we tolerate in the microphone level (presumably due to OS // quantization) before we assume the user has manually adjusted the microphone. -const int kLevelQuantizationSlack = 25; +constexpr int kLevelQuantizationSlack = 25; -const int kDefaultCompressionGain = 7; -const int kMaxCompressionGain = 12; -const int kMinCompressionGain = 2; +constexpr int kDefaultCompressionGain = 7; +constexpr int kMaxCompressionGain = 12; +constexpr int kMinCompressionGain = 2; // Controls the rate of compression changes towards the target. -const float kCompressionGainStep = 0.05f; +constexpr float kCompressionGainStep = 0.05f; -const int kMaxMicLevel = 255; +constexpr int kMaxMicLevel = 255; static_assert(kGainMapSize > kMaxMicLevel, "gain map too small"); -const int kMinMicLevel = 12; +constexpr int kMinMicLevel = 12; // Prevent very large microphone level changes. -const int kMaxResidualGainChange = 15; +constexpr int kMaxResidualGainChange = 15; // Maximum additional gain allowed to compensate for microphone level // restrictions from clipping events. -const int kSurplusCompressionGain = 6; +constexpr int kSurplusCompressionGain = 6; + +// History size for the clipping predictor evaluator (unit: number of 10 ms +// frames). +constexpr int kClippingPredictorEvaluatorHistorySize = 32; + +using ClippingPredictorConfig = AudioProcessing::Config::GainController1:: + AnalogGainController::ClippingPredictor; // Returns whether a fall-back solution to choose the maximum level should be // chosen. @@ -132,6 +133,33 @@ float ComputeClippedRatio(const float* const* audio, return static_cast<float>(num_clipped) / (samples_per_channel); } +void LogClippingPredictorMetrics(const ClippingPredictorEvaluator& evaluator) { + RTC_LOG(LS_INFO) << "Clipping predictor metrics: TP " + << evaluator.true_positives() << " TN " + << evaluator.true_negatives() << " FP " + << evaluator.false_positives() << " FN " + << evaluator.false_negatives(); + const float precision_denominator = + evaluator.true_positives() + evaluator.false_positives(); + const float recall_denominator = + evaluator.true_positives() + evaluator.false_negatives(); + if (precision_denominator > 0 && recall_denominator > 0) { + const float precision = evaluator.true_positives() / precision_denominator; + const float recall = evaluator.true_positives() / recall_denominator; + RTC_LOG(LS_INFO) << "Clipping predictor metrics: P " << precision << " R " + << recall; + const float f1_score_denominator = precision + recall; + if (f1_score_denominator > 0.0f) { + const float f1_score = 2 * precision * recall / f1_score_denominator; + RTC_LOG(LS_INFO) << "Clipping predictor metrics: F1 " << f1_score; + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc.ClippingPredictor.F1Score", + std::round(f1_score * 100.0f), /*min=*/0, + /*max=*/100, + /*bucket_count=*/50); + } + } +} + } // namespace MonoAgc::MonoAgc(ApmDataDumper* data_dumper, @@ -182,19 +210,19 @@ void MonoAgc::Process(const int16_t* audio, } } -void MonoAgc::HandleClipping() { +void MonoAgc::HandleClipping(int clipped_level_step) { // Always decrease the maximum level, even if the current level is below // threshold. - SetMaxLevel(std::max(clipped_level_min_, max_level_ - kClippedLevelStep)); + SetMaxLevel(std::max(clipped_level_min_, max_level_ - clipped_level_step)); if (log_to_histograms_) { RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.AgcClippingAdjustmentAllowed", - level_ - kClippedLevelStep >= clipped_level_min_); + level_ - clipped_level_step >= clipped_level_min_); } if (level_ > clipped_level_min_) { // Don't try to adjust the level if we're already below the limit. As // a consequence, if the user has brought the level above the limit, we // will still not react until the postproc updates the level. - SetLevel(std::max(clipped_level_min_, level_ - kClippedLevelStep)); + SetLevel(std::max(clipped_level_min_, level_ - clipped_level_step)); // Reset the AGCs for all channels since the level has changed. agc_->Reset(); } @@ -401,35 +429,58 @@ void MonoAgc::UpdateCompressor() { int AgcManagerDirect::instance_counter_ = 0; -AgcManagerDirect::AgcManagerDirect(Agc* agc, - int startup_min_level, - int clipped_level_min, - int sample_rate_hz) +AgcManagerDirect::AgcManagerDirect( + Agc* agc, + int startup_min_level, + int clipped_level_min, + int sample_rate_hz, + int clipped_level_step, + float clipped_ratio_threshold, + int clipped_wait_frames, + const ClippingPredictorConfig& clipping_config) : AgcManagerDirect(/*num_capture_channels*/ 1, startup_min_level, clipped_level_min, /*disable_digital_adaptive*/ false, - sample_rate_hz) { + sample_rate_hz, + clipped_level_step, + clipped_ratio_threshold, + clipped_wait_frames, + clipping_config) { RTC_DCHECK(channel_agcs_[0]); RTC_DCHECK(agc); channel_agcs_[0]->set_agc(agc); } -AgcManagerDirect::AgcManagerDirect(int num_capture_channels, - int startup_min_level, - int clipped_level_min, - bool disable_digital_adaptive, - int sample_rate_hz) +AgcManagerDirect::AgcManagerDirect( + int num_capture_channels, + int startup_min_level, + int clipped_level_min, + bool disable_digital_adaptive, + int sample_rate_hz, + int clipped_level_step, + float clipped_ratio_threshold, + int clipped_wait_frames, + const ClippingPredictorConfig& clipping_config) : data_dumper_( new ApmDataDumper(rtc::AtomicOps::Increment(&instance_counter_))), use_min_channel_level_(!UseMaxAnalogChannelLevel()), sample_rate_hz_(sample_rate_hz), num_capture_channels_(num_capture_channels), disable_digital_adaptive_(disable_digital_adaptive), - frames_since_clipped_(kClippedWaitFrames), + frames_since_clipped_(clipped_wait_frames), capture_output_used_(true), + clipped_level_step_(clipped_level_step), + clipped_ratio_threshold_(clipped_ratio_threshold), + clipped_wait_frames_(clipped_wait_frames), channel_agcs_(num_capture_channels), - new_compressions_to_set_(num_capture_channels) { + new_compressions_to_set_(num_capture_channels), + clipping_predictor_( + CreateClippingPredictor(num_capture_channels, clipping_config)), + use_clipping_predictor_step_(!!clipping_predictor_ && + clipping_config.use_predicted_step), + clipping_predictor_evaluator_(kClippingPredictorEvaluatorHistorySize), + clipping_predictor_log_counter_(0) { const int min_mic_level = GetMinMicLevel(); for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) { ApmDataDumper* data_dumper_ch = ch == 0 ? data_dumper_.get() : nullptr; @@ -438,7 +489,12 @@ AgcManagerDirect::AgcManagerDirect(int num_capture_channels, data_dumper_ch, startup_min_level, clipped_level_min, disable_digital_adaptive_, min_mic_level); } - RTC_DCHECK_LT(0, channel_agcs_.size()); + RTC_DCHECK(!channel_agcs_.empty()); + RTC_DCHECK_GT(clipped_level_step, 0); + RTC_DCHECK_LE(clipped_level_step, 255); + RTC_DCHECK_GT(clipped_ratio_threshold, 0.f); + RTC_DCHECK_LT(clipped_ratio_threshold, 1.f); + RTC_DCHECK_GT(clipped_wait_frames, 0); channel_agcs_[0]->ActivateLogging(); } @@ -453,6 +509,8 @@ void AgcManagerDirect::Initialize() { capture_output_used_ = true; AggregateChannelLevels(); + clipping_predictor_evaluator_.Reset(); + clipping_predictor_log_counter_ = 0; } void AgcManagerDirect::SetupDigitalGainControl( @@ -489,7 +547,13 @@ void AgcManagerDirect::AnalyzePreProcess(const float* const* audio, return; } - if (frames_since_clipped_ < kClippedWaitFrames) { + if (!!clipping_predictor_) { + AudioFrameView<const float> frame = AudioFrameView<const float>( + audio, num_capture_channels_, static_cast<int>(samples_per_channel)); + clipping_predictor_->Analyze(frame); + } + + if (frames_since_clipped_ < clipped_wait_frames_) { ++frames_since_clipped_; return; } @@ -505,14 +569,54 @@ void AgcManagerDirect::AnalyzePreProcess(const float* const* audio, // gain is increased, through SetMaxLevel(). float clipped_ratio = ComputeClippedRatio(audio, num_capture_channels_, samples_per_channel); - - if (clipped_ratio > kClippedRatioThreshold) { - RTC_DLOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio=" - << clipped_ratio; + const bool clipping_detected = clipped_ratio > clipped_ratio_threshold_; + bool clipping_predicted = false; + int predicted_step = 0; + if (!!clipping_predictor_) { + for (int channel = 0; channel < num_capture_channels_; ++channel) { + const auto step = clipping_predictor_->EstimateClippedLevelStep( + channel, stream_analog_level_, clipped_level_step_, + channel_agcs_[channel]->min_mic_level(), kMaxMicLevel); + if (use_clipping_predictor_step_ && step.has_value()) { + predicted_step = std::max(predicted_step, step.value()); + clipping_predicted = true; + } + } + // Clipping prediction evaluation. + absl::optional<int> prediction_interval = + clipping_predictor_evaluator_.Observe(clipping_detected, + clipping_predicted); + if (prediction_interval.has_value()) { + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.Agc.ClippingPredictor.PredictionInterval", + prediction_interval.value(), /*min=*/0, + /*max=*/49, /*bucket_count=*/50); + } + constexpr int kNumFramesIn30Seconds = 3000; + clipping_predictor_log_counter_++; + if (clipping_predictor_log_counter_ == kNumFramesIn30Seconds) { + LogClippingPredictorMetrics(clipping_predictor_evaluator_); + clipping_predictor_log_counter_ = 0; + } + } + if (clipping_detected || clipping_predicted) { + int step = clipped_level_step_; + if (clipping_detected) { + RTC_DLOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio=" + << clipped_ratio; + } + if (clipping_predicted) { + step = std::max(predicted_step, clipped_level_step_); + RTC_DLOG(LS_INFO) << "[agc] Clipping predicted. step=" << step; + } for (auto& state_ch : channel_agcs_) { - state_ch->HandleClipping(); + state_ch->HandleClipping(step); } frames_since_clipped_ = 0; + if (!!clipping_predictor_) { + clipping_predictor_->Reset(); + clipping_predictor_evaluator_.Reset(); + } } AggregateChannelLevels(); } |