aboutsummaryrefslogtreecommitdiff
path: root/modules/audio_processing/agc/agc_manager_direct.cc
diff options
context:
space:
mode:
Diffstat (limited to 'modules/audio_processing/agc/agc_manager_direct.cc')
-rw-r--r-- modules/audio_processing/agc/agc_manager_direct.cc 182
1 files changed, 143 insertions, 39 deletions
diff --git a/modules/audio_processing/agc/agc_manager_direct.cc b/modules/audio_processing/agc/agc_manager_direct.cc
index 2454d1bbb1..817678801e 100644
--- a/modules/audio_processing/agc/agc_manager_direct.cc
+++ b/modules/audio_processing/agc/agc_manager_direct.cc
@@ -16,6 +16,7 @@
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc/gain_control.h"
#include "modules/audio_processing/agc/gain_map_internal.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
#include "rtc_base/atomic_ops.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
@@ -27,33 +28,33 @@ namespace webrtc {
namespace {
-// Amount the microphone level is lowered with every clipping event.
-const int kClippedLevelStep = 15;
-// Proportion of clipped samples required to declare a clipping event.
-const float kClippedRatioThreshold = 0.1f;
-// Time in frames to wait after a clipping event before checking again.
-const int kClippedWaitFrames = 300;
-
// Amount of error we tolerate in the microphone level (presumably due to OS
// quantization) before we assume the user has manually adjusted the microphone.
-const int kLevelQuantizationSlack = 25;
+constexpr int kLevelQuantizationSlack = 25;
-const int kDefaultCompressionGain = 7;
-const int kMaxCompressionGain = 12;
-const int kMinCompressionGain = 2;
+constexpr int kDefaultCompressionGain = 7;
+constexpr int kMaxCompressionGain = 12;
+constexpr int kMinCompressionGain = 2;
// Controls the rate of compression changes towards the target.
-const float kCompressionGainStep = 0.05f;
+constexpr float kCompressionGainStep = 0.05f;
-const int kMaxMicLevel = 255;
+constexpr int kMaxMicLevel = 255;
static_assert(kGainMapSize > kMaxMicLevel, "gain map too small");
-const int kMinMicLevel = 12;
+constexpr int kMinMicLevel = 12;
// Prevent very large microphone level changes.
-const int kMaxResidualGainChange = 15;
+constexpr int kMaxResidualGainChange = 15;
// Maximum additional gain allowed to compensate for microphone level
// restrictions from clipping events.
-const int kSurplusCompressionGain = 6;
+constexpr int kSurplusCompressionGain = 6;
+
+// History size for the clipping predictor evaluator (unit: number of 10 ms
+// frames).
+constexpr int kClippingPredictorEvaluatorHistorySize = 32;  // Passed to the constructor of `clipping_predictor_evaluator_`.
+
+using ClippingPredictorConfig = AudioProcessing::Config::GainController1::
+ AnalogGainController::ClippingPredictor;  // Short alias used by the AgcManagerDirect constructors and CreateClippingPredictor().
// Returns whether a fall-back solution to choose the maximum level should be
// chosen.
@@ -132,6 +133,33 @@ float ComputeClippedRatio(const float* const* audio,
return static_cast<float>(num_clipped) / (samples_per_channel);
}
+void LogClippingPredictorMetrics(const ClippingPredictorEvaluator& evaluator) {  // Logs the evaluator's TP/TN/FP/FN counts, then precision, recall and F1 when they are defined.
+ RTC_LOG(LS_INFO) << "Clipping predictor metrics: TP "
+ << evaluator.true_positives() << " TN "
+ << evaluator.true_negatives() << " FP "
+ << evaluator.false_positives() << " FN "
+ << evaluator.false_negatives();
+ const float precision_denominator =  // TP + FP, stored as float so the divisions below are floating point.
+ evaluator.true_positives() + evaluator.false_positives();
+ const float recall_denominator =  // TP + FN, stored as float for the same reason.
+ evaluator.true_positives() + evaluator.false_negatives();
+ if (precision_denominator > 0 && recall_denominator > 0) {  // Skip the ratios entirely if either one would divide by zero.
+ const float precision = evaluator.true_positives() / precision_denominator;
+ const float recall = evaluator.true_positives() / recall_denominator;
+ RTC_LOG(LS_INFO) << "Clipping predictor metrics: P " << precision << " R "
+ << recall;
+ const float f1_score_denominator = precision + recall;
+ if (f1_score_denominator > 0.0f) {  // F1 is undefined when precision and recall are both zero.
+ const float f1_score = 2 * precision * recall / f1_score_denominator;
+ RTC_LOG(LS_INFO) << "Clipping predictor metrics: F1 " << f1_score;
+ RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc.ClippingPredictor.F1Score",  // F1 is scaled by 100 and rounded before being recorded.
+ std::round(f1_score * 100.0f), /*min=*/0,
+ /*max=*/100,
+ /*bucket_count=*/50);
+ }
+ }
+}
+
} // namespace
MonoAgc::MonoAgc(ApmDataDumper* data_dumper,
@@ -182,19 +210,19 @@ void MonoAgc::Process(const int16_t* audio,
}
}
-void MonoAgc::HandleClipping() {
+void MonoAgc::HandleClipping(int clipped_level_step) {  // Lowers the maximum level (and the current level, if above `clipped_level_min_`) by `clipped_level_step`, never going below `clipped_level_min_`.
// Always decrease the maximum level, even if the current level is below
// threshold.
- SetMaxLevel(std::max(clipped_level_min_, max_level_ - kClippedLevelStep));
+ SetMaxLevel(std::max(clipped_level_min_, max_level_ - clipped_level_step));
if (log_to_histograms_) {
RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.AgcClippingAdjustmentAllowed",
- level_ - kClippedLevelStep >= clipped_level_min_);
+ level_ - clipped_level_step >= clipped_level_min_);  // True when a full step still fits above the minimum level.
}
if (level_ > clipped_level_min_) {
// Don't try to adjust the level if we're already below the limit. As
// a consequence, if the user has brought the level above the limit, we
// will still not react until the postproc updates the level.
- SetLevel(std::max(clipped_level_min_, level_ - kClippedLevelStep));
+ SetLevel(std::max(clipped_level_min_, level_ - clipped_level_step));
// Reset the AGCs for all channels since the level has changed.
agc_->Reset();
}
@@ -401,35 +429,58 @@ void MonoAgc::UpdateCompressor() {
int AgcManagerDirect::instance_counter_ = 0;
-AgcManagerDirect::AgcManagerDirect(Agc* agc,
- int startup_min_level,
- int clipped_level_min,
- int sample_rate_hz)
+AgcManagerDirect::AgcManagerDirect(
+ Agc* agc,
+ int startup_min_level,
+ int clipped_level_min,
+ int sample_rate_hz,
+ int clipped_level_step,
+ float clipped_ratio_threshold,
+ int clipped_wait_frames,
+ const ClippingPredictorConfig& clipping_config)
: AgcManagerDirect(/*num_capture_channels*/ 1,  // Delegates to the multi-channel constructor with exactly one capture channel.
startup_min_level,
clipped_level_min,
/*disable_digital_adaptive*/ false,
- sample_rate_hz) {
+ sample_rate_hz,
+ clipped_level_step,
+ clipped_ratio_threshold,
+ clipped_wait_frames,
+ clipping_config) {  // The clipping parameters are forwarded unchanged to the delegated constructor.
RTC_DCHECK(channel_agcs_[0]);
RTC_DCHECK(agc);
channel_agcs_[0]->set_agc(agc);  // Hands the caller-supplied `agc` to the single channel.
}
-AgcManagerDirect::AgcManagerDirect(int num_capture_channels,
- int startup_min_level,
- int clipped_level_min,
- bool disable_digital_adaptive,
- int sample_rate_hz)
+AgcManagerDirect::AgcManagerDirect(
+ int num_capture_channels,
+ int startup_min_level,
+ int clipped_level_min,
+ bool disable_digital_adaptive,
+ int sample_rate_hz,
+ int clipped_level_step,
+ float clipped_ratio_threshold,
+ int clipped_wait_frames,
+ const ClippingPredictorConfig& clipping_config)
: data_dumper_(
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_counter_))),
use_min_channel_level_(!UseMaxAnalogChannelLevel()),
sample_rate_hz_(sample_rate_hz),
num_capture_channels_(num_capture_channels),
disable_digital_adaptive_(disable_digital_adaptive),
- frames_since_clipped_(kClippedWaitFrames),
+ frames_since_clipped_(clipped_wait_frames),
capture_output_used_(true),
+ clipped_level_step_(clipped_level_step),
+ clipped_ratio_threshold_(clipped_ratio_threshold),
+ clipped_wait_frames_(clipped_wait_frames),
channel_agcs_(num_capture_channels),
- new_compressions_to_set_(num_capture_channels) {
+ new_compressions_to_set_(num_capture_channels),
+ clipping_predictor_(
+ CreateClippingPredictor(num_capture_channels, clipping_config)),
+ use_clipping_predictor_step_(!!clipping_predictor_ &&
+ clipping_config.use_predicted_step),
+ clipping_predictor_evaluator_(kClippingPredictorEvaluatorHistorySize),
+ clipping_predictor_log_counter_(0) {
const int min_mic_level = GetMinMicLevel();
for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
ApmDataDumper* data_dumper_ch = ch == 0 ? data_dumper_.get() : nullptr;
@@ -438,7 +489,12 @@ AgcManagerDirect::AgcManagerDirect(int num_capture_channels,
data_dumper_ch, startup_min_level, clipped_level_min,
disable_digital_adaptive_, min_mic_level);
}
- RTC_DCHECK_LT(0, channel_agcs_.size());
+ RTC_DCHECK(!channel_agcs_.empty());
+ RTC_DCHECK_GT(clipped_level_step, 0);
+ RTC_DCHECK_LE(clipped_level_step, 255);
+ RTC_DCHECK_GT(clipped_ratio_threshold, 0.f);
+ RTC_DCHECK_LT(clipped_ratio_threshold, 1.f);
+ RTC_DCHECK_GT(clipped_wait_frames, 0);
channel_agcs_[0]->ActivateLogging();
}
@@ -453,6 +509,8 @@ void AgcManagerDirect::Initialize() {
capture_output_used_ = true;
AggregateChannelLevels();
+ clipping_predictor_evaluator_.Reset();
+ clipping_predictor_log_counter_ = 0;
}
void AgcManagerDirect::SetupDigitalGainControl(
@@ -489,7 +547,13 @@ void AgcManagerDirect::AnalyzePreProcess(const float* const* audio,
return;
}
- if (frames_since_clipped_ < kClippedWaitFrames) {
+ if (!!clipping_predictor_) {
+ AudioFrameView<const float> frame = AudioFrameView<const float>(
+ audio, num_capture_channels_, static_cast<int>(samples_per_channel));
+ clipping_predictor_->Analyze(frame);
+ }
+
+ if (frames_since_clipped_ < clipped_wait_frames_) {
++frames_since_clipped_;
return;
}
@@ -505,14 +569,54 @@ void AgcManagerDirect::AnalyzePreProcess(const float* const* audio,
// gain is increased, through SetMaxLevel().
float clipped_ratio =
ComputeClippedRatio(audio, num_capture_channels_, samples_per_channel);
-
- if (clipped_ratio > kClippedRatioThreshold) {
- RTC_DLOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio="
- << clipped_ratio;
+ const bool clipping_detected = clipped_ratio > clipped_ratio_threshold_;
+ bool clipping_predicted = false;
+ int predicted_step = 0;
+ if (!!clipping_predictor_) {
+ for (int channel = 0; channel < num_capture_channels_; ++channel) {
+ const auto step = clipping_predictor_->EstimateClippedLevelStep(
+ channel, stream_analog_level_, clipped_level_step_,
+ channel_agcs_[channel]->min_mic_level(), kMaxMicLevel);
+ if (use_clipping_predictor_step_ && step.has_value()) {
+ predicted_step = std::max(predicted_step, step.value());
+ clipping_predicted = true;
+ }
+ }
+ // Clipping prediction evaluation.
+ absl::optional<int> prediction_interval =
+ clipping_predictor_evaluator_.Observe(clipping_detected,
+ clipping_predicted);
+ if (prediction_interval.has_value()) {
+ RTC_HISTOGRAM_COUNTS_LINEAR(
+ "WebRTC.Audio.Agc.ClippingPredictor.PredictionInterval",
+ prediction_interval.value(), /*min=*/0,
+ /*max=*/49, /*bucket_count=*/50);
+ }
+ constexpr int kNumFramesIn30Seconds = 3000;
+ clipping_predictor_log_counter_++;
+ if (clipping_predictor_log_counter_ == kNumFramesIn30Seconds) {
+ LogClippingPredictorMetrics(clipping_predictor_evaluator_);
+ clipping_predictor_log_counter_ = 0;
+ }
+ }
+ if (clipping_detected || clipping_predicted) {
+ int step = clipped_level_step_;
+ if (clipping_detected) {
+ RTC_DLOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio="
+ << clipped_ratio;
+ }
+ if (clipping_predicted) {
+ step = std::max(predicted_step, clipped_level_step_);
+ RTC_DLOG(LS_INFO) << "[agc] Clipping predicted. step=" << step;
+ }
for (auto& state_ch : channel_agcs_) {
- state_ch->HandleClipping();
+ state_ch->HandleClipping(step);
}
frames_since_clipped_ = 0;
+ if (!!clipping_predictor_) {
+ clipping_predictor_->Reset();
+ clipping_predictor_evaluator_.Reset();
+ }
}
AggregateChannelLevels();
}