aboutsummaryrefslogtreecommitdiff
path: root/webrtc/modules/audio_processing/agc/pitch_based_vad.cc
diff options
context:
space:
mode:
Diffstat (limited to 'webrtc/modules/audio_processing/agc/pitch_based_vad.cc')
-rw-r--r--webrtc/modules/audio_processing/agc/pitch_based_vad.cc122
1 files changed, 122 insertions, 0 deletions
diff --git a/webrtc/modules/audio_processing/agc/pitch_based_vad.cc b/webrtc/modules/audio_processing/agc/pitch_based_vad.cc
new file mode 100644
index 0000000000..0cfa52a010
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pitch_based_vad.cc
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/pitch_based_vad.h"
+
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+#include "webrtc/modules/audio_processing/agc/circular_buffer.h"
+#include "webrtc/modules/audio_processing/agc/common.h"
+#include "webrtc/modules/audio_processing/agc/noise_gmm_tables.h"
+#include "webrtc/modules/audio_processing/agc/voice_gmm_tables.h"
+#include "webrtc/modules/interface/module_common_types.h"
+
+namespace webrtc {
+
+static_assert(kNoiseGmmDim == kVoiceGmmDim,
+ "noise and voice gmm dimension not equal");
+
+// These values should match MATLAB counterparts for unit-tests to pass.
+static const int kPosteriorHistorySize = 500; // 5 sec of 10 ms frames.
+static const double kInitialPriorProbability = 0.3;
+static const int kTransientWidthThreshold = 7;
+static const double kLowProbabilityThreshold = 0.2;
+
+static double LimitProbability(double p) {
+ const double kLimHigh = 0.99;
+ const double kLimLow = 0.01;
+
+ if (p > kLimHigh)
+ p = kLimHigh;
+ else if (p < kLimLow)
+ p = kLimLow;
+ return p;
+}
+
+PitchBasedVad::PitchBasedVad()
+ : p_prior_(kInitialPriorProbability),
+ circular_buffer_(AgcCircularBuffer::Create(kPosteriorHistorySize)) {
+ // Setup noise GMM.
+ noise_gmm_.dimension = kNoiseGmmDim;
+ noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
+ noise_gmm_.weight = kNoiseGmmWeights;
+ noise_gmm_.mean = &kNoiseGmmMean[0][0];
+ noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
+
+ // Setup voice GMM.
+ voice_gmm_.dimension = kVoiceGmmDim;
+ voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
+ voice_gmm_.weight = kVoiceGmmWeights;
+ voice_gmm_.mean = &kVoiceGmmMean[0][0];
+ voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
+}
+
+PitchBasedVad::~PitchBasedVad() {}
+
+int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
+ double* p_combined) {
+ double p;
+ double gmm_features[3];
+ double pdf_features_given_voice;
+ double pdf_features_given_noise;
+ // These limits are the same in matlab implementation 'VoicingProbGMM().'
+ const double kLimLowLogPitchGain = -2.0;
+ const double kLimHighLogPitchGain = -0.9;
+ const double kLimLowSpectralPeak = 200;
+ const double kLimHighSpectralPeak = 2000;
+ const double kEps = 1e-12;
+ for (int n = 0; n < features.num_frames; n++) {
+ gmm_features[0] = features.log_pitch_gain[n];
+ gmm_features[1] = features.spectral_peak[n];
+ gmm_features[2] = features.pitch_lag_hz[n];
+
+ pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
+ pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
+
+ if (features.spectral_peak[n] < kLimLowSpectralPeak ||
+ features.spectral_peak[n] > kLimHighSpectralPeak ||
+ features.log_pitch_gain[n] < kLimLowLogPitchGain) {
+ pdf_features_given_voice = kEps * pdf_features_given_noise;
+ } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
+ pdf_features_given_noise = kEps * pdf_features_given_voice;
+ }
+
+ p = p_prior_ * pdf_features_given_voice / (pdf_features_given_voice *
+ p_prior_ + pdf_features_given_noise * (1 - p_prior_));
+
+ p = LimitProbability(p);
+
+ // Combine pitch-based probability with standalone probability, before
+ // updating prior probabilities.
+ double prod_active = p * p_combined[n];
+ double prod_inactive = (1 - p) * (1 - p_combined[n]);
+ p_combined[n] = prod_active / (prod_active + prod_inactive);
+
+ if (UpdatePrior(p_combined[n]) < 0)
+ return -1;
+ // Limit prior probability. With a zero prior probability the posterior
+ // probability is always zero.
+ p_prior_ = LimitProbability(p_prior_);
+ }
+ return 0;
+}
+
+int PitchBasedVad::UpdatePrior(double p) {
+ circular_buffer_->Insert(p);
+ if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
+ kLowProbabilityThreshold) < 0)
+ return -1;
+ p_prior_ = circular_buffer_->Mean();
+ return 0;
+}
+
+} // namespace webrtc