diff options
author | Bjorn Volcker <bjornv@webrtc.org> | 2015-06-25 08:46:02 +0200 |
---|---|---|
committer | Bjorn Volcker <bjornv@webrtc.org> | 2015-06-25 06:46:14 +0000 |
commit | 51c7cbb86ae13bc0c005fc8d14973f3e58d6ff4c (patch) | |
tree | 0e67f73eb7415e656aa62fae92b885b88246f336 /webrtc/modules/audio_processing/vad/vad_audio_proc.cc | |
parent | 518c683f3e413523a458a94b533274bd7f29992d (diff) | |
download | webrtc-51c7cbb86ae13bc0c005fc8d14973f3e58d6ff4c.tar.gz |
Revert "Pull the Voice Activity Detector out from the AGC"
This reverts commit 518c683f3e413523a458a94b533274bd7f29992d.
Breaks Linux-Asan bot
https://uberchromegw.corp.google.com/i/client.webrtc/builders/Linux%20Asan/builds/4348/steps/libjingle_peerconnection_unittest/logs/stdio
BUG=
TBR=aluebs@webrtc.org
Review URL: https://codereview.webrtc.org/1208793002.
Cr-Commit-Position: refs/heads/master@{#9503}
Diffstat (limited to 'webrtc/modules/audio_processing/vad/vad_audio_proc.cc')
-rw-r--r-- | webrtc/modules/audio_processing/vad/vad_audio_proc.cc | 274 |
1 files changed, 0 insertions, 274 deletions
diff --git a/webrtc/modules/audio_processing/vad/vad_audio_proc.cc b/webrtc/modules/audio_processing/vad/vad_audio_proc.cc
deleted file mode 100644
index e8f27f802d..0000000000
--- a/webrtc/modules/audio_processing/vad/vad_audio_proc.cc
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "webrtc/modules/audio_processing/vad/vad_audio_proc.h"
-
-#include <math.h>
-#include <stdio.h>
-
-#include "webrtc/common_audio/fft4g.h"
-#include "webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h"
-#include "webrtc/modules/audio_processing/vad/pitch_internal.h"
-#include "webrtc/modules/audio_processing/vad/pole_zero_filter.h"
-extern "C" {
-#include "webrtc/modules/audio_coding/codecs/isac/main/source/codec.h"
-#include "webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h"
-#include "webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
-#include "webrtc/modules/audio_coding/codecs/isac/main/source/structs.h"
-}
-#include "webrtc/modules/interface/module_common_types.h"
-
-namespace webrtc {
-
-// The following structures are declared anonymous in iSAC's structs.h. To
-// forward declare them, we use this derived class trick.
-struct VadAudioProc::PitchAnalysisStruct : public ::PitchAnalysisStruct {};
-struct VadAudioProc::PreFiltBankstr : public ::PreFiltBankstr {};
-
-static const float kFrequencyResolution =
-    kSampleRateHz / static_cast<float>(VadAudioProc::kDftSize);
-static const int kSilenceRms = 5;
-
-// TODO(turajs): Make a Create or Init for VadAudioProc.
-VadAudioProc::VadAudioProc()
-    : audio_buffer_(),
-      num_buffer_samples_(kNumPastSignalSamples),
-      log_old_gain_(-2),
-      old_lag_(50),  // Arbitrary but valid as pitch-lag (in samples).
-      pitch_analysis_handle_(new PitchAnalysisStruct),
-      pre_filter_handle_(new PreFiltBankstr),
-      high_pass_filter_(PoleZeroFilter::Create(kCoeffNumerator,
-                                               kFilterOrder,
-                                               kCoeffDenominator,
-                                               kFilterOrder)) {
-  static_assert(kNumPastSignalSamples + kNumSubframeSamples ==
-                    sizeof(kLpcAnalWin) / sizeof(kLpcAnalWin[0]),
-                "lpc analysis window incorrect size");
-  static_assert(kLpcOrder + 1 == sizeof(kCorrWeight) / sizeof(kCorrWeight[0]),
-                "correlation weight incorrect size");
-
-  // TODO(turajs): Are we doing too much in the constructor?
-  float data[kDftSize];
-  // Make FFT to initialize.
-  ip_[0] = 0;
-  WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
-  // TODO(turajs): Need to initialize high-pass filter.
-
-  // Initialize iSAC components.
-  WebRtcIsac_InitPreFilterbank(pre_filter_handle_.get());
-  WebRtcIsac_InitPitchAnalysis(pitch_analysis_handle_.get());
-}
-
-VadAudioProc::~VadAudioProc() {
-}
-
-void VadAudioProc::ResetBuffer() {
-  memcpy(audio_buffer_, &audio_buffer_[kNumSamplesToProcess],
-         sizeof(audio_buffer_[0]) * kNumPastSignalSamples);
-  num_buffer_samples_ = kNumPastSignalSamples;
-}
-
-int VadAudioProc::ExtractFeatures(const int16_t* frame,
-                                  int length,
-                                  AudioFeatures* features) {
-  features->num_frames = 0;
-  if (length != kNumSubframeSamples) {
-    return -1;
-  }
-
-  // High-pass filter to remove the DC component and very low frequency content.
-  // We have experienced that this high-pass filtering improves voice/non-voiced
-  // classification.
-  if (high_pass_filter_->Filter(frame, kNumSubframeSamples,
-                                &audio_buffer_[num_buffer_samples_]) != 0) {
-    return -1;
-  }
-
-  num_buffer_samples_ += kNumSubframeSamples;
-  if (num_buffer_samples_ < kBufferLength) {
-    return 0;
-  }
-  assert(num_buffer_samples_ == kBufferLength);
-  features->num_frames = kNum10msSubframes;
-  features->silence = false;
-
-  Rms(features->rms, kMaxNumFrames);
-  for (int i = 0; i < kNum10msSubframes; ++i) {
-    if (features->rms[i] < kSilenceRms) {
-      // PitchAnalysis can cause NaNs in the pitch gain if it's fed silence.
-      // Bail out here instead.
-      features->silence = true;
-      ResetBuffer();
-      return 0;
-    }
-  }
-
-  PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz,
-                kMaxNumFrames);
-  FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames);
-  ResetBuffer();
-  return 0;
-}
-
-// Computes |kLpcOrder + 1| correlation coefficients.
-void VadAudioProc::SubframeCorrelation(double* corr,
-                                       int length_corr,
-                                       int subframe_index) {
-  assert(length_corr >= kLpcOrder + 1);
-  double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples];
-  int buffer_index = subframe_index * kNumSubframeSamples;
-
-  for (int n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++)
-    windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n];
-
-  WebRtcIsac_AutoCorr(corr, windowed_audio,
-                      kNumSubframeSamples + kNumPastSignalSamples, kLpcOrder);
-}
-
-// Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input.
-// The analysis window is 15 ms long and it is centered on the first half of
-// each 10ms sub-frame. This is equivalent to computing LPC coefficients for the
-// first half of each 10 ms subframe.
-void VadAudioProc::GetLpcPolynomials(double* lpc, int length_lpc) {
-  assert(length_lpc >= kNum10msSubframes * (kLpcOrder + 1));
-  double corr[kLpcOrder + 1];
-  double reflec_coeff[kLpcOrder];
-  for (int i = 0, offset_lpc = 0; i < kNum10msSubframes;
-       i++, offset_lpc += kLpcOrder + 1) {
-    SubframeCorrelation(corr, kLpcOrder + 1, i);
-    corr[0] *= 1.0001;
-    // This makes Lev-Durb a bit more stable.
-    for (int k = 0; k < kLpcOrder + 1; k++) {
-      corr[k] *= kCorrWeight[k];
-    }
-    WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder);
-  }
-}
-
-// Fit a second order curve to these 3 points and find the location of the
-// extremum. The points are inverted before curve fitting.
-static float QuadraticInterpolation(float prev_val,
-                                    float curr_val,
-                                    float next_val) {
-  // Doing the interpolation in |1 / A(z)|^2.
-  float fractional_index = 0;
-  next_val = 1.0f / next_val;
-  prev_val = 1.0f / prev_val;
-  curr_val = 1.0f / curr_val;
-
-  fractional_index =
-      -(next_val - prev_val) * 0.5f / (next_val + prev_val - 2.f * curr_val);
-  assert(fabs(fractional_index) < 1);
-  return fractional_index;
-}
-
-// 1 / A(z), where A(z) is defined by |lpc| is a model of the spectral envelope
-// of the input signal. The local maximum of the spectral envelope corresponds
-// with the local minimum of A(z). It saves complexity, as we save one
-// inversion. Furthermore, we find the first local maximum of magnitude squared,
-// to save on one square root.
-void VadAudioProc::FindFirstSpectralPeaks(double* f_peak, int length_f_peak) {
-  assert(length_f_peak >= kNum10msSubframes);
-  double lpc[kNum10msSubframes * (kLpcOrder + 1)];
-  // For all sub-frames.
-  GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1));
-
-  const int kNumDftCoefficients = kDftSize / 2 + 1;
-  float data[kDftSize];
-
-  for (int i = 0; i < kNum10msSubframes; i++) {
-    // Convert to float with zero pad.
-    memset(data, 0, sizeof(data));
-    for (int n = 0; n < kLpcOrder + 1; n++) {
-      data[n] = static_cast<float>(lpc[i * (kLpcOrder + 1) + n]);
-    }
-    // Transform to frequency domain.
-    WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
-
-    int index_peak = 0;
-    float prev_magn_sqr = data[0] * data[0];
-    float curr_magn_sqr = data[2] * data[2] + data[3] * data[3];
-    float next_magn_sqr;
-    bool found_peak = false;
-    for (int n = 2; n < kNumDftCoefficients - 1; n++) {
-      next_magn_sqr =
-          data[2 * n] * data[2 * n] + data[2 * n + 1] * data[2 * n + 1];
-      if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
-        found_peak = true;
-        index_peak = n - 1;
-        break;
-      }
-      prev_magn_sqr = curr_magn_sqr;
-      curr_magn_sqr = next_magn_sqr;
-    }
-    float fractional_index = 0;
-    if (!found_peak) {
-      // Checking if |kNumDftCoefficients - 1| is the local minimum.
-      next_magn_sqr = data[1] * data[1];
-      if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
-        index_peak = kNumDftCoefficients - 1;
-      }
-    } else {
-      // A peak is found, do a simple quadratic interpolation to get a more
-      // accurate estimate of the peak location.
-      fractional_index =
-          QuadraticInterpolation(prev_magn_sqr, curr_magn_sqr, next_magn_sqr);
-    }
-    f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution;
-  }
-}
-
-// Using iSAC functions to estimate pitch gains & lags.
-void VadAudioProc::PitchAnalysis(double* log_pitch_gains,
-                                 double* pitch_lags_hz,
-                                 int length) {
-  // TODO(turajs): This can be "imported" from iSAC & and the next two
-  // constants.
-  assert(length >= kNum10msSubframes);
-  const int kNumPitchSubframes = 4;
-  double gains[kNumPitchSubframes];
-  double lags[kNumPitchSubframes];
-
-  const int kNumSubbandFrameSamples = 240;
-  const int kNumLookaheadSamples = 24;
-
-  float lower[kNumSubbandFrameSamples];
-  float upper[kNumSubbandFrameSamples];
-  double lower_lookahead[kNumSubbandFrameSamples];
-  double upper_lookahead[kNumSubbandFrameSamples];
-  double lower_lookahead_pre_filter[kNumSubbandFrameSamples +
-                                    kNumLookaheadSamples];
-
-  // Split signal to lower and upper bands
-  WebRtcIsac_SplitAndFilterFloat(&audio_buffer_[kNumPastSignalSamples], lower,
-                                 upper, lower_lookahead, upper_lookahead,
-                                 pre_filter_handle_.get());
-  WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter,
-                           pitch_analysis_handle_.get(), lags, gains);
-
-  // Lags are computed on lower-band signal with sampling rate half of the
-  // input signal.
-  GetSubframesPitchParameters(
-      kSampleRateHz / 2, gains, lags, kNumPitchSubframes, kNum10msSubframes,
-      &log_old_gain_, &old_lag_, log_pitch_gains, pitch_lags_hz);
-}
-
-void VadAudioProc::Rms(double* rms, int length_rms) {
-  assert(length_rms >= kNum10msSubframes);
-  int offset = kNumPastSignalSamples;
-  for (int i = 0; i < kNum10msSubframes; i++) {
-    rms[i] = 0;
-    for (int n = 0; n < kNumSubframeSamples; n++, offset++)
-      rms[i] += audio_buffer_[offset] * audio_buffer_[offset];
-    rms[i] = sqrt(rms[i] / kNumSubframeSamples);
-  }
-}
-
-}  // namespace webrtc