Revert "Pull the Voice Activity Detector out from the AGC"

This reverts commit 518c683f3e413523a458a94b533274bd7f29992d. Breaks Linux-Asan bot https://uberchromegw.corp.google.com/i/client.webrtc/builders/Linux%20Asan/builds/4348/steps/libjingle_peerconnection_unittest/logs/stdio BUG= TBR=aluebs@webrtc.org Review URL: https://codereview.webrtc.org/1208793002. Cr-Commit-Position: refs/heads/master@{#9503}
author: Bjorn Volcker <bjornv@webrtc.org> 2015-06-25 08:46:02 +0200
committer: Bjorn Volcker <bjornv@webrtc.org> 2015-06-25 06:46:14 +0000
commit: 51c7cbb86ae13bc0c005fc8d14973f3e58d6ff4c (patch)
tree: 0e67f73eb7415e656aa62fae92b885b88246f336 /webrtc/modules/audio_processing/vad/vad_audio_proc.cc
parent: 518c683f3e413523a458a94b533274bd7f29992d (diff)
download: webrtc-51c7cbb86ae13bc0c005fc8d14973f3e58d6ff4c.tar.gz
1 files changed, 0 insertions, 274 deletions
diff --git a/webrtc/modules/audio_processing/vad/vad_audio_proc.cc b/webrtc/modules/audio_processing/vad/vad_audio_proc.cc
deleted file mode 100644
index e8f27f802d..0000000000
--- a/webrtc/modules/audio_processing/vad/vad_audio_proc.cc
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "webrtc/modules/audio_processing/vad/vad_audio_proc.h"
-
-#include <math.h>
-#include <stdio.h>
-
-#include "webrtc/common_audio/fft4g.h"
-#include "webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h"
-#include "webrtc/modules/audio_processing/vad/pitch_internal.h"
-#include "webrtc/modules/audio_processing/vad/pole_zero_filter.h"
-extern "C" {
-#include "webrtc/modules/audio_coding/codecs/isac/main/source/codec.h"
-#include "webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h"
-#include "webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
-#include "webrtc/modules/audio_coding/codecs/isac/main/source/structs.h"
-}
-#include "webrtc/modules/interface/module_common_types.h"
-
-namespace webrtc {
-
-// iSAC's structs.h declares the following structures as anonymous, so they
-// cannot be forward declared directly; this derived class trick allows it.
-struct VadAudioProc::PitchAnalysisStruct : public ::PitchAnalysisStruct {};
-struct VadAudioProc::PreFiltBankstr : public ::PreFiltBankstr {};
-
-static const float kFrequencyResolution =  // Sample rate divided by DFT size.
-    kSampleRateHz / static_cast<float>(VadAudioProc::kDftSize);
-static const int kSilenceRms = 5;  // Subframe RMS below this means silence.
-
-// TODO(turajs): Make a Create or Init for VadAudioProc.
-VadAudioProc::VadAudioProc()
-    : audio_buffer_(),
-      num_buffer_samples_(kNumPastSignalSamples),
-      log_old_gain_(-2),
-      old_lag_(50),  // Arbitrary but valid as pitch-lag (in samples).
-      pitch_analysis_handle_(new PitchAnalysisStruct),
-      pre_filter_handle_(new PreFiltBankstr),
-      high_pass_filter_(PoleZeroFilter::Create(kCoeffNumerator,
-                                               kFilterOrder,
-                                               kCoeffDenominator,
-                                               kFilterOrder)) {
-  static_assert(kNumPastSignalSamples + kNumSubframeSamples ==
-                    sizeof(kLpcAnalWin) / sizeof(kLpcAnalWin[0]),
-                "lpc analysis window incorrect size");
-  static_assert(kLpcOrder + 1 == sizeof(kCorrWeight) / sizeof(kCorrWeight[0]),
-                "correlation weight incorrect size");
-
-  // TODO(turajs): Are we doing too much in the constructor?
-  // Make one FFT with ip_[0] == 0 so that its work tables get initialized.
-  float data[kDftSize] = {0};  // Zeroed: the FFT must not read garbage.
-  ip_[0] = 0;
-  WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
-  // TODO(turajs): Need to initialize high-pass filter.
-
-  // Initialize the iSAC state owned by the two handles created above.
-  WebRtcIsac_InitPreFilterbank(pre_filter_handle_.get());
-  WebRtcIsac_InitPitchAnalysis(pitch_analysis_handle_.get());
-}
-
-// Intentionally empty: the destructor performs no explicit cleanup.
-VadAudioProc::~VadAudioProc() {}
-
-void VadAudioProc::ResetBuffer() {  // Keeps |kNumPastSignalSamples| samples.
-  const size_t num_bytes = sizeof(audio_buffer_[0]) * kNumPastSignalSamples;
-  memcpy(&audio_buffer_[0], audio_buffer_ + kNumSamplesToProcess, num_bytes);
-  num_buffer_samples_ = kNumPastSignalSamples;  // Refill starts after them.
-}
-
-int VadAudioProc::ExtractFeatures(const int16_t* frame,
-                                  int length,
-                                  AudioFeatures* features) {
-  // Buffers one subframe and, once the buffer is full, fills |features|.
-  // Returns -1 on a wrong |length| or a filter failure and 0 otherwise.
-  // |features->num_frames| is 0 unless the buffer filled up in this call; if
-  // any subframe is silent only |rms| and |silence| are updated besides it.
-  features->num_frames = 0;
-  if (length != kNumSubframeSamples)
-    return -1;
-
-  // High-pass filter into the buffer to remove the DC component and very low
-  // frequency content. We have experienced that this high-pass filtering
-  // improves voice/non-voiced classification.
-  if (high_pass_filter_->Filter(frame, kNumSubframeSamples,
-                                audio_buffer_ + num_buffer_samples_) != 0)
-    return -1;
-
-  num_buffer_samples_ += kNumSubframeSamples;
-  if (num_buffer_samples_ < kBufferLength)
-    return 0;  // Not enough audio buffered yet.
-  assert(num_buffer_samples_ == kBufferLength);
-
-  features->num_frames = kNum10msSubframes;
-  Rms(features->rms, kMaxNumFrames);
-
-  // PitchAnalysis can cause NaNs in the pitch gain if it's fed silence, so
-  // the remaining features are only computed when no subframe is silent.
-  bool any_silent = false;
-  for (int k = 0; k < kNum10msSubframes && !any_silent; ++k)
-    any_silent = features->rms[k] < kSilenceRms;
-  features->silence = any_silent;
-  if (!any_silent) {
-    PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz,
-                  kMaxNumFrames);
-    FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames);
-  }
-
-  ResetBuffer();
-  return 0;
-}
-
-// Fills |corr| with |kLpcOrder + 1| correlation coefficients of the windowed
-// audio that starts at subframe |subframe_index| of |audio_buffer_|.
-void VadAudioProc::SubframeCorrelation(double* corr,
-                                       int length_corr,
-                                       int subframe_index) {
-  assert(length_corr >= kLpcOrder + 1);
-  const int kWindowLength = kNumSubframeSamples + kNumPastSignalSamples;
-  double windowed[kWindowLength];
-  const int first_sample = subframe_index * kNumSubframeSamples;
-  for (int k = 0; k < kWindowLength; ++k)
-    windowed[k] = audio_buffer_[first_sample + k] * kLpcAnalWin[k];
-
-  WebRtcIsac_AutoCorr(corr, windowed, kWindowLength, kLpcOrder);
-}
-
-// Computes |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input,
-// and stores them back to back in |lpc| (|kLpcOrder + 1| values per set). The
-// 15 ms analysis window is centered on the first half of each 10 ms subframe,
-// which is equivalent to computing LPC coefficients for that first half.
-void VadAudioProc::GetLpcPolynomials(double* lpc, int length_lpc) {
-  const int kNumCoeffs = kLpcOrder + 1;
-  assert(length_lpc >= kNum10msSubframes * kNumCoeffs);
-  double autocorr[kNumCoeffs];
-  double reflection[kLpcOrder];
-  for (int subframe = 0; subframe < kNum10msSubframes; ++subframe) {
-    SubframeCorrelation(autocorr, kNumCoeffs, subframe);
-    // Condition the correlations; this makes Lev-Durb a bit more stable.
-    autocorr[0] *= 1.0001;
-    for (int lag = 0; lag < kNumCoeffs; ++lag)
-      autocorr[lag] *= kCorrWeight[lag];
-    double* const subframe_lpc = lpc + subframe * kNumCoeffs;
-    WebRtcIsac_LevDurb(subframe_lpc, reflection, autocorr, kLpcOrder);
-  }
-}
-
-// Fits a second order curve to three equally spaced points, after inverting
-// their values, and returns the position of the extremum relative to the
-// middle point.
-static float QuadraticInterpolation(float prev_val,
-                                    float curr_val,
-                                    float next_val) {
-  // Doing the interpolation in |1 / A(z)|^2.
-  const float inv_prev = 1.0f / prev_val;
-  const float inv_curr = 1.0f / curr_val;
-  const float inv_next = 1.0f / next_val;
-
-  const float offset =
-      -(inv_next - inv_prev) * 0.5f / (inv_next + inv_prev - 2.f * inv_curr);
-  assert(fabs(offset) < 1);
-  return offset;
-}
-
-// Writes the frequency of the first spectral peak of each subframe to
-// |f_peak| (0 if none is found). 1 / A(z), where A(z) is defined by |lpc|, is
-// a model of the spectral envelope of the input signal, so a local maximum of
-// the envelope is a local minimum of A(z). Searching for the first local
-// minimum of |A(z)|^2 saves one inversion and one square root.
-void VadAudioProc::FindFirstSpectralPeaks(double* f_peak, int length_f_peak) {
-  assert(length_f_peak >= kNum10msSubframes);
-  double lpc[kNum10msSubframes * (kLpcOrder + 1)];
-  // LPC polynomials of all sub-frames, |kLpcOrder + 1| coefficients each.
-  GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1));
-
-  const int kNumDftCoefficients = kDftSize / 2 + 1;
-  float data[kDftSize];
-
-  for (int i = 0; i < kNum10msSubframes; i++) {
-    // Copy this sub-frame's coefficients as float; the rest stays zero.
-    memset(data, 0, sizeof(data));
-    for (int n = 0; n < kLpcOrder + 1; n++) {
-      data[n] = static_cast<float>(lpc[i * (kLpcOrder + 1) + n]);
-    }
-    // Transform to frequency domain (in place, |data| is overwritten).
-    WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
-
-    int index_peak = 0;  // Stays 0 when no local minimum is found.
-    float prev_magn_sqr = data[0] * data[0];
-    float curr_magn_sqr = data[2] * data[2] + data[3] * data[3];
-    float next_magn_sqr;
-    bool found_peak = false;
-    for (int n = 2; n < kNumDftCoefficients - 1; n++) {
-      next_magn_sqr =
-          data[2 * n] * data[2 * n] + data[2 * n + 1] * data[2 * n + 1];
-      if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
-        found_peak = true;
-        index_peak = n - 1;  // |curr_magn_sqr| was read from bin n - 1.
-        break;
-      }
-      prev_magn_sqr = curr_magn_sqr;
-      curr_magn_sqr = next_magn_sqr;
-    }
-    float fractional_index = 0;
-    if (!found_peak) {
-      // No peak yet; |curr_magn_sqr| now holds bin kNumDftCoefficients - 2.
-      next_magn_sqr = data[1] * data[1];  // Presumably the Nyquist bin.
-      if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
-        index_peak = kNumDftCoefficients - 1;  // NOTE(review): not - 2? Confirm.
-      }
-    } else {
-      // A peak is found, do a simple quadratic interpolation to get a more
-      // accurate estimate of the peak location.
-      fractional_index =
-          QuadraticInterpolation(prev_magn_sqr, curr_magn_sqr, next_magn_sqr);
-    }
-    f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution;
-  }
-}
-
-// Estimates pitch gains & lags of the buffered audio using iSAC functions.
-void VadAudioProc::PitchAnalysis(double* log_pitch_gains,
-                                 double* pitch_lags_hz,
-                                 int length) {
-  assert(length >= kNum10msSubframes);
-  // TODO(turajs): The constants below can be "imported" from iSAC.
-  const int kPitchSubframes = 4;
-  const int kHalfBandFrameSamples = 240;
-  const int kLookaheadSamples = 24;
-
-  double pitch_gains[kPitchSubframes];
-  double pitch_lags[kPitchSubframes];
-  float lower_band[kHalfBandFrameSamples];
-  float upper_band[kHalfBandFrameSamples];
-  double lower_ahead[kHalfBandFrameSamples];
-  double upper_ahead[kHalfBandFrameSamples];
-  double lower_ahead_filtered[kHalfBandFrameSamples + kLookaheadSamples];
-
-  // Split the samples after the history into lower and upper bands; only the
-  // lower-band look-ahead signal is passed on to the pitch analysis.
-  WebRtcIsac_SplitAndFilterFloat(audio_buffer_ + kNumPastSignalSamples,
-                                 lower_band, upper_band, lower_ahead,
-                                 upper_ahead, pre_filter_handle_.get());
-  WebRtcIsac_PitchAnalysis(lower_ahead, lower_ahead_filtered,
-                           pitch_analysis_handle_.get(), pitch_lags,
-                           pitch_gains);
-
-  // Lags are computed on lower-band signal with sampling rate half of the
-  // input signal, hence the halved sample rate passed below.
-  GetSubframesPitchParameters(kSampleRateHz / 2, pitch_gains, pitch_lags,
-                              kPitchSubframes, kNum10msSubframes,
-                              &log_old_gain_, &old_lag_, log_pitch_gains,
-                              pitch_lags_hz);
-}
-
-void VadAudioProc::Rms(double* rms, int length_rms) {  // RMS per subframe.
-  assert(length_rms >= kNum10msSubframes);
-  for (int frame = 0; frame < kNum10msSubframes; ++frame) {
-    const int begin = kNumPastSignalSamples + frame * kNumSubframeSamples;
-    double sum_squares = 0;  // Accumulated in double, as |rms| is.
-    for (int k = begin; k < begin + kNumSubframeSamples; ++k)
-      sum_squares += audio_buffer_[k] * audio_buffer_[k];
-    rms[frame] = sqrt(sum_squares / kNumSubframeSamples);
-  }
-}
-
-}  // namespace webrtc
author	Bjorn Volcker <bjornv@webrtc.org>	2015-06-25 08:46:02 +0200
committer	Bjorn Volcker <bjornv@webrtc.org>	2015-06-25 06:46:14 +0000
commit	51c7cbb86ae13bc0c005fc8d14973f3e58d6ff4c (patch)
tree	0e67f73eb7415e656aa62fae92b885b88246f336 /webrtc/modules/audio_processing/vad/vad_audio_proc.cc
parent	518c683f3e413523a458a94b533274bd7f29992d (diff)
download	webrtc-51c7cbb86ae13bc0c005fc8d14973f3e58d6ff4c.tar.gz