aboutsummaryrefslogtreecommitdiff
path: root/webrtc/modules
diff options
context:
space:
mode:
author solenberg <solenberg@webrtc.org> 2015-12-16 03:31:12 -0800
committer Commit bot <commit-bot@chromium.org> 2015-12-16 11:31:16 +0000
commit a29386c26d515be9fbeaeca3e0bc6019f29142c2 (patch)
tree be4b047cd3308751afb9d4367545426297d31109 /webrtc/modules
parent 672aba3f57061e33dd802d9a391c54bdfed952c3 (diff)
download webrtc-a29386c26d515be9fbeaeca3e0bc6019f29142c2.tar.gz
Make VoiceDetection not a ProcessingComponent (bit exact).
BUG=webrtc:5354
Review URL: https://codereview.webrtc.org/1494593004
Cr-Commit-Position: refs/heads/master@{#11047}
Diffstat (limited to 'webrtc/modules')
-rw-r--r-- webrtc/modules/audio_processing/audio_processing_impl.cc 23
-rw-r--r-- webrtc/modules/audio_processing/audio_processing_impl.h 2
-rw-r--r-- webrtc/modules/audio_processing/voice_detection_impl.cc 177
-rw-r--r-- webrtc/modules/audio_processing/voice_detection_impl.h 50
4 files changed, 107 insertions, 145 deletions
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index 67ad266d50..2143fe1878 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -149,8 +149,7 @@ struct AudioProcessingImpl::ApmPublicSubmodules {
ApmPublicSubmodules()
: echo_cancellation(nullptr),
echo_control_mobile(nullptr),
- gain_control(nullptr),
- voice_detection(nullptr) {}
+ gain_control(nullptr) {}
// Accessed externally of APM without any lock acquired.
EchoCancellationImpl* echo_cancellation;
EchoControlMobileImpl* echo_control_mobile;
@@ -158,7 +157,7 @@ struct AudioProcessingImpl::ApmPublicSubmodules {
rtc::scoped_ptr<HighPassFilterImpl> high_pass_filter;
rtc::scoped_ptr<LevelEstimatorImpl> level_estimator;
rtc::scoped_ptr<NoiseSuppressionImpl> noise_suppression;
- VoiceDetectionImpl* voice_detection;
+ rtc::scoped_ptr<VoiceDetectionImpl> voice_detection;
rtc::scoped_ptr<GainControlForNewAgc> gain_control_for_new_agc;
// Accessed internally from both render and capture.
@@ -246,8 +245,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config,
new LevelEstimatorImpl(&crit_capture_));
public_submodules_->noise_suppression.reset(
new NoiseSuppressionImpl(&crit_capture_));
- public_submodules_->voice_detection =
- new VoiceDetectionImpl(this, &crit_capture_);
+ public_submodules_->voice_detection.reset(
+ new VoiceDetectionImpl(&crit_capture_));
public_submodules_->gain_control_for_new_agc.reset(
new GainControlForNewAgc(public_submodules_->gain_control));
@@ -257,8 +256,6 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config,
public_submodules_->echo_control_mobile);
private_submodules_->component_list.push_back(
public_submodules_->gain_control);
- private_submodules_->component_list.push_back(
- public_submodules_->voice_detection);
}
SetExtraOptions(config);
@@ -396,6 +393,7 @@ int AudioProcessingImpl::InitializeLocked() {
InitializeHighPassFilter();
InitializeNoiseSuppression();
InitializeLevelEstimator();
+ InitializeVoiceDetection();
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_dump_.debug_file->Open()) {
@@ -776,7 +774,7 @@ int AudioProcessingImpl::ProcessStreamLocked() {
public_submodules_->noise_suppression->ProcessCaptureAudio(ca);
RETURN_ON_ERR(
public_submodules_->echo_control_mobile->ProcessCaptureAudio(ca));
- RETURN_ON_ERR(public_submodules_->voice_detection->ProcessCaptureAudio(ca));
+ public_submodules_->voice_detection->ProcessCaptureAudio(ca);
if (constants_.use_new_agc &&
public_submodules_->gain_control->is_enabled() &&
@@ -1162,7 +1160,7 @@ NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
VoiceDetection* AudioProcessingImpl::voice_detection() const {
// Adding a lock here has no effect as it allows any access to the submodule
// from the returned pointer.
- return public_submodules_->voice_detection;
+ return public_submodules_->voice_detection.get();
}
bool AudioProcessingImpl::is_data_processed() const {
@@ -1185,6 +1183,9 @@ bool AudioProcessingImpl::is_data_processed() const {
if (public_submodules_->level_estimator->is_enabled()) {
enabled_count++;
}
+ if (public_submodules_->voice_detection->is_enabled()) {
+ enabled_count++;
+ }
// Data is unchanged if no components are enabled, or if only
// public_submodules_->level_estimator
@@ -1313,6 +1314,10 @@ void AudioProcessingImpl::InitializeLevelEstimator() {
public_submodules_->level_estimator->Initialize();
}
+void AudioProcessingImpl::InitializeVoiceDetection() {
+ public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz());
+}
+
void AudioProcessingImpl::MaybeUpdateHistograms() {
static const int kMinDiffDelayMs = 60;
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h
index 87a145ab3c..3506ac4dc0 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.h
+++ b/webrtc/modules/audio_processing/audio_processing_impl.h
@@ -188,6 +188,8 @@ class AudioProcessingImpl : public AudioProcessing {
EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
void InitializeLevelEstimator()
EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
+ void InitializeVoiceDetection()
+ EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
int InitializeLocked(const ProcessingConfig& config)
EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
diff --git a/webrtc/modules/audio_processing/voice_detection_impl.cc b/webrtc/modules/audio_processing/voice_detection_impl.cc
index 25c7269cb4..22d218c371 100644
--- a/webrtc/modules/audio_processing/voice_detection_impl.cc
+++ b/webrtc/modules/audio_processing/voice_detection_impl.cc
@@ -10,66 +10,61 @@
#include "webrtc/modules/audio_processing/voice_detection_impl.h"
-#include <assert.h>
-
-#include "webrtc/base/criticalsection.h"
-#include "webrtc/base/thread_checker.h"
#include "webrtc/common_audio/vad/include/webrtc_vad.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
namespace webrtc {
-
-typedef VadInst Handle;
-
-namespace {
-int MapSetting(VoiceDetection::Likelihood likelihood) {
- switch (likelihood) {
- case VoiceDetection::kVeryLowLikelihood:
- return 3;
- case VoiceDetection::kLowLikelihood:
- return 2;
- case VoiceDetection::kModerateLikelihood:
- return 1;
- case VoiceDetection::kHighLikelihood:
- return 0;
+class VoiceDetectionImpl::Vad {
+ public:
+ Vad() {
+ state_ = WebRtcVad_Create();
+ RTC_CHECK(state_);
+ int error = WebRtcVad_Init(state_);
+ RTC_DCHECK_EQ(0, error);
}
- assert(false);
- return -1;
-}
-} // namespace
-
-VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm,
- rtc::CriticalSection* crit)
- : ProcessingComponent(),
- apm_(apm),
- crit_(crit),
- stream_has_voice_(false),
- using_external_vad_(false),
- likelihood_(kLowLikelihood),
- frame_size_ms_(10),
- frame_size_samples_(0) {
- RTC_DCHECK(apm);
+ ~Vad() {
+ WebRtcVad_Free(state_);
+ }
+ VadInst* state() { return state_; }
+ private:
+ VadInst* state_ = nullptr;
+ RTC_DISALLOW_COPY_AND_ASSIGN(Vad);
+};
+
+VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit)
+ : crit_(crit) {
RTC_DCHECK(crit);
}
VoiceDetectionImpl::~VoiceDetectionImpl() {}
-int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
+void VoiceDetectionImpl::Initialize(int sample_rate_hz) {
rtc::CritScope cs(crit_);
- if (!is_component_enabled()) {
- return apm_->kNoError;
+ sample_rate_hz_ = sample_rate_hz;
+ rtc::scoped_ptr<Vad> new_vad;
+ if (enabled_) {
+ new_vad.reset(new Vad());
}
+ vad_.swap(new_vad);
+ using_external_vad_ = false;
+ frame_size_samples_ =
+ static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000;
+ set_likelihood(likelihood_);
+}
+void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
+ rtc::CritScope cs(crit_);
+ if (!enabled_) {
+ return;
+ }
if (using_external_vad_) {
using_external_vad_ = false;
- return apm_->kNoError;
+ return;
}
- assert(audio->num_frames_per_band() <= 160);
+ RTC_DCHECK_GE(160u, audio->num_frames_per_band());
// TODO(ajm): concatenate data in frame buffer here.
-
- int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
- apm_->proc_split_sample_rate_hz(),
+ int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
audio->mixed_low_pass_data(),
frame_size_samples_);
if (vad_ret == 0) {
@@ -79,27 +74,29 @@ int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
stream_has_voice_ = true;
audio->set_activity(AudioFrame::kVadActive);
} else {
- return apm_->kUnspecifiedError;
+ RTC_NOTREACHED();
}
-
- return apm_->kNoError;
}
int VoiceDetectionImpl::Enable(bool enable) {
rtc::CritScope cs(crit_);
- return EnableComponent(enable);
+ if (enabled_ != enable) {
+ enabled_ = enable;
+ Initialize(sample_rate_hz_);
+ }
+ return AudioProcessing::kNoError;
}
bool VoiceDetectionImpl::is_enabled() const {
rtc::CritScope cs(crit_);
- return is_component_enabled();
+ return enabled_;
}
int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
rtc::CritScope cs(crit_);
using_external_vad_ = true;
stream_has_voice_ = has_voice;
- return apm_->kNoError;
+ return AudioProcessing::kNoError;
}
bool VoiceDetectionImpl::stream_has_voice() const {
@@ -111,12 +108,30 @@ bool VoiceDetectionImpl::stream_has_voice() const {
int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
rtc::CritScope cs(crit_);
- if (MapSetting(likelihood) == -1) {
- return apm_->kBadParameterError;
- }
-
likelihood_ = likelihood;
- return Configure();
+ if (enabled_) {
+ int mode = 2;
+ switch (likelihood) {
+ case VoiceDetection::kVeryLowLikelihood:
+ mode = 3;
+ break;
+ case VoiceDetection::kLowLikelihood:
+ mode = 2;
+ break;
+ case VoiceDetection::kModerateLikelihood:
+ mode = 1;
+ break;
+ case VoiceDetection::kHighLikelihood:
+ mode = 0;
+ break;
+ default:
+ RTC_NOTREACHED();
+ break;
+ }
+ int error = WebRtcVad_set_mode(vad_->state(), mode);
+ RTC_DCHECK_EQ(0, error);
+ }
+ return AudioProcessing::kNoError;
}
VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
@@ -126,64 +141,14 @@ VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
int VoiceDetectionImpl::set_frame_size_ms(int size) {
rtc::CritScope cs(crit_);
- assert(size == 10); // TODO(ajm): remove when supported.
- if (size != 10 &&
- size != 20 &&
- size != 30) {
- return apm_->kBadParameterError;
- }
-
+ RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported.
frame_size_ms_ = size;
-
- return Initialize();
+ Initialize(sample_rate_hz_);
+ return AudioProcessing::kNoError;
}
int VoiceDetectionImpl::frame_size_ms() const {
rtc::CritScope cs(crit_);
return frame_size_ms_;
}
-
-int VoiceDetectionImpl::Initialize() {
- int err = ProcessingComponent::Initialize();
-
- rtc::CritScope cs(crit_);
- if (err != apm_->kNoError || !is_component_enabled()) {
- return err;
- }
-
- using_external_vad_ = false;
- frame_size_samples_ = static_cast<size_t>(
- frame_size_ms_ * apm_->proc_split_sample_rate_hz() / 1000);
- // TODO(ajm): intialize frame buffer here.
-
- return apm_->kNoError;
-}
-
-void* VoiceDetectionImpl::CreateHandle() const {
- return WebRtcVad_Create();
-}
-
-void VoiceDetectionImpl::DestroyHandle(void* handle) const {
- WebRtcVad_Free(static_cast<Handle*>(handle));
-}
-
-int VoiceDetectionImpl::InitializeHandle(void* handle) const {
- return WebRtcVad_Init(static_cast<Handle*>(handle));
-}
-
-int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
- rtc::CritScope cs(crit_);
- return WebRtcVad_set_mode(static_cast<Handle*>(handle),
- MapSetting(likelihood_));
-}
-
-int VoiceDetectionImpl::num_handles_required() const {
- return 1;
-}
-
-int VoiceDetectionImpl::GetHandleError(void* handle) const {
- // The VAD has no get_error() function.
- assert(handle != NULL);
- return apm_->kUnspecifiedError;
-}
} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/voice_detection_impl.h b/webrtc/modules/audio_processing/voice_detection_impl.h
index 3a1193c1d7..0d6d8cf14a 100644
--- a/webrtc/modules/audio_processing/voice_detection_impl.h
+++ b/webrtc/modules/audio_processing/voice_detection_impl.h
@@ -11,31 +11,27 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VOICE_DETECTION_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_VOICE_DETECTION_IMPL_H_
+#include "webrtc/base/constructormagic.h"
#include "webrtc/base/criticalsection.h"
+#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
-#include "webrtc/modules/audio_processing/processing_component.h"
namespace webrtc {
class AudioBuffer;
-class VoiceDetectionImpl : public VoiceDetection,
- public ProcessingComponent {
+class VoiceDetectionImpl : public VoiceDetection {
public:
- VoiceDetectionImpl(const AudioProcessing* apm, rtc::CriticalSection* crit);
- virtual ~VoiceDetectionImpl();
+ explicit VoiceDetectionImpl(rtc::CriticalSection* crit);
+ ~VoiceDetectionImpl() override;
- int ProcessCaptureAudio(AudioBuffer* audio);
+ // TODO(peah): Fold into ctor, once public API is removed.
+ void Initialize(int sample_rate_hz);
+ void ProcessCaptureAudio(AudioBuffer* audio);
// VoiceDetection implementation.
- bool is_enabled() const override;
-
- // ProcessingComponent implementation.
- int Initialize() override;
-
- private:
- // VoiceDetection implementation.
int Enable(bool enable) override;
+ bool is_enabled() const override;
int set_stream_has_voice(bool has_voice) override;
bool stream_has_voice() const override;
int set_likelihood(Likelihood likelihood) override;
@@ -43,24 +39,18 @@ class VoiceDetectionImpl : public VoiceDetection,
int set_frame_size_ms(int size) override;
int frame_size_ms() const override;
- // ProcessingComponent implementation.
- void* CreateHandle() const override;
- int InitializeHandle(void* handle) const override;
- int ConfigureHandle(void* handle) const override;
- void DestroyHandle(void* handle) const override;
- int num_handles_required() const override;
- int GetHandleError(void* handle) const override;
-
- // Not guarded as its public API is thread safe.
- const AudioProcessing* apm_;
-
+ private:
+ class Vad;
rtc::CriticalSection* const crit_;
-
- bool stream_has_voice_ GUARDED_BY(crit_);
- bool using_external_vad_ GUARDED_BY(crit_);
- Likelihood likelihood_ GUARDED_BY(crit_);
- int frame_size_ms_ GUARDED_BY(crit_);
- size_t frame_size_samples_ GUARDED_BY(crit_);
+ bool enabled_ GUARDED_BY(crit_) = false;
+ bool stream_has_voice_ GUARDED_BY(crit_) = false;
+ bool using_external_vad_ GUARDED_BY(crit_) = false;
+ Likelihood likelihood_ GUARDED_BY(crit_) = kLowLikelihood;
+ int frame_size_ms_ GUARDED_BY(crit_) = 10;
+ size_t frame_size_samples_ GUARDED_BY(crit_) = 0;
+ int sample_rate_hz_ GUARDED_BY(crit_) = 0;
+ rtc::scoped_ptr<Vad> vad_ GUARDED_BY(crit_);
+ RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(VoiceDetectionImpl);
};
} // namespace webrtc