diff options
author | solenberg <solenberg@webrtc.org> | 2015-12-16 03:31:12 -0800 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-12-16 11:31:16 +0000 |
commit | a29386c26d515be9fbeaeca3e0bc6019f29142c2 (patch) | |
tree | be4b047cd3308751afb9d4367545426297d31109 /webrtc/modules | |
parent | 672aba3f57061e33dd802d9a391c54bdfed952c3 (diff) | |
download | webrtc-a29386c26d515be9fbeaeca3e0bc6019f29142c2.tar.gz |
Make VoiceDetection not a ProcessingComponent (bit exact).
BUG=webrtc:5354
Review URL: https://codereview.webrtc.org/1494593004
Cr-Commit-Position: refs/heads/master@{#11047}
Diffstat (limited to 'webrtc/modules')
4 files changed, 107 insertions, 145 deletions
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc index 67ad266d50..2143fe1878 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc @@ -149,8 +149,7 @@ struct AudioProcessingImpl::ApmPublicSubmodules { ApmPublicSubmodules() : echo_cancellation(nullptr), echo_control_mobile(nullptr), - gain_control(nullptr), - voice_detection(nullptr) {} + gain_control(nullptr) {} // Accessed externally of APM without any lock acquired. EchoCancellationImpl* echo_cancellation; EchoControlMobileImpl* echo_control_mobile; @@ -158,7 +157,7 @@ struct AudioProcessingImpl::ApmPublicSubmodules { rtc::scoped_ptr<HighPassFilterImpl> high_pass_filter; rtc::scoped_ptr<LevelEstimatorImpl> level_estimator; rtc::scoped_ptr<NoiseSuppressionImpl> noise_suppression; - VoiceDetectionImpl* voice_detection; + rtc::scoped_ptr<VoiceDetectionImpl> voice_detection; rtc::scoped_ptr<GainControlForNewAgc> gain_control_for_new_agc; // Accessed internally from both render and capture. @@ -246,8 +245,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config, new LevelEstimatorImpl(&crit_capture_)); public_submodules_->noise_suppression.reset( new NoiseSuppressionImpl(&crit_capture_)); - public_submodules_->voice_detection = - new VoiceDetectionImpl(this, &crit_capture_); + public_submodules_->voice_detection.reset( + new VoiceDetectionImpl(&crit_capture_)); public_submodules_->gain_control_for_new_agc.reset( new GainControlForNewAgc(public_submodules_->gain_control)); @@ -257,8 +256,6 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config, public_submodules_->echo_control_mobile); private_submodules_->component_list.push_back( public_submodules_->gain_control); - private_submodules_->component_list.push_back( - public_submodules_->voice_detection); } SetExtraOptions(config); @@ -396,6 +393,7 @@ int AudioProcessingImpl::InitializeLocked() { InitializeHighPassFilter(); InitializeNoiseSuppression(); InitializeLevelEstimator(); + InitializeVoiceDetection(); #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP if (debug_dump_.debug_file->Open()) { @@ -776,7 +774,7 @@ int AudioProcessingImpl::ProcessStreamLocked() { public_submodules_->noise_suppression->ProcessCaptureAudio(ca); RETURN_ON_ERR( public_submodules_->echo_control_mobile->ProcessCaptureAudio(ca)); - RETURN_ON_ERR(public_submodules_->voice_detection->ProcessCaptureAudio(ca)); + public_submodules_->voice_detection->ProcessCaptureAudio(ca); if (constants_.use_new_agc && public_submodules_->gain_control->is_enabled() && @@ -1162,7 +1160,7 @@ NoiseSuppression* AudioProcessingImpl::noise_suppression() const { VoiceDetection* AudioProcessingImpl::voice_detection() const { // Adding a lock here has no effect as it allows any access to the submodule // from the returned pointer. - return public_submodules_->voice_detection; + return public_submodules_->voice_detection.get(); } bool AudioProcessingImpl::is_data_processed() const { @@ -1185,6 +1183,9 @@ bool AudioProcessingImpl::is_data_processed() const { if (public_submodules_->level_estimator->is_enabled()) { enabled_count++; } + if (public_submodules_->voice_detection->is_enabled()) { + enabled_count++; + } // Data is unchanged if no components are enabled, or if only // public_submodules_->level_estimator @@ -1313,6 +1314,10 @@ void AudioProcessingImpl::InitializeLevelEstimator() { public_submodules_->level_estimator->Initialize(); } +void AudioProcessingImpl::InitializeVoiceDetection() { + public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz()); +} + void AudioProcessingImpl::MaybeUpdateHistograms() { static const int kMinDiffDelayMs = 60; diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h index 87a145ab3c..3506ac4dc0 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.h +++ b/webrtc/modules/audio_processing/audio_processing_impl.h @@ -188,6 +188,8 @@ class AudioProcessingImpl : public AudioProcessing { EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); void InitializeLevelEstimator() EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + void InitializeVoiceDetection() + EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); int InitializeLocked(const ProcessingConfig& config) EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); diff --git a/webrtc/modules/audio_processing/voice_detection_impl.cc b/webrtc/modules/audio_processing/voice_detection_impl.cc index 25c7269cb4..22d218c371 100644 --- a/webrtc/modules/audio_processing/voice_detection_impl.cc +++ b/webrtc/modules/audio_processing/voice_detection_impl.cc @@ -10,66 +10,61 @@ #include "webrtc/modules/audio_processing/voice_detection_impl.h" -#include <assert.h> - -#include "webrtc/base/criticalsection.h" -#include "webrtc/base/thread_checker.h" #include "webrtc/common_audio/vad/include/webrtc_vad.h" #include "webrtc/modules/audio_processing/audio_buffer.h" namespace webrtc { - -typedef VadInst Handle; - -namespace { -int MapSetting(VoiceDetection::Likelihood likelihood) { - switch (likelihood) { - case VoiceDetection::kVeryLowLikelihood: - return 3; - case VoiceDetection::kLowLikelihood: - return 2; - case VoiceDetection::kModerateLikelihood: - return 1; - case VoiceDetection::kHighLikelihood: - return 0; +class VoiceDetectionImpl::Vad { + public: + Vad() { + state_ = WebRtcVad_Create(); + RTC_CHECK(state_); + int error = WebRtcVad_Init(state_); + RTC_DCHECK_EQ(0, error); } - assert(false); - return -1; -} -} // namespace - -VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm, - rtc::CriticalSection* crit) - : ProcessingComponent(), - apm_(apm), - crit_(crit), - stream_has_voice_(false), - using_external_vad_(false), - likelihood_(kLowLikelihood), - frame_size_ms_(10), - frame_size_samples_(0) { - RTC_DCHECK(apm); + ~Vad() { + WebRtcVad_Free(state_); + } + VadInst* state() { return state_; } + private: + VadInst* state_ = nullptr; + RTC_DISALLOW_COPY_AND_ASSIGN(Vad); +}; + +VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit) + : crit_(crit) { RTC_DCHECK(crit); } VoiceDetectionImpl::~VoiceDetectionImpl() {} -int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { +void VoiceDetectionImpl::Initialize(int sample_rate_hz) { rtc::CritScope cs(crit_); - if (!is_component_enabled()) { - return apm_->kNoError; + sample_rate_hz_ = sample_rate_hz; + rtc::scoped_ptr<Vad> new_vad; + if (enabled_) { + new_vad.reset(new Vad()); } + vad_.swap(new_vad); + using_external_vad_ = false; + frame_size_samples_ = + static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000; + set_likelihood(likelihood_); +} +void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { + rtc::CritScope cs(crit_); + if (!enabled_) { + return; + } if (using_external_vad_) { using_external_vad_ = false; - return apm_->kNoError; + return; } - assert(audio->num_frames_per_band() <= 160); + RTC_DCHECK_GE(160u, audio->num_frames_per_band()); // TODO(ajm): concatenate data in frame buffer here. - - int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)), - apm_->proc_split_sample_rate_hz(), + int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_, audio->mixed_low_pass_data(), frame_size_samples_); if (vad_ret == 0) { @@ -79,27 +74,29 @@ int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { stream_has_voice_ = true; audio->set_activity(AudioFrame::kVadActive); } else { - return apm_->kUnspecifiedError; + RTC_NOTREACHED(); } - - return apm_->kNoError; } int VoiceDetectionImpl::Enable(bool enable) { rtc::CritScope cs(crit_); - return EnableComponent(enable); + if (enabled_ != enable) { + enabled_ = enable; + Initialize(sample_rate_hz_); + } + return AudioProcessing::kNoError; } bool VoiceDetectionImpl::is_enabled() const { rtc::CritScope cs(crit_); - return is_component_enabled(); + return enabled_; } int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { rtc::CritScope cs(crit_); using_external_vad_ = true; stream_has_voice_ = has_voice; - return apm_->kNoError; + return AudioProcessing::kNoError; } bool VoiceDetectionImpl::stream_has_voice() const { @@ -111,12 +108,30 @@ bool VoiceDetectionImpl::stream_has_voice() const { int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { rtc::CritScope cs(crit_); - if (MapSetting(likelihood) == -1) { - return apm_->kBadParameterError; - } - likelihood_ = likelihood; - return Configure(); + if (enabled_) { + int mode = 2; + switch (likelihood) { + case VoiceDetection::kVeryLowLikelihood: + mode = 3; + break; + case VoiceDetection::kLowLikelihood: + mode = 2; + break; + case VoiceDetection::kModerateLikelihood: + mode = 1; + break; + case VoiceDetection::kHighLikelihood: + mode = 0; + break; + default: + RTC_NOTREACHED(); + break; + } + int error = WebRtcVad_set_mode(vad_->state(), mode); + RTC_DCHECK_EQ(0, error); + } + return AudioProcessing::kNoError; } VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { @@ -126,64 +141,14 @@ VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { int VoiceDetectionImpl::set_frame_size_ms(int size) { rtc::CritScope cs(crit_); - assert(size == 10); // TODO(ajm): remove when supported. - if (size != 10 && - size != 20 && - size != 30) { - return apm_->kBadParameterError; - } - + RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported. frame_size_ms_ = size; - - return Initialize(); + Initialize(sample_rate_hz_); + return AudioProcessing::kNoError; } int VoiceDetectionImpl::frame_size_ms() const { rtc::CritScope cs(crit_); return frame_size_ms_; } - -int VoiceDetectionImpl::Initialize() { - int err = ProcessingComponent::Initialize(); - - rtc::CritScope cs(crit_); - if (err != apm_->kNoError || !is_component_enabled()) { - return err; - } - - using_external_vad_ = false; - frame_size_samples_ = static_cast<size_t>( - frame_size_ms_ * apm_->proc_split_sample_rate_hz() / 1000); - // TODO(ajm): intialize frame buffer here. - - return apm_->kNoError; -} - -void* VoiceDetectionImpl::CreateHandle() const { - return WebRtcVad_Create(); -} - -void VoiceDetectionImpl::DestroyHandle(void* handle) const { - WebRtcVad_Free(static_cast<Handle*>(handle)); -} - -int VoiceDetectionImpl::InitializeHandle(void* handle) const { - return WebRtcVad_Init(static_cast<Handle*>(handle)); -} - -int VoiceDetectionImpl::ConfigureHandle(void* handle) const { - rtc::CritScope cs(crit_); - return WebRtcVad_set_mode(static_cast<Handle*>(handle), - MapSetting(likelihood_)); -} - -int VoiceDetectionImpl::num_handles_required() const { - return 1; -} - -int VoiceDetectionImpl::GetHandleError(void* handle) const { - // The VAD has no get_error() function. - assert(handle != NULL); - return apm_->kUnspecifiedError; -} } // namespace webrtc diff --git a/webrtc/modules/audio_processing/voice_detection_impl.h b/webrtc/modules/audio_processing/voice_detection_impl.h index 3a1193c1d7..0d6d8cf14a 100644 --- a/webrtc/modules/audio_processing/voice_detection_impl.h +++ b/webrtc/modules/audio_processing/voice_detection_impl.h @@ -11,31 +11,27 @@ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VOICE_DETECTION_IMPL_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_VOICE_DETECTION_IMPL_H_ +#include "webrtc/base/constructormagic.h" #include "webrtc/base/criticalsection.h" +#include "webrtc/base/scoped_ptr.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" -#include "webrtc/modules/audio_processing/processing_component.h" namespace webrtc { class AudioBuffer; -class VoiceDetectionImpl : public VoiceDetection, - public ProcessingComponent { +class VoiceDetectionImpl : public VoiceDetection { public: - VoiceDetectionImpl(const AudioProcessing* apm, rtc::CriticalSection* crit); - virtual ~VoiceDetectionImpl(); + explicit VoiceDetectionImpl(rtc::CriticalSection* crit); + ~VoiceDetectionImpl() override; - int ProcessCaptureAudio(AudioBuffer* audio); + // TODO(peah): Fold into ctor, once public API is removed. + void Initialize(int sample_rate_hz); + void ProcessCaptureAudio(AudioBuffer* audio); // VoiceDetection implementation. - bool is_enabled() const override; - - // ProcessingComponent implementation. - int Initialize() override; - - private: - // VoiceDetection implementation. int Enable(bool enable) override; + bool is_enabled() const override; int set_stream_has_voice(bool has_voice) override; bool stream_has_voice() const override; int set_likelihood(Likelihood likelihood) override; @@ -43,24 +39,18 @@ class VoiceDetectionImpl : public VoiceDetection, int set_frame_size_ms(int size) override; int frame_size_ms() const override; - // ProcessingComponent implementation. - void* CreateHandle() const override; - int InitializeHandle(void* handle) const override; - int ConfigureHandle(void* handle) const override; - void DestroyHandle(void* handle) const override; - int num_handles_required() const override; - int GetHandleError(void* handle) const override; - - // Not guarded as its public API is thread safe. - const AudioProcessing* apm_; - + private: + class Vad; rtc::CriticalSection* const crit_; - - bool stream_has_voice_ GUARDED_BY(crit_); - bool using_external_vad_ GUARDED_BY(crit_); - Likelihood likelihood_ GUARDED_BY(crit_); - int frame_size_ms_ GUARDED_BY(crit_); - size_t frame_size_samples_ GUARDED_BY(crit_); + bool enabled_ GUARDED_BY(crit_) = false; + bool stream_has_voice_ GUARDED_BY(crit_) = false; + bool using_external_vad_ GUARDED_BY(crit_) = false; + Likelihood likelihood_ GUARDED_BY(crit_) = kLowLikelihood; + int frame_size_ms_ GUARDED_BY(crit_) = 10; + size_t frame_size_samples_ GUARDED_BY(crit_) = 0; + int sample_rate_hz_ GUARDED_BY(crit_) = 0; + rtc::scoped_ptr<Vad> vad_ GUARDED_BY(crit_); + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(VoiceDetectionImpl); }; } // namespace webrtc |