diff options
Diffstat (limited to 'src/common_audio/vad/main/source/vad_core.h')
-rw-r--r-- | src/common_audio/vad/main/source/vad_core.h | 132 |
1 files changed, 132 insertions, 0 deletions
diff --git a/src/common_audio/vad/main/source/vad_core.h b/src/common_audio/vad/main/source/vad_core.h new file mode 100644 index 0000000000..544caf5ab3 --- /dev/null +++ b/src/common_audio/vad/main/source/vad_core.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This header file declares the VAD instance state (VadInstT) and the core VAD calls. + */ + +#ifndef WEBRTC_VAD_CORE_H_ +#define WEBRTC_VAD_CORE_H_ + +#include "typedefs.h" +#include "vad_defines.h" + +/* State of one VAD instance; every function declared in this header takes a pointer to it. */ typedef struct VadInstT_ +{ + + WebRtc_Word16 vad; + WebRtc_Word32 downsampling_filter_states[4]; + WebRtc_Word16 noise_means[NUM_TABLE_VALUES]; + WebRtc_Word16 speech_means[NUM_TABLE_VALUES]; + WebRtc_Word16 noise_stds[NUM_TABLE_VALUES]; + WebRtc_Word16 speech_stds[NUM_TABLE_VALUES]; + WebRtc_Word32 frame_counter; + WebRtc_Word16 over_hang; // Over Hang + WebRtc_Word16 num_of_speech; + WebRtc_Word16 index_vector[16 * NUM_CHANNELS]; + WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS]; + WebRtc_Word16 mean_value[NUM_CHANNELS]; + WebRtc_Word16 upper_state[5]; + WebRtc_Word16 lower_state[5]; + WebRtc_Word16 hp_filter_state[4]; + WebRtc_Word16 over_hang_max_1[3]; + WebRtc_Word16 over_hang_max_2[3]; + WebRtc_Word16 individual[3]; + WebRtc_Word16 total[3]; + + short init_flag; /* NOTE(review): presumably marks the instance as initialized by WebRtcVad_InitCore - confirm in vad_core.c */ + +} VadInstT; + +/**************************************************************************** + * WebRtcVad_InitCore(...) 
+ * + * This function initializes a VAD instance + * + * Input: + * - inst : Instance that should be initialized + * - mode : Aggressiveness degree + * 0 (High quality) - 3 (Highly aggressive) + * + * Output: + * - inst : Initialized instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcVad_InitCore(VadInstT* inst, short mode); + +/**************************************************************************** + * WebRtcVad_set_mode_core(...) + * + * This function changes the VAD settings (aggressiveness mode) of an instance + * + * Input: + * - inst : VAD instance + * - mode : Aggressiveness degree + * 0 (High quality) - 3 (Highly aggressive) + * + * Output: + * - inst : Changed instance + * + * Return value : 0 - Ok + * -1 - Error + */ + +int WebRtcVad_set_mode_core(VadInstT* inst, short mode); + +/**************************************************************************** + * WebRtcVad_CalcVad32khz(...) + * WebRtcVad_CalcVad16khz(...) + * WebRtcVad_CalcVad8khz(...) + * + * Calculate probability for active speech and make VAD decision. + * + * Input: + * - inst : VAD instance + * - speech_frame : Input speech frame + * - frame_length : Number of input samples + * + * Output: + * - inst : Updated filter states etc. + * + * Return value : VAD decision + * 0 - No active speech + * 1-6 - Active speech + */ +WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT* inst, WebRtc_Word16* speech_frame, + int frame_length); +WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT* inst, WebRtc_Word16* speech_frame, + int frame_length); +WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT* inst, WebRtc_Word16* speech_frame, + int frame_length); + +/**************************************************************************** + * WebRtcVad_GmmProbability(...) + * + * This function calculates the probabilities for background noise and + * speech using Gaussian Mixture Models. A hypothesis test is performed to decide + * which type of signal is most probable. 
+ * + * Input: + * - inst : Pointer to VAD instance + * - feature_vector : Feature vector = log10(energy in frequency band) + * - total_power : Total power in frame. + * - frame_length : Number of input samples + * + * Output: + * VAD decision : 0 - noise, 1 - speech + * + */ +WebRtc_Word16 WebRtcVad_GmmProbability(VadInstT* inst, WebRtc_Word16* feature_vector, + WebRtc_Word16 total_power, int frame_length); + +#endif // WEBRTC_VAD_CORE_H_ |