aboutsummaryrefslogtreecommitdiff
path: root/src/common_audio/vad/main/source/vad_core.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/common_audio/vad/main/source/vad_core.h')
-rw-r--r--src/common_audio/vad/main/source/vad_core.h132
1 files changed, 132 insertions, 0 deletions
diff --git a/src/common_audio/vad/main/source/vad_core.h b/src/common_audio/vad/main/source/vad_core.h
new file mode 100644
index 0000000000..544caf5ab3
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_core.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes the descriptions of the core VAD calls.
+ */
+
+#ifndef WEBRTC_VAD_CORE_H_
+#define WEBRTC_VAD_CORE_H_
+
+#include "typedefs.h"
+#include "vad_defines.h"
+
+typedef struct VadInstT_
+{
+ // State of one VAD instance: set up by WebRtcVad_InitCore() and updated by the WebRtcVad_CalcVad*khz() calls.
+ WebRtc_Word16 vad;
+ WebRtc_Word32 downsampling_filter_states[4];
+ WebRtc_Word16 noise_means[NUM_TABLE_VALUES];
+ WebRtc_Word16 speech_means[NUM_TABLE_VALUES];
+ WebRtc_Word16 noise_stds[NUM_TABLE_VALUES];
+ WebRtc_Word16 speech_stds[NUM_TABLE_VALUES];
+ WebRtc_Word32 frame_counter;
+ WebRtc_Word16 over_hang; // NOTE(review): presumably the remaining hangover frame count -- confirm in vad_core.c
+ WebRtc_Word16 num_of_speech;
+ WebRtc_Word16 index_vector[16 * NUM_CHANNELS];
+ WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS];
+ WebRtc_Word16 mean_value[NUM_CHANNELS];
+ WebRtc_Word16 upper_state[5];
+ WebRtc_Word16 lower_state[5];
+ WebRtc_Word16 hp_filter_state[4];
+ WebRtc_Word16 over_hang_max_1[3];
+ WebRtc_Word16 over_hang_max_2[3];
+ WebRtc_Word16 individual[3];
+ WebRtc_Word16 total[3];
+ // NOTE(review): presumably marks the instance as initialized -- confirm against WebRtcVad_InitCore().
+ short init_flag;
+
+} VadInstT;
+
+/****************************************************************************
+ * WebRtcVad_InitCore(...)
+ *
+ * This function initializes a VAD instance.
+ *
+ * Input:
+ * - inst : Pointer to the VAD instance that should be initialized
+ * - mode : Aggressiveness degree,
+ * 0 (High quality) - 3 (Highly aggressive)
+ *
+ * Output:
+ * - inst : Initialized instance
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+int WebRtcVad_InitCore(VadInstT* inst, short mode);
+
+/****************************************************************************
+ * WebRtcVad_set_mode_core(...)
+ *
+ * This function changes the aggressiveness mode of a VAD instance.
+ *
+ * Input:
+ * - inst : Pointer to the VAD instance
+ * - mode : Aggressiveness degree,
+ * 0 (High quality) - 3 (Highly aggressive)
+ *
+ * Output:
+ * - inst : Changed instance
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+
+int WebRtcVad_set_mode_core(VadInstT* inst, short mode);
+
+/****************************************************************************
+ * WebRtcVad_CalcVad32khz(...)
+ * WebRtcVad_CalcVad16khz(...)
+ * WebRtcVad_CalcVad8khz(...)
+ *
+ * Calculates the probability of active speech and makes the VAD decision.
+ *
+ * Input:
+ * - inst : Pointer to the VAD instance
+ * - speech_frame : Input speech frame
+ * - frame_length : Number of input samples
+ *
+ * Output:
+ * - inst : Updated filter states etc.
+ *
+ * Return value : VAD decision
+ * 0 - No active speech
+ * 1-6 - Active speech
+ */
+WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT* inst, WebRtc_Word16* speech_frame,
+ int frame_length);
+WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT* inst, WebRtc_Word16* speech_frame,
+ int frame_length);
+WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT* inst, WebRtc_Word16* speech_frame,
+ int frame_length);
+
+/****************************************************************************
+ * WebRtcVad_GmmProbability(...)
+ *
+ * This function calculates the probabilities for background noise and
+ * speech using Gaussian Mixture Models. A hypothesis test is performed to decide
+ * which type of signal is most probable.
+ *
+ * Input:
+ * - inst : Pointer to VAD instance
+ * - feature_vector : Feature vector = log10(energy in frequency band)
+ * - total_power : Total power in frame.
+ * - frame_length : Number of input samples
+ *
+ * Output:
+ * VAD decision : 0 - noise, 1 - speech
+ * NOTE(review): presumably this decision is the return value -- confirm in vad_core.c
+ */
+WebRtc_Word16 WebRtcVad_GmmProbability(VadInstT* inst, WebRtc_Word16* feature_vector,
+ WebRtc_Word16 total_power, int frame_length);
+
+#endif // WEBRTC_VAD_CORE_H_