Diffstat (limited to 'src/common_audio/vad/main')
-rw-r--r--  src/common_audio/vad/main/interface/webrtc_vad.h         159
-rw-r--r--  src/common_audio/vad/main/source/Android.mk                64
-rw-r--r--  src/common_audio/vad/main/source/vad.gyp                   51
-rw-r--r--  src/common_audio/vad/main/source/vad_const.c               80
-rw-r--r--  src/common_audio/vad/main/source/vad_const.h               59
-rw-r--r--  src/common_audio/vad/main/source/vad_core.c               685
-rw-r--r--  src/common_audio/vad/main/source/vad_core.h               132
-rw-r--r--  src/common_audio/vad/main/source/vad_defines.h             95
-rw-r--r--  src/common_audio/vad/main/source/vad_filterbank.c         267
-rw-r--r--  src/common_audio/vad/main/source/vad_filterbank.h         143
-rw-r--r--  src/common_audio/vad/main/source/vad_gmm.c                 70
-rw-r--r--  src/common_audio/vad/main/source/vad_gmm.h                 47
-rw-r--r--  src/common_audio/vad/main/source/vad_sp.c                 231
-rw-r--r--  src/common_audio/vad/main/source/vad_sp.h                  60
-rw-r--r--  src/common_audio/vad/main/source/webrtc_vad.c             197
-rw-r--r--  src/common_audio/vad/main/test/unit_test/unit_test.cc     123
-rw-r--r--  src/common_audio/vad/main/test/unit_test/unit_test.h       28
17 files changed, 2491 insertions, 0 deletions
diff --git a/src/common_audio/vad/main/interface/webrtc_vad.h b/src/common_audio/vad/main/interface/webrtc_vad.h
new file mode 100644
index 0000000000..6e3eb74ab5
--- /dev/null
+++ b/src/common_audio/vad/main/interface/webrtc_vad.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes the VAD API calls. Specific function calls are given below.
+ */
+
+#ifndef WEBRTC_VAD_WEBRTC_VAD_H_
+#define WEBRTC_VAD_WEBRTC_VAD_H_
+
+#include "typedefs.h"
+
+typedef struct WebRtcVadInst VadInst;
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/****************************************************************************
+ * WebRtcVad_get_version(...)
+ *
+ * This function returns the version number of the code.
+ *
+ * Output:
+ * - version : Pointer to a buffer where the version info will
+ * be stored.
+ * Input:
+ * - size_bytes : Size of the buffer.
+ *
+ */
+WebRtc_Word16 WebRtcVad_get_version(char *version, size_t size_bytes);
+
+/****************************************************************************
+ * WebRtcVad_AssignSize(...)
+ *
+ * This function gets the size needed for storing a VAD instance.
+ *
+ * Input/Output:
+ * - size_in_bytes : Pointer to integer where the size is returned
+ *
+ * Return value : 0
+ */
+WebRtc_Word16 WebRtcVad_AssignSize(int *size_in_bytes);
+
+/****************************************************************************
+ * WebRtcVad_Assign(...)
+ *
+ * This function assigns memory for a VAD instance.
+ *
+ * Input:
+ *      - vad_inst_addr : Address of the memory where the instance is assigned
+ * Output:
+ * - vad_inst : Pointer to the instance that should be created
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+WebRtc_Word16 WebRtcVad_Assign(VadInst **vad_inst, void *vad_inst_addr);
+
+/****************************************************************************
+ * WebRtcVad_Create(...)
+ *
+ * This function creates an instance of the VAD structure
+ *
+ * Input:
+ * - vad_inst : Pointer to VAD instance that should be created
+ *
+ * Output:
+ * - vad_inst : Pointer to created VAD instance
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+WebRtc_Word16 WebRtcVad_Create(VadInst **vad_inst);
+
+/****************************************************************************
+ * WebRtcVad_Free(...)
+ *
+ * This function frees the dynamic memory of a specified VAD instance
+ *
+ * Input:
+ * - vad_inst : Pointer to VAD instance that should be freed
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+WebRtc_Word16 WebRtcVad_Free(VadInst *vad_inst);
+
+/****************************************************************************
+ * WebRtcVad_Init(...)
+ *
+ * This function initializes a VAD instance
+ *
+ * Input:
+ * - vad_inst : Instance that should be initialized
+ *
+ * Output:
+ * - vad_inst : Initialized instance
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+WebRtc_Word16 WebRtcVad_Init(VadInst *vad_inst);
+
+/****************************************************************************
+ * WebRtcVad_set_mode(...)
+ *
+ * This function sets the aggressiveness mode of a VAD instance
+ *
+ * Input:
+ * - vad_inst : VAD instance
+ * - mode : Aggressiveness setting (0, 1, 2, or 3)
+ *
+ * Output:
+ *      - vad_inst      : Instance with the new mode set
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+WebRtc_Word16 WebRtcVad_set_mode(VadInst *vad_inst, WebRtc_Word16 mode);
+
+/****************************************************************************
+ * WebRtcVad_Process(...)
+ *
+ * This function calculates a VAD decision for the inserted speech frame
+ *
+ * Input:
+ *        - vad_inst     : VAD Instance. Needs to be initialized before the call.
+ * - fs : sampling frequency (Hz): 8000, 16000, or 32000
+ * - speech_frame : Pointer to speech frame buffer
+ * - frame_length : Length of speech frame buffer in number of samples
+ *
+ * Output:
+ * - vad_inst : Updated VAD instance
+ *
+ * Return value : 1 - Active Voice
+ * 0 - Non-active Voice
+ * -1 - Error
+ */
+WebRtc_Word16 WebRtcVad_Process(VadInst *vad_inst,
+ WebRtc_Word16 fs,
+ WebRtc_Word16 *speech_frame,
+ WebRtc_Word16 frame_length);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // WEBRTC_VAD_WEBRTC_VAD_H_
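
Taken together, these calls form a create/init/set_mode/process/free lifecycle. A minimal usage sketch of the API above (classify_frame is an illustrative helper, not part of the API; 80 samples is one 10 ms frame at 8 kHz):

#include "webrtc_vad.h"

// Classify one 10 ms frame at 8 kHz (80 samples).
// Returns 1 for active voice, 0 for non-active voice, -1 on error.
WebRtc_Word16 classify_frame(WebRtc_Word16 *frame80)
{
    VadInst *vad = NULL;
    WebRtc_Word16 decision = -1;

    if (WebRtcVad_Create(&vad) == 0 && WebRtcVad_Init(vad) == 0)
    {
        // Aggressiveness 0 (quality) .. 3 (very aggressive)
        if (WebRtcVad_set_mode(vad, 2) == 0)
        {
            decision = WebRtcVad_Process(vad, 8000, frame80, 80);
        }
    }
    WebRtcVad_Free(vad);
    return decision;
}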
diff --git a/src/common_audio/vad/main/source/Android.mk b/src/common_audio/vad/main/source/Android.mk
new file mode 100644
index 0000000000..f52df935d1
--- /dev/null
+++ b/src/common_audio/vad/main/source/Android.mk
@@ -0,0 +1,64 @@
+# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_ARM_MODE := arm
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+LOCAL_MODULE := libwebrtc_vad
+LOCAL_MODULE_TAGS := optional
+LOCAL_GENERATED_SOURCES :=
+LOCAL_SRC_FILES := webrtc_vad.c \
+ vad_const.c \
+ vad_core.c \
+ vad_filterbank.c \
+ vad_gmm.c \
+ vad_sp.c
+
+# Flags passed to both C and C++ files.
+MY_CFLAGS :=
+MY_CFLAGS_C :=
+MY_DEFS := '-DNO_TCMALLOC' \
+ '-DNO_HEAPCHECKER' \
+ '-DWEBRTC_TARGET_PC' \
+ '-DWEBRTC_LINUX'
+ifeq ($(TARGET_ARCH),arm)
+MY_DEFS += \
+ '-DWEBRTC_ANDROID' \
+ '-DANDROID'
+endif
+LOCAL_CFLAGS := $(MY_CFLAGS_C) $(MY_CFLAGS) $(MY_DEFS)
+
+# Include paths placed before CFLAGS/CPPFLAGS
+LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../.. \
+ $(LOCAL_PATH)/../interface \
+ $(LOCAL_PATH)/../../../signal_processing_library/main/interface
+
+# Flags passed to only C++ (and not C) files.
+LOCAL_CPPFLAGS :=
+
+LOCAL_LDFLAGS :=
+
+LOCAL_STATIC_LIBRARIES :=
+
+LOCAL_SHARED_LIBRARIES := libdl \
+ libstlport
+LOCAL_ADDITIONAL_DEPENDENCIES :=
+
+ifeq ($(TARGET_OS)-$(TARGET_SIMULATOR),linux-true)
+LOCAL_LDLIBS += -ldl -lpthread
+endif
+
+ifneq ($(TARGET_SIMULATOR),true)
+LOCAL_SHARED_LIBRARIES += libdl
+endif
+
+include external/stlport/libstlport.mk
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/common_audio/vad/main/source/vad.gyp b/src/common_audio/vad/main/source/vad.gyp
new file mode 100644
index 0000000000..754b684d5b
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad.gyp
@@ -0,0 +1,51 @@
+# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+{
+ 'includes': [
+ '../../../../common_settings.gypi', # Common settings
+ ],
+ 'targets': [
+ {
+ 'target_name': 'vad',
+ 'type': '<(library)',
+ 'dependencies': [
+ '../../../signal_processing_library/main/source/spl.gyp:spl',
+ ],
+ 'include_dirs': [
+ '../interface',
+ ],
+ 'direct_dependent_settings': {
+ 'include_dirs': [
+ '../interface',
+ ],
+ },
+ 'sources': [
+ '../interface/webrtc_vad.h',
+ 'webrtc_vad.c',
+ 'vad_const.c',
+ 'vad_const.h',
+ 'vad_defines.h',
+ 'vad_core.c',
+ 'vad_core.h',
+ 'vad_filterbank.c',
+ 'vad_filterbank.h',
+ 'vad_gmm.c',
+ 'vad_gmm.h',
+ 'vad_sp.c',
+ 'vad_sp.h',
+ ],
+ },
+ ],
+}
+
+# Local Variables:
+# tab-width:2
+# indent-tabs-mode:nil
+# End:
+# vim: set expandtab tabstop=2 shiftwidth=2:
diff --git a/src/common_audio/vad/main/source/vad_const.c b/src/common_audio/vad/main/source/vad_const.c
new file mode 100644
index 0000000000..47b6a4b8ca
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_const.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This file includes the constant values used internally in VAD.
+ */
+
+#include "vad_const.h"
+
+// Spectrum Weighting
+const WebRtc_Word16 kSpectrumWeight[6] = {6, 8, 10, 12, 14, 16};
+
+const WebRtc_Word16 kCompVar = 22005;
+
+// Constant 160*log10(2) in Q9
+const WebRtc_Word16 kLogConst = 24660;
+
+// Constant log2(exp(1)) in Q12
+const WebRtc_Word16 kLog10Const = 5909;
+
+// Q15
+const WebRtc_Word16 kNoiseUpdateConst = 655;
+const WebRtc_Word16 kSpeechUpdateConst = 6554;
+
+// Q8
+const WebRtc_Word16 kBackEta = 154;
+
+// Coefficients used by WebRtcVad_HpOutput, Q14
+const WebRtc_Word16 kHpZeroCoefs[3] = {6631, -13262, 6631};
+const WebRtc_Word16 kHpPoleCoefs[3] = {16384, -7756, 5620};
+
+// Allpass filter coefficients, upper and lower, in Q15
+// Upper: 0.64, Lower: 0.17
+const WebRtc_Word16 kAllPassCoefsQ15[2] = {20972, 5571};
+const WebRtc_Word16 kAllPassCoefsQ13[2] = {5243, 1392}; // Q13
+
+// Minimum difference between the two models, Q5
+const WebRtc_Word16 kMinimumDifference[6] = {544, 544, 576, 576, 576, 576};
+
+// Upper limit of mean value for speech model, Q7
+const WebRtc_Word16 kMaximumSpeech[6] = {11392, 11392, 11520, 11520, 11520, 11520};
+
+// Minimum value for mean value
+const WebRtc_Word16 kMinimumMean[2] = {640, 768};
+
+// Upper limit of mean value for noise model, Q7
+const WebRtc_Word16 kMaximumNoise[6] = {9216, 9088, 8960, 8832, 8704, 8576};
+
+// Adjustment for division with two in WebRtcVad_SplitFilter
+const WebRtc_Word16 kOffsetVector[6] = {368, 368, 272, 176, 176, 176};
+
+// Start values for the Gaussian models, Q7
+// Weights for the two Gaussians for the six channels (noise)
+const WebRtc_Word16 kNoiseDataWeights[12] = {34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103};
+
+// Weights for the two Gaussians for the six channels (speech)
+const WebRtc_Word16 kSpeechDataWeights[12] = {48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81};
+
+// Means for the two Gaussians for the six channels (noise)
+const WebRtc_Word16 kNoiseDataMeans[12] = {6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863,
+ 7820, 7266, 5020, 4362};
+
+// Means for the two Gaussians for the six channels (speech)
+const WebRtc_Word16 kSpeechDataMeans[12] = {8306, 10085, 10078, 11823, 11843, 6309, 9473,
+ 9571, 10879, 7581, 8180, 7483};
+
+// Stds for the two Gaussians for the six channels (noise)
+const WebRtc_Word16 kNoiseDataStds[12] = {378, 1064, 493, 582, 688, 593, 474, 697, 475, 688,
+ 421, 455};
+
+// Stds for the two Gaussians for the six channels (speech)
+const WebRtc_Word16 kSpeechDataStds[12] = {555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540,
+ 1079, 850};
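
The Q-format constants above follow directly from their stated definitions, e.g. kLogConst = round(160*log10(2)*2^9) = 24660 and kAllPassCoefsQ15 = round({0.64, 0.17}*2^15) = {20972, 5571}. A hypothetical stand-alone sanity check (C99, links with -lm):

#include <math.h>
#include <stdio.h>

int main(void)
{
    // 160*log10(2) in Q9
    printf("kLogConst        = %ld\n", lround(160.0 * log10(2.0) * (1 << 9)));
    // log2(exp(1)) in Q12
    printf("kLog10Const      = %ld\n", lround(log2(exp(1.0)) * (1 << 12)));
    // 0.64 and 0.17 in Q15
    printf("kAllPassCoefsQ15 = {%ld, %ld}\n",
           lround(0.64 * (1 << 15)), lround(0.17 * (1 << 15)));
    return 0;  // Prints 24660, 5909, {20972, 5571}
}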
diff --git a/src/common_audio/vad/main/source/vad_const.h b/src/common_audio/vad/main/source/vad_const.h
new file mode 100644
index 0000000000..89804379be
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_const.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes the declarations of the internally used constants.
+ */
+
+#ifndef WEBRTC_VAD_CONST_H_
+#define WEBRTC_VAD_CONST_H_
+
+#include "typedefs.h"
+
+// TODO(ajm): give these internal-linkage by moving to the appropriate file
+// where possible, and otherwise tag with WebRtcVad_.
+
+// Spectrum Weighting
+extern const WebRtc_Word16 kSpectrumWeight[];
+extern const WebRtc_Word16 kCompVar;
+// Logarithm constant
+extern const WebRtc_Word16 kLogConst;
+extern const WebRtc_Word16 kLog10Const;
+// Q15
+extern const WebRtc_Word16 kNoiseUpdateConst;
+extern const WebRtc_Word16 kSpeechUpdateConst;
+// Q8
+extern const WebRtc_Word16 kBackEta;
+// Coefficients used by WebRtcVad_HpOutput, Q14
+extern const WebRtc_Word16 kHpZeroCoefs[];
+extern const WebRtc_Word16 kHpPoleCoefs[];
+// Allpass filter coefficients, upper and lower, in Q15 resp. Q13
+extern const WebRtc_Word16 kAllPassCoefsQ15[];
+extern const WebRtc_Word16 kAllPassCoefsQ13[];
+// Minimum difference between the two models, Q5
+extern const WebRtc_Word16 kMinimumDifference[];
+// Maximum value when updating the speech model, Q7
+extern const WebRtc_Word16 kMaximumSpeech[];
+// Minimum value for mean value
+extern const WebRtc_Word16 kMinimumMean[];
+// Upper limit of mean value for noise model, Q7
+extern const WebRtc_Word16 kMaximumNoise[];
+// Adjustment for division with two in WebRtcVad_SplitFilter
+extern const WebRtc_Word16 kOffsetVector[];
+// Start values for the Gaussian models, Q7
+extern const WebRtc_Word16 kNoiseDataWeights[];
+extern const WebRtc_Word16 kSpeechDataWeights[];
+extern const WebRtc_Word16 kNoiseDataMeans[];
+extern const WebRtc_Word16 kSpeechDataMeans[];
+extern const WebRtc_Word16 kNoiseDataStds[];
+extern const WebRtc_Word16 kSpeechDataStds[];
+
+#endif // WEBRTC_VAD_CONST_H_
diff --git a/src/common_audio/vad/main/source/vad_core.c b/src/common_audio/vad/main/source/vad_core.c
new file mode 100644
index 0000000000..e8829993d5
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_core.c
@@ -0,0 +1,685 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file includes the implementation of the core functionality in VAD.
+ * For function description, see vad_core.h.
+ */
+
+#include "vad_core.h"
+#include "vad_const.h"
+#include "vad_defines.h"
+#include "vad_filterbank.h"
+#include "vad_gmm.h"
+#include "vad_sp.h"
+#include "signal_processing_library.h"
+
+static const int kInitCheck = 42;
+
+// Initialize VAD
+int WebRtcVad_InitCore(VadInstT *inst, short mode)
+{
+ int i;
+
+ // Initialization of struct
+ inst->vad = 1;
+ inst->frame_counter = 0;
+ inst->over_hang = 0;
+ inst->num_of_speech = 0;
+
+ // Initialization of downsampling filter state
+ inst->downsampling_filter_states[0] = 0;
+ inst->downsampling_filter_states[1] = 0;
+ inst->downsampling_filter_states[2] = 0;
+ inst->downsampling_filter_states[3] = 0;
+
+ // Read initial PDF parameters
+ for (i = 0; i < NUM_TABLE_VALUES; i++)
+ {
+ inst->noise_means[i] = kNoiseDataMeans[i];
+ inst->speech_means[i] = kSpeechDataMeans[i];
+ inst->noise_stds[i] = kNoiseDataStds[i];
+ inst->speech_stds[i] = kSpeechDataStds[i];
+ }
+
+ // Index and Minimum value vectors are initialized
+ for (i = 0; i < 16 * NUM_CHANNELS; i++)
+ {
+ inst->low_value_vector[i] = 10000;
+ inst->index_vector[i] = 0;
+ }
+
+ for (i = 0; i < 5; i++)
+ {
+ inst->upper_state[i] = 0;
+ inst->lower_state[i] = 0;
+ }
+
+ for (i = 0; i < 4; i++)
+ {
+ inst->hp_filter_state[i] = 0;
+ }
+
+    // Init mean value memory, for the FindMinimum function
+ inst->mean_value[0] = 1600;
+ inst->mean_value[1] = 1600;
+ inst->mean_value[2] = 1600;
+ inst->mean_value[3] = 1600;
+ inst->mean_value[4] = 1600;
+ inst->mean_value[5] = 1600;
+
+ if (mode == 0)
+ {
+ // Quality mode
+ inst->over_hang_max_1[0] = OHMAX1_10MS_Q; // Overhang short speech burst
+ inst->over_hang_max_1[1] = OHMAX1_20MS_Q; // Overhang short speech burst
+ inst->over_hang_max_1[2] = OHMAX1_30MS_Q; // Overhang short speech burst
+ inst->over_hang_max_2[0] = OHMAX2_10MS_Q; // Overhang long speech burst
+ inst->over_hang_max_2[1] = OHMAX2_20MS_Q; // Overhang long speech burst
+ inst->over_hang_max_2[2] = OHMAX2_30MS_Q; // Overhang long speech burst
+
+ inst->individual[0] = INDIVIDUAL_10MS_Q;
+ inst->individual[1] = INDIVIDUAL_20MS_Q;
+ inst->individual[2] = INDIVIDUAL_30MS_Q;
+
+ inst->total[0] = TOTAL_10MS_Q;
+ inst->total[1] = TOTAL_20MS_Q;
+ inst->total[2] = TOTAL_30MS_Q;
+ } else if (mode == 1)
+ {
+ // Low bitrate mode
+ inst->over_hang_max_1[0] = OHMAX1_10MS_LBR; // Overhang short speech burst
+ inst->over_hang_max_1[1] = OHMAX1_20MS_LBR; // Overhang short speech burst
+ inst->over_hang_max_1[2] = OHMAX1_30MS_LBR; // Overhang short speech burst
+ inst->over_hang_max_2[0] = OHMAX2_10MS_LBR; // Overhang long speech burst
+ inst->over_hang_max_2[1] = OHMAX2_20MS_LBR; // Overhang long speech burst
+ inst->over_hang_max_2[2] = OHMAX2_30MS_LBR; // Overhang long speech burst
+
+ inst->individual[0] = INDIVIDUAL_10MS_LBR;
+ inst->individual[1] = INDIVIDUAL_20MS_LBR;
+ inst->individual[2] = INDIVIDUAL_30MS_LBR;
+
+ inst->total[0] = TOTAL_10MS_LBR;
+ inst->total[1] = TOTAL_20MS_LBR;
+ inst->total[2] = TOTAL_30MS_LBR;
+ } else if (mode == 2)
+ {
+ // Aggressive mode
+ inst->over_hang_max_1[0] = OHMAX1_10MS_AGG; // Overhang short speech burst
+ inst->over_hang_max_1[1] = OHMAX1_20MS_AGG; // Overhang short speech burst
+ inst->over_hang_max_1[2] = OHMAX1_30MS_AGG; // Overhang short speech burst
+ inst->over_hang_max_2[0] = OHMAX2_10MS_AGG; // Overhang long speech burst
+ inst->over_hang_max_2[1] = OHMAX2_20MS_AGG; // Overhang long speech burst
+ inst->over_hang_max_2[2] = OHMAX2_30MS_AGG; // Overhang long speech burst
+
+ inst->individual[0] = INDIVIDUAL_10MS_AGG;
+ inst->individual[1] = INDIVIDUAL_20MS_AGG;
+ inst->individual[2] = INDIVIDUAL_30MS_AGG;
+
+ inst->total[0] = TOTAL_10MS_AGG;
+ inst->total[1] = TOTAL_20MS_AGG;
+ inst->total[2] = TOTAL_30MS_AGG;
+ } else
+ {
+ // Very aggressive mode
+ inst->over_hang_max_1[0] = OHMAX1_10MS_VAG; // Overhang short speech burst
+ inst->over_hang_max_1[1] = OHMAX1_20MS_VAG; // Overhang short speech burst
+ inst->over_hang_max_1[2] = OHMAX1_30MS_VAG; // Overhang short speech burst
+ inst->over_hang_max_2[0] = OHMAX2_10MS_VAG; // Overhang long speech burst
+ inst->over_hang_max_2[1] = OHMAX2_20MS_VAG; // Overhang long speech burst
+ inst->over_hang_max_2[2] = OHMAX2_30MS_VAG; // Overhang long speech burst
+
+ inst->individual[0] = INDIVIDUAL_10MS_VAG;
+ inst->individual[1] = INDIVIDUAL_20MS_VAG;
+ inst->individual[2] = INDIVIDUAL_30MS_VAG;
+
+ inst->total[0] = TOTAL_10MS_VAG;
+ inst->total[1] = TOTAL_20MS_VAG;
+ inst->total[2] = TOTAL_30MS_VAG;
+ }
+
+ inst->init_flag = kInitCheck;
+
+ return 0;
+}
+
+// Set aggressiveness mode
+int WebRtcVad_set_mode_core(VadInstT *inst, short mode)
+{
+
+ if (mode == 0)
+ {
+ // Quality mode
+ inst->over_hang_max_1[0] = OHMAX1_10MS_Q; // Overhang short speech burst
+ inst->over_hang_max_1[1] = OHMAX1_20MS_Q; // Overhang short speech burst
+ inst->over_hang_max_1[2] = OHMAX1_30MS_Q; // Overhang short speech burst
+ inst->over_hang_max_2[0] = OHMAX2_10MS_Q; // Overhang long speech burst
+ inst->over_hang_max_2[1] = OHMAX2_20MS_Q; // Overhang long speech burst
+ inst->over_hang_max_2[2] = OHMAX2_30MS_Q; // Overhang long speech burst
+
+ inst->individual[0] = INDIVIDUAL_10MS_Q;
+ inst->individual[1] = INDIVIDUAL_20MS_Q;
+ inst->individual[2] = INDIVIDUAL_30MS_Q;
+
+ inst->total[0] = TOTAL_10MS_Q;
+ inst->total[1] = TOTAL_20MS_Q;
+ inst->total[2] = TOTAL_30MS_Q;
+ } else if (mode == 1)
+ {
+ // Low bitrate mode
+ inst->over_hang_max_1[0] = OHMAX1_10MS_LBR; // Overhang short speech burst
+ inst->over_hang_max_1[1] = OHMAX1_20MS_LBR; // Overhang short speech burst
+ inst->over_hang_max_1[2] = OHMAX1_30MS_LBR; // Overhang short speech burst
+ inst->over_hang_max_2[0] = OHMAX2_10MS_LBR; // Overhang long speech burst
+ inst->over_hang_max_2[1] = OHMAX2_20MS_LBR; // Overhang long speech burst
+ inst->over_hang_max_2[2] = OHMAX2_30MS_LBR; // Overhang long speech burst
+
+ inst->individual[0] = INDIVIDUAL_10MS_LBR;
+ inst->individual[1] = INDIVIDUAL_20MS_LBR;
+ inst->individual[2] = INDIVIDUAL_30MS_LBR;
+
+ inst->total[0] = TOTAL_10MS_LBR;
+ inst->total[1] = TOTAL_20MS_LBR;
+ inst->total[2] = TOTAL_30MS_LBR;
+ } else if (mode == 2)
+ {
+ // Aggressive mode
+ inst->over_hang_max_1[0] = OHMAX1_10MS_AGG; // Overhang short speech burst
+ inst->over_hang_max_1[1] = OHMAX1_20MS_AGG; // Overhang short speech burst
+ inst->over_hang_max_1[2] = OHMAX1_30MS_AGG; // Overhang short speech burst
+ inst->over_hang_max_2[0] = OHMAX2_10MS_AGG; // Overhang long speech burst
+ inst->over_hang_max_2[1] = OHMAX2_20MS_AGG; // Overhang long speech burst
+ inst->over_hang_max_2[2] = OHMAX2_30MS_AGG; // Overhang long speech burst
+
+ inst->individual[0] = INDIVIDUAL_10MS_AGG;
+ inst->individual[1] = INDIVIDUAL_20MS_AGG;
+ inst->individual[2] = INDIVIDUAL_30MS_AGG;
+
+ inst->total[0] = TOTAL_10MS_AGG;
+ inst->total[1] = TOTAL_20MS_AGG;
+ inst->total[2] = TOTAL_30MS_AGG;
+ } else if (mode == 3)
+ {
+ // Very aggressive mode
+ inst->over_hang_max_1[0] = OHMAX1_10MS_VAG; // Overhang short speech burst
+ inst->over_hang_max_1[1] = OHMAX1_20MS_VAG; // Overhang short speech burst
+ inst->over_hang_max_1[2] = OHMAX1_30MS_VAG; // Overhang short speech burst
+ inst->over_hang_max_2[0] = OHMAX2_10MS_VAG; // Overhang long speech burst
+ inst->over_hang_max_2[1] = OHMAX2_20MS_VAG; // Overhang long speech burst
+ inst->over_hang_max_2[2] = OHMAX2_30MS_VAG; // Overhang long speech burst
+
+ inst->individual[0] = INDIVIDUAL_10MS_VAG;
+ inst->individual[1] = INDIVIDUAL_20MS_VAG;
+ inst->individual[2] = INDIVIDUAL_30MS_VAG;
+
+ inst->total[0] = TOTAL_10MS_VAG;
+ inst->total[1] = TOTAL_20MS_VAG;
+ inst->total[2] = TOTAL_30MS_VAG;
+ } else
+ {
+ return -1;
+ }
+
+ return 0;
+}
+
+// Calculate the VAD decision by first extracting feature values and then
+// calculating the probability for both speech and background noise.
+
+WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT *inst, WebRtc_Word16 *speech_frame,
+ int frame_length)
+{
+ WebRtc_Word16 len, vad;
+    WebRtc_Word16 speechWB[480]; // Speech frame downsampled to 16 kHz (from 960 samples, 30 ms in SWB)
+    WebRtc_Word16 speechNB[240]; // Speech frame downsampled to 8 kHz (from 480 samples, 30 ms in WB)
+
+
+ // Downsample signal 32->16->8 before doing VAD
+ WebRtcVad_Downsampling(speech_frame, speechWB, &(inst->downsampling_filter_states[2]),
+ frame_length);
+ len = WEBRTC_SPL_RSHIFT_W16(frame_length, 1);
+
+ WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states, len);
+ len = WEBRTC_SPL_RSHIFT_W16(len, 1);
+
+ // Do VAD on an 8 kHz signal
+ vad = WebRtcVad_CalcVad8khz(inst, speechNB, len);
+
+ return vad;
+}
+
+WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT *inst, WebRtc_Word16 *speech_frame,
+ int frame_length)
+{
+ WebRtc_Word16 len, vad;
+    WebRtc_Word16 speechNB[240]; // Speech frame downsampled to 8 kHz (from 480 samples, 30 ms in WB)
+
+ // Wideband: Downsample signal before doing VAD
+ WebRtcVad_Downsampling(speech_frame, speechNB, inst->downsampling_filter_states,
+ frame_length);
+
+ len = WEBRTC_SPL_RSHIFT_W16(frame_length, 1);
+ vad = WebRtcVad_CalcVad8khz(inst, speechNB, len);
+
+ return vad;
+}
+
+WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT *inst, WebRtc_Word16 *speech_frame,
+ int frame_length)
+{
+ WebRtc_Word16 feature_vector[NUM_CHANNELS], total_power;
+
+ // Get power in the bands
+ total_power = WebRtcVad_get_features(inst, speech_frame, frame_length, feature_vector);
+
+ // Make a VAD
+ inst->vad = WebRtcVad_GmmProbability(inst, feature_vector, total_power, frame_length);
+
+ return inst->vad;
+}
+
+// Calculate probability for both speech and background noise, and perform a
+// hypothesis-test.
+WebRtc_Word16 WebRtcVad_GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
+ WebRtc_Word16 total_power, int frame_length)
+{
+ int n, k;
+ WebRtc_Word16 backval;
+ WebRtc_Word16 h0, h1;
+ WebRtc_Word16 ratvec, xval;
+ WebRtc_Word16 vadflag;
+ WebRtc_Word16 shifts0, shifts1;
+ WebRtc_Word16 tmp16, tmp16_1, tmp16_2;
+ WebRtc_Word16 diff, nr, pos;
+ WebRtc_Word16 nmk, nmk2, nmk3, smk, smk2, nsk, ssk;
+ WebRtc_Word16 delt, ndelt;
+ WebRtc_Word16 maxspe, maxmu;
+ WebRtc_Word16 deltaN[NUM_TABLE_VALUES], deltaS[NUM_TABLE_VALUES];
+ WebRtc_Word16 ngprvec[NUM_TABLE_VALUES], sgprvec[NUM_TABLE_VALUES];
+ WebRtc_Word32 h0test, h1test;
+ WebRtc_Word32 tmp32_1, tmp32_2;
+ WebRtc_Word32 dotVal;
+ WebRtc_Word32 nmid, smid;
+ WebRtc_Word32 probn[NUM_MODELS], probs[NUM_MODELS];
+ WebRtc_Word16 *nmean1ptr, *nmean2ptr, *smean1ptr, *smean2ptr, *nstd1ptr, *nstd2ptr,
+ *sstd1ptr, *sstd2ptr;
+ WebRtc_Word16 overhead1, overhead2, individualTest, totalTest;
+
+ // Set the thresholds to different values based on frame length
+ if (frame_length == 80)
+ {
+ // 80 input samples
+ overhead1 = inst->over_hang_max_1[0];
+ overhead2 = inst->over_hang_max_2[0];
+ individualTest = inst->individual[0];
+ totalTest = inst->total[0];
+ } else if (frame_length == 160)
+ {
+ // 160 input samples
+ overhead1 = inst->over_hang_max_1[1];
+ overhead2 = inst->over_hang_max_2[1];
+ individualTest = inst->individual[1];
+ totalTest = inst->total[1];
+ } else
+ {
+ // 240 input samples
+ overhead1 = inst->over_hang_max_1[2];
+ overhead2 = inst->over_hang_max_2[2];
+ individualTest = inst->individual[2];
+ totalTest = inst->total[2];
+ }
+
+ if (total_power > MIN_ENERGY)
+ { // If signal present at all
+
+ // Set pointers to the gaussian parameters
+ nmean1ptr = &inst->noise_means[0];
+ nmean2ptr = &inst->noise_means[NUM_CHANNELS];
+ smean1ptr = &inst->speech_means[0];
+ smean2ptr = &inst->speech_means[NUM_CHANNELS];
+ nstd1ptr = &inst->noise_stds[0];
+ nstd2ptr = &inst->noise_stds[NUM_CHANNELS];
+ sstd1ptr = &inst->speech_stds[0];
+ sstd2ptr = &inst->speech_stds[NUM_CHANNELS];
+
+ vadflag = 0;
+ dotVal = 0;
+ for (n = 0; n < NUM_CHANNELS; n++)
+ { // For all channels
+
+ pos = WEBRTC_SPL_LSHIFT_W16(n, 1);
+ xval = feature_vector[n];
+
+ // Probability for Noise, Q7 * Q20 = Q27
+ tmp32_1 = WebRtcVad_GaussianProbability(xval, *nmean1ptr++, *nstd1ptr++,
+ &deltaN[pos]);
+ probn[0] = (WebRtc_Word32)(kNoiseDataWeights[n] * tmp32_1);
+ tmp32_1 = WebRtcVad_GaussianProbability(xval, *nmean2ptr++, *nstd2ptr++,
+ &deltaN[pos + 1]);
+ probn[1] = (WebRtc_Word32)(kNoiseDataWeights[n + NUM_CHANNELS] * tmp32_1);
+ h0test = probn[0] + probn[1]; // Q27
+ h0 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(h0test, 12); // Q15
+
+ // Probability for Speech
+ tmp32_1 = WebRtcVad_GaussianProbability(xval, *smean1ptr++, *sstd1ptr++,
+ &deltaS[pos]);
+ probs[0] = (WebRtc_Word32)(kSpeechDataWeights[n] * tmp32_1);
+ tmp32_1 = WebRtcVad_GaussianProbability(xval, *smean2ptr++, *sstd2ptr++,
+ &deltaS[pos + 1]);
+ probs[1] = (WebRtc_Word32)(kSpeechDataWeights[n + NUM_CHANNELS] * tmp32_1);
+ h1test = probs[0] + probs[1]; // Q27
+ h1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(h1test, 12); // Q15
+
+ // Get likelihood ratio. Approximate log2(H1/H0) with shifts0 - shifts1
+ shifts0 = WebRtcSpl_NormW32(h0test);
+ shifts1 = WebRtcSpl_NormW32(h1test);
+
+ if ((h0test > 0) && (h1test > 0))
+ {
+ ratvec = shifts0 - shifts1;
+ } else if (h1test > 0)
+ {
+ ratvec = 31 - shifts1;
+ } else if (h0test > 0)
+ {
+ ratvec = shifts0 - 31;
+ } else
+ {
+ ratvec = 0;
+ }
+
+ // VAD decision with spectrum weighting
+ dotVal += WEBRTC_SPL_MUL_16_16(ratvec, kSpectrumWeight[n]);
+
+ // Individual channel test
+ if ((ratvec << 2) > individualTest)
+ {
+ vadflag = 1;
+ }
+
+ // Probabilities used when updating model
+ if (h0 > 0)
+ {
+ tmp32_1 = probn[0] & 0xFFFFF000; // Q27
+ tmp32_2 = WEBRTC_SPL_LSHIFT_W32(tmp32_1, 2); // Q29
+ ngprvec[pos] = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_2, h0);
+ ngprvec[pos + 1] = 16384 - ngprvec[pos];
+ } else
+ {
+ ngprvec[pos] = 16384;
+ ngprvec[pos + 1] = 0;
+ }
+
+ // Probabilities used when updating model
+ if (h1 > 0)
+ {
+ tmp32_1 = probs[0] & 0xFFFFF000;
+ tmp32_2 = WEBRTC_SPL_LSHIFT_W32(tmp32_1, 2);
+ sgprvec[pos] = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_2, h1);
+ sgprvec[pos + 1] = 16384 - sgprvec[pos];
+ } else
+ {
+ sgprvec[pos] = 0;
+ sgprvec[pos + 1] = 0;
+ }
+ }
+
+ // Overall test
+ if (dotVal >= totalTest)
+ {
+ vadflag |= 1;
+ }
+
+ // Set pointers to the means and standard deviations.
+ nmean1ptr = &inst->noise_means[0];
+ smean1ptr = &inst->speech_means[0];
+ nstd1ptr = &inst->noise_stds[0];
+ sstd1ptr = &inst->speech_stds[0];
+
+ maxspe = 12800;
+
+ // Update the model's parameters
+ for (n = 0; n < NUM_CHANNELS; n++)
+ {
+
+ pos = WEBRTC_SPL_LSHIFT_W16(n, 1);
+
+ // Get min value in past which is used for long term correction
+ backval = WebRtcVad_FindMinimum(inst, feature_vector[n], n); // Q4
+
+            // Compute the "global" mean, i.e. the weighted sum of the two means
+ nmid = WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n], *nmean1ptr); // Q7 * Q7
+ nmid += WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n+NUM_CHANNELS],
+ *(nmean1ptr+NUM_CHANNELS));
+ tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(nmid, 6); // Q8
+
+ for (k = 0; k < NUM_MODELS; k++)
+ {
+
+ nr = pos + k;
+
+ nmean2ptr = nmean1ptr + k * NUM_CHANNELS;
+ smean2ptr = smean1ptr + k * NUM_CHANNELS;
+ nstd2ptr = nstd1ptr + k * NUM_CHANNELS;
+ sstd2ptr = sstd1ptr + k * NUM_CHANNELS;
+ nmk = *nmean2ptr;
+ smk = *smean2ptr;
+ nsk = *nstd2ptr;
+ ssk = *sstd2ptr;
+
+ // Update noise mean vector if the frame consists of noise only
+ nmk2 = nmk;
+ if (!vadflag)
+ {
+ // deltaN = (x-mu)/sigma^2
+ // ngprvec[k] = probn[k]/(probn[0] + probn[1])
+
+ delt = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(ngprvec[nr],
+ deltaN[nr], 11); // Q14*Q11
+ nmk2 = nmk + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delt,
+ kNoiseUpdateConst,
+ 22); // Q7+(Q14*Q15>>22)
+ }
+
+ // Long term correction of the noise mean
+ ndelt = WEBRTC_SPL_LSHIFT_W16(backval, 4);
+ ndelt -= tmp16_1; // Q8 - Q8
+ nmk3 = nmk2 + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(ndelt,
+ kBackEta,
+ 9); // Q7+(Q8*Q8)>>9
+
+                // Control that the noise mean does not drift too much
+ tmp16 = WEBRTC_SPL_LSHIFT_W16(k+5, 7);
+ if (nmk3 < tmp16)
+ nmk3 = tmp16;
+ tmp16 = WEBRTC_SPL_LSHIFT_W16(72+k-n, 7);
+ if (nmk3 > tmp16)
+ nmk3 = tmp16;
+ *nmean2ptr = nmk3;
+
+ if (vadflag)
+ {
+ // Update speech mean vector:
+ // deltaS = (x-mu)/sigma^2
+                    // sgprvec[k] = probs[k]/(probs[0] + probs[1])
+
+ delt = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(sgprvec[nr],
+ deltaS[nr],
+ 11); // (Q14*Q11)>>11=Q14
+ tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delt,
+ kSpeechUpdateConst,
+ 21) + 1;
+ smk2 = smk + (tmp16 >> 1); // Q7 + (Q14 * Q15 >> 22)
+
+                    // Control that the speech mean does not drift too much
+ maxmu = maxspe + 640;
+ if (smk2 < kMinimumMean[k])
+ smk2 = kMinimumMean[k];
+ if (smk2 > maxmu)
+ smk2 = maxmu;
+
+ *smean2ptr = smk2;
+
+ // (Q7>>3) = Q4
+ tmp16 = WEBRTC_SPL_RSHIFT_W16((smk + 4), 3);
+
+ tmp16 = feature_vector[n] - tmp16; // Q4
+ tmp32_1 = WEBRTC_SPL_MUL_16_16_RSFT(deltaS[nr], tmp16, 3);
+ tmp32_2 = tmp32_1 - (WebRtc_Word32)4096; // Q12
+ tmp16 = WEBRTC_SPL_RSHIFT_W16((sgprvec[nr]), 2);
+ tmp32_1 = (WebRtc_Word32)(tmp16 * tmp32_2);// (Q15>>3)*(Q14>>2)=Q12*Q12=Q24
+
+ tmp32_2 = WEBRTC_SPL_RSHIFT_W32(tmp32_1, 4); // Q20
+
+ // 0.1 * Q20 / Q7 = Q13
+ if (tmp32_2 > 0)
+ tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_2, ssk * 10);
+ else
+ {
+ tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(-tmp32_2, ssk * 10);
+ tmp16 = -tmp16;
+ }
+ // divide by 4 giving an update factor of 0.025
+ tmp16 += 128; // Rounding
+ ssk += WEBRTC_SPL_RSHIFT_W16(tmp16, 8);
+ // Division with 8 plus Q7
+ if (ssk < MIN_STD)
+ ssk = MIN_STD;
+ *sstd2ptr = ssk;
+ } else
+ {
+ // Update GMM variance vectors
+ // deltaN * (feature_vector[n] - nmk) - 1, Q11 * Q4
+ tmp16 = feature_vector[n] - WEBRTC_SPL_RSHIFT_W16(nmk, 3);
+
+ // (Q15>>3) * (Q14>>2) = Q12 * Q12 = Q24
+ tmp32_1 = WEBRTC_SPL_MUL_16_16_RSFT(deltaN[nr], tmp16, 3) - 4096;
+ tmp16 = WEBRTC_SPL_RSHIFT_W16((ngprvec[nr]+2), 2);
+ tmp32_2 = (WebRtc_Word32)(tmp16 * tmp32_1);
+ tmp32_1 = WEBRTC_SPL_RSHIFT_W32(tmp32_2, 14);
+ // Q20 * approx 0.001 (2^-10=0.0009766)
+
+ // Q20 / Q7 = Q13
+                    if (tmp32_1 > 0)
+                        tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_1, nsk);
+ else
+ {
+ tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(-tmp32_1, nsk);
+ tmp16 = -tmp16;
+ }
+ tmp16 += 32; // Rounding
+ nsk += WEBRTC_SPL_RSHIFT_W16(tmp16, 6);
+
+ if (nsk < MIN_STD)
+ nsk = MIN_STD;
+
+ *nstd2ptr = nsk;
+ }
+ }
+
+ // Separate models if they are too close - nmid in Q14
+ nmid = WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n], *nmean1ptr);
+ nmid += WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n+NUM_CHANNELS], *nmean2ptr);
+
+ // smid in Q14
+ smid = WEBRTC_SPL_MUL_16_16(kSpeechDataWeights[n], *smean1ptr);
+ smid += WEBRTC_SPL_MUL_16_16(kSpeechDataWeights[n+NUM_CHANNELS], *smean2ptr);
+
+ // diff = "global" speech mean - "global" noise mean
+ diff = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(smid, 9);
+ tmp16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(nmid, 9);
+ diff -= tmp16;
+
+ if (diff < kMinimumDifference[n])
+ {
+
+ tmp16 = kMinimumDifference[n] - diff; // Q5
+
+ // tmp16_1 = ~0.8 * (kMinimumDifference - diff) in Q7
+ // tmp16_2 = ~0.2 * (kMinimumDifference - diff) in Q7
+ tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(13, tmp16, 2);
+ tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(3, tmp16, 2);
+
+ // First Gauss, speech model
+ tmp16 = tmp16_1 + *smean1ptr;
+ *smean1ptr = tmp16;
+ smid = WEBRTC_SPL_MUL_16_16(tmp16, kSpeechDataWeights[n]);
+
+ // Second Gauss, speech model
+ tmp16 = tmp16_1 + *smean2ptr;
+ *smean2ptr = tmp16;
+ smid += WEBRTC_SPL_MUL_16_16(tmp16, kSpeechDataWeights[n+NUM_CHANNELS]);
+
+ // First Gauss, noise model
+ tmp16 = *nmean1ptr - tmp16_2;
+ *nmean1ptr = tmp16;
+
+ nmid = WEBRTC_SPL_MUL_16_16(tmp16, kNoiseDataWeights[n]);
+
+ // Second Gauss, noise model
+ tmp16 = *nmean2ptr - tmp16_2;
+ *nmean2ptr = tmp16;
+ nmid += WEBRTC_SPL_MUL_16_16(tmp16, kNoiseDataWeights[n+NUM_CHANNELS]);
+ }
+
+            // Control that the speech & noise means do not drift too much
+ maxspe = kMaximumSpeech[n];
+ tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(smid, 7);
+ if (tmp16_2 > maxspe)
+ { // Upper limit of speech model
+ tmp16_2 -= maxspe;
+
+ *smean1ptr -= tmp16_2;
+ *smean2ptr -= tmp16_2;
+ }
+
+ tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(nmid, 7);
+ if (tmp16_2 > kMaximumNoise[n])
+ {
+ tmp16_2 -= kMaximumNoise[n];
+
+ *nmean1ptr -= tmp16_2;
+ *nmean2ptr -= tmp16_2;
+ }
+
+            // Step to the next channel's Gaussian parameters
+            nmean1ptr++;
+            smean1ptr++;
+            nstd1ptr++;
+            sstd1ptr++;
+ }
+ inst->frame_counter++;
+ } else
+ {
+ vadflag = 0;
+ }
+
+ // Hangover smoothing
+ if (!vadflag)
+ {
+ if (inst->over_hang > 0)
+ {
+ vadflag = 2 + inst->over_hang;
+ inst->over_hang = inst->over_hang - 1;
+ }
+ inst->num_of_speech = 0;
+ } else
+ {
+ inst->num_of_speech = inst->num_of_speech + 1;
+ if (inst->num_of_speech > NSP_MAX)
+ {
+ inst->num_of_speech = NSP_MAX;
+ inst->over_hang = overhead2;
+ } else
+ inst->over_hang = overhead1;
+ }
+ return vadflag;
+}
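
A detail worth pausing on in GmmProbability above: the likelihood ratio is never actually divided out. WebRtcSpl_NormW32 returns how many left shifts a positive 32-bit value allows before overflow, i.e. 30 - floor(log2(x)), so shifts0 - shifts1 equals floor(log2(h1test)) - floor(log2(h0test)), an integer approximation of log2(H1/H0). A self-contained illustration (norm32 is a stand-in for WebRtcSpl_NormW32, assuming positive input):

#include <stdint.h>
#include <stdio.h>

// Stand-in for WebRtcSpl_NormW32 on positive values: the number of
// left shifts possible until bit 30 is set, i.e. 30 - floor(log2(a)).
static int norm32(int32_t a)
{
    uint32_t u = (uint32_t)a;
    int n = 0;
    while (n < 31 && (u & 0x40000000u) == 0)
    {
        u <<= 1;
        n++;
    }
    return n;
}

int main(void)
{
    int32_t h0test = 3 << 20;   // noise likelihood, Q27
    int32_t h1test = 96 << 20;  // speech likelihood, Q27: ratio is 32
    printf("approx log2(h1/h0) = %d\n", norm32(h0test) - norm32(h1test));  // 5
    return 0;
}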
diff --git a/src/common_audio/vad/main/source/vad_core.h b/src/common_audio/vad/main/source/vad_core.h
new file mode 100644
index 0000000000..544caf5ab3
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_core.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes the descriptions of the core VAD calls.
+ */
+
+#ifndef WEBRTC_VAD_CORE_H_
+#define WEBRTC_VAD_CORE_H_
+
+#include "typedefs.h"
+#include "vad_defines.h"
+
+typedef struct VadInstT_
+{
+
+ WebRtc_Word16 vad;
+ WebRtc_Word32 downsampling_filter_states[4];
+ WebRtc_Word16 noise_means[NUM_TABLE_VALUES];
+ WebRtc_Word16 speech_means[NUM_TABLE_VALUES];
+ WebRtc_Word16 noise_stds[NUM_TABLE_VALUES];
+ WebRtc_Word16 speech_stds[NUM_TABLE_VALUES];
+ WebRtc_Word32 frame_counter;
+ WebRtc_Word16 over_hang; // Over Hang
+ WebRtc_Word16 num_of_speech;
+ WebRtc_Word16 index_vector[16 * NUM_CHANNELS];
+ WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS];
+ WebRtc_Word16 mean_value[NUM_CHANNELS];
+ WebRtc_Word16 upper_state[5];
+ WebRtc_Word16 lower_state[5];
+ WebRtc_Word16 hp_filter_state[4];
+ WebRtc_Word16 over_hang_max_1[3];
+ WebRtc_Word16 over_hang_max_2[3];
+ WebRtc_Word16 individual[3];
+ WebRtc_Word16 total[3];
+
+ short init_flag;
+
+} VadInstT;
+
+/****************************************************************************
+ * WebRtcVad_InitCore(...)
+ *
+ * This function initializes a VAD instance
+ *
+ * Input:
+ * - inst : Instance that should be initialized
+ * - mode : Aggressiveness degree
+ * 0 (High quality) - 3 (Highly aggressive)
+ *
+ * Output:
+ * - inst : Initialized instance
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+int WebRtcVad_InitCore(VadInstT* inst, short mode);
+
+/****************************************************************************
+ * WebRtcVad_set_mode_core(...)
+ *
+ * This function changes the VAD settings
+ *
+ * Input:
+ * - inst : VAD instance
+ * - mode : Aggressiveness degree
+ * 0 (High quality) - 3 (Highly aggressive)
+ *
+ * Output:
+ * - inst : Changed instance
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+
+int WebRtcVad_set_mode_core(VadInstT* inst, short mode);
+
+/****************************************************************************
+ * WebRtcVad_CalcVad32khz(...)
+ * WebRtcVad_CalcVad16khz(...)
+ * WebRtcVad_CalcVad8khz(...)
+ *
+ * Calculate probability for active speech and make VAD decision.
+ *
+ * Input:
+ * - inst : Instance that should be initialized
+ * - speech_frame : Input speech frame
+ * - frame_length : Number of input samples
+ *
+ * Output:
+ * - inst : Updated filter states etc.
+ *
+ * Return value : VAD decision
+ * 0 - No active speech
+ * 1-6 - Active speech
+ */
+WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT* inst, WebRtc_Word16* speech_frame,
+ int frame_length);
+WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT* inst, WebRtc_Word16* speech_frame,
+ int frame_length);
+WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT* inst, WebRtc_Word16* speech_frame,
+ int frame_length);
+
+/****************************************************************************
+ * WebRtcVad_GmmProbability(...)
+ *
+ * This function calculates the probabilities for background noise and
+ * speech using Gaussian Mixture Models. A hypothesis-test is performed to decide
+ * which type of signal is most probable.
+ *
+ * Input:
+ * - inst : Pointer to VAD instance
+ * - feature_vector : Feature vector = log10(energy in frequency band)
+ * - total_power : Total power in frame.
+ * - frame_length : Number of input samples
+ *
+ * Output:
+ * VAD decision : 0 - noise, 1 - speech
+ *
+ */
+WebRtc_Word16 WebRtcVad_GmmProbability(VadInstT* inst, WebRtc_Word16* feature_vector,
+ WebRtc_Word16 total_power, int frame_length);
+
+#endif // WEBRTC_VAD_CORE_H_
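
In floating point, the per-channel test that WebRtcVad_GmmProbability performs reduces to comparing two-component Gaussian mixtures under the noise (H0) and speech (H1) models. A sketch of that test with illustrative parameters (the real code uses the trained Q7 tables from vad_const.c and fixed-point arithmetic throughout):

#include <math.h>

// Two-component GMM density: sum over k of w[k] * N(x; m[k], s[k]).
static double gmm2(double x, const double w[2], const double m[2],
                   const double s[2])
{
    double p = 0.0;
    int k;
    for (k = 0; k < 2; k++)
        p += w[k] * exp(-(x - m[k]) * (x - m[k]) / (2.0 * s[k] * s[k])) / s[k];
    return p;
}

// Returns nonzero if the speech hypothesis wins for one channel's
// feature value; threshold plays the role of inst->individual[].
static int channel_is_speech(double x,
                             const double wn[2], const double mn[2],
                             const double sn[2],  // noise model
                             const double ws[2], const double ms[2],
                             const double ss[2],  // speech model
                             double threshold)
{
    double h0 = gmm2(x, wn, mn, sn);
    double h1 = gmm2(x, ws, ms, ss);
    return h0 > 0.0 && h1 > 0.0 && log2(h1 / h0) > threshold;
}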
diff --git a/src/common_audio/vad/main/source/vad_defines.h b/src/common_audio/vad/main/source/vad_defines.h
new file mode 100644
index 0000000000..b33af2ef7d
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_defines.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes the macros used in VAD.
+ */
+
+#ifndef WEBRTC_VAD_DEFINES_H_
+#define WEBRTC_VAD_DEFINES_H_
+
+#define NUM_CHANNELS 6 // Six frequency bands
+#define NUM_MODELS 2 // Number of Gaussian models
+#define NUM_TABLE_VALUES (NUM_CHANNELS * NUM_MODELS)
+
+#define MIN_ENERGY 10
+#define ALPHA1 6553 // 0.2 in Q15
+#define ALPHA2 32439 // 0.99 in Q15
+#define NSP_MAX 6 // Maximum number of consecutive VAD=1 frames counted
+#define MIN_STD 384 // Minimum standard deviation
+// Mode 0, Quality thresholds - Different thresholds for the different frame lengths
+#define INDIVIDUAL_10MS_Q 24
+#define INDIVIDUAL_20MS_Q 21 // (log10(2)*66)<<2 ~=16
+#define INDIVIDUAL_30MS_Q 24
+
+#define TOTAL_10MS_Q 57
+#define TOTAL_20MS_Q 48
+#define TOTAL_30MS_Q 57
+
+#define OHMAX1_10MS_Q 8 // Max Overhang 1
+#define OHMAX2_10MS_Q 14 // Max Overhang 2
+#define OHMAX1_20MS_Q 4 // Max Overhang 1
+#define OHMAX2_20MS_Q 7 // Max Overhang 2
+#define OHMAX1_30MS_Q 3
+#define OHMAX2_30MS_Q 5
+
+// Mode 1, Low bitrate thresholds - Different thresholds for the different frame lengths
+#define INDIVIDUAL_10MS_LBR 37
+#define INDIVIDUAL_20MS_LBR 32
+#define INDIVIDUAL_30MS_LBR 37
+
+#define TOTAL_10MS_LBR 100
+#define TOTAL_20MS_LBR 80
+#define TOTAL_30MS_LBR 100
+
+#define OHMAX1_10MS_LBR 8 // Max Overhang 1
+#define OHMAX2_10MS_LBR 14 // Max Overhang 2
+#define OHMAX1_20MS_LBR 4
+#define OHMAX2_20MS_LBR 7
+
+#define OHMAX1_30MS_LBR 3
+#define OHMAX2_30MS_LBR 5
+
+// Mode 2, Aggressive thresholds - Different thresholds for the different frame lengths
+#define INDIVIDUAL_10MS_AGG 82
+#define INDIVIDUAL_20MS_AGG 78
+#define INDIVIDUAL_30MS_AGG 82
+
+#define TOTAL_10MS_AGG 285 //580
+#define TOTAL_20MS_AGG 260
+#define TOTAL_30MS_AGG 285
+
+#define OHMAX1_10MS_AGG 6 // Max Overhang 1
+#define OHMAX2_10MS_AGG 9 // Max Overhang 2
+#define OHMAX1_20MS_AGG 3
+#define OHMAX2_20MS_AGG 5
+
+#define OHMAX1_30MS_AGG 2
+#define OHMAX2_30MS_AGG 3
+
+// Mode 3, Very aggressive thresholds - Different thresholds for the different frame lengths
+#define INDIVIDUAL_10MS_VAG 94
+#define INDIVIDUAL_20MS_VAG 94
+#define INDIVIDUAL_30MS_VAG 94
+
+#define TOTAL_10MS_VAG 1100 //1700
+#define TOTAL_20MS_VAG 1050
+#define TOTAL_30MS_VAG 1100
+
+#define OHMAX1_10MS_VAG 6 // Max Overhang 1
+#define OHMAX2_10MS_VAG 9 // Max Overhang 2
+#define OHMAX1_20MS_VAG 3
+#define OHMAX2_20MS_VAG 5
+
+#define OHMAX1_30MS_VAG 2
+#define OHMAX2_30MS_VAG 3
+
+#endif // WEBRTC_VAD_DEFINES_H_
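
The three values in each threshold group above correspond to 10, 20, and 30 ms frames, i.e. 80, 160, and 240 samples at the internal 8 kHz rate, which is how vad_core.c selects among them. The mapping, as a hypothetical helper:

// Map an 8 kHz frame length in samples to the index used for
// inst->individual[], inst->total[] and the overhang arrays:
// 80 -> 0 (10 ms), 160 -> 1 (20 ms), 240 -> 2 (30 ms).
static int frame_length_index(int frame_length)
{
    return frame_length / 80 - 1;
}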
diff --git a/src/common_audio/vad/main/source/vad_filterbank.c b/src/common_audio/vad/main/source/vad_filterbank.c
new file mode 100644
index 0000000000..11392c917a
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_filterbank.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file includes the implementation of the internal filterbank associated functions.
+ * For function description, see vad_filterbank.h.
+ */
+
+#include "vad_filterbank.h"
+#include "vad_defines.h"
+#include "vad_const.h"
+#include "signal_processing_library.h"
+
+void WebRtcVad_HpOutput(WebRtc_Word16 *in_vector,
+ WebRtc_Word16 in_vector_length,
+ WebRtc_Word16 *out_vector,
+ WebRtc_Word16 *filter_state)
+{
+ WebRtc_Word16 i, *pi, *outPtr;
+ WebRtc_Word32 tmpW32;
+
+ pi = &in_vector[0];
+ outPtr = &out_vector[0];
+
+ // The sum of the absolute values of the impulse response:
+ // The zero/pole-filter has a max amplification of a single sample of: 1.4546
+ // Impulse response: 0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194
+ // The all-zero section has a max amplification of a single sample of: 1.6189
+ // Impulse response: 0.4047 -0.8094 0.4047 0 0 0
+ // The all-pole section has a max amplification of a single sample of: 1.9931
+ // Impulse response: 1.0000 0.4734 -0.1189 -0.2187 -0.0627 0.04532
+
+ for (i = 0; i < in_vector_length; i++)
+ {
+ // all-zero section (filter coefficients in Q14)
+ tmpW32 = (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[0], (*pi));
+ tmpW32 += (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[1], filter_state[0]);
+ tmpW32 += (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[2], filter_state[1]); // Q14
+ filter_state[1] = filter_state[0];
+ filter_state[0] = *pi++;
+
+ // all-pole section
+ tmpW32 -= (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpPoleCoefs[1], filter_state[2]); // Q14
+ tmpW32 -= (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpPoleCoefs[2], filter_state[3]);
+ filter_state[3] = filter_state[2];
+ filter_state[2] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32 (tmpW32, 14);
+ *outPtr++ = filter_state[2];
+ }
+}
+
+void WebRtcVad_Allpass(WebRtc_Word16 *in_vector,
+ WebRtc_Word16 *out_vector,
+ WebRtc_Word16 filter_coefficients,
+ int vector_length,
+ WebRtc_Word16 *filter_state)
+{
+ // The filter can only cause overflow (in the w16 output variable)
+    // if more than 4 consecutive input samples are of maximum value and
+    // have the same sign as the impulse response's first taps.
+ // First 6 taps of the impulse response: 0.6399 0.5905 -0.3779
+ // 0.2418 -0.1547 0.0990
+
+ int n;
+ WebRtc_Word16 tmp16;
+ WebRtc_Word32 tmp32, in32, state32;
+
+ state32 = WEBRTC_SPL_LSHIFT_W32(((WebRtc_Word32)(*filter_state)), 16); // Q31
+
+ for (n = 0; n < vector_length; n++)
+ {
+
+ tmp32 = state32 + WEBRTC_SPL_MUL_16_16(filter_coefficients, (*in_vector));
+ tmp16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 16);
+ *out_vector++ = tmp16;
+ in32 = WEBRTC_SPL_LSHIFT_W32(((WebRtc_Word32)(*in_vector)), 14);
+ state32 = in32 - WEBRTC_SPL_MUL_16_16(filter_coefficients, tmp16);
+ state32 = WEBRTC_SPL_LSHIFT_W32(state32, 1);
+ in_vector += 2;
+ }
+
+ *filter_state = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(state32, 16);
+}
+
+void WebRtcVad_SplitFilter(WebRtc_Word16 *in_vector,
+ WebRtc_Word16 *out_vector_hp,
+ WebRtc_Word16 *out_vector_lp,
+ WebRtc_Word16 *upper_state,
+ WebRtc_Word16 *lower_state,
+ int in_vector_length)
+{
+ WebRtc_Word16 tmpOut;
+ int k, halflen;
+
+ // Downsampling by 2 and get two branches
+ halflen = WEBRTC_SPL_RSHIFT_W16(in_vector_length, 1);
+
+ // All-pass filtering upper branch
+ WebRtcVad_Allpass(&in_vector[0], out_vector_hp, kAllPassCoefsQ15[0], halflen, upper_state);
+
+ // All-pass filtering lower branch
+ WebRtcVad_Allpass(&in_vector[1], out_vector_lp, kAllPassCoefsQ15[1], halflen, lower_state);
+
+ // Make LP and HP signals
+ for (k = 0; k < halflen; k++)
+ {
+ tmpOut = *out_vector_hp;
+ *out_vector_hp++ -= *out_vector_lp;
+ *out_vector_lp++ += tmpOut;
+ }
+}
+
+WebRtc_Word16 WebRtcVad_get_features(VadInstT *inst,
+ WebRtc_Word16 *in_vector,
+ int frame_size,
+ WebRtc_Word16 *out_vector)
+{
+ int curlen, filtno;
+ WebRtc_Word16 vecHP1[120], vecLP1[120];
+ WebRtc_Word16 vecHP2[60], vecLP2[60];
+ WebRtc_Word16 *ptin;
+ WebRtc_Word16 *hptout, *lptout;
+ WebRtc_Word16 power = 0;
+
+ // Split at 2000 Hz and downsample
+ filtno = 0;
+ ptin = in_vector;
+ hptout = vecHP1;
+ lptout = vecLP1;
+ curlen = frame_size;
+ WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno],
+ &inst->lower_state[filtno], curlen);
+
+ // Split at 3000 Hz and downsample
+ filtno = 1;
+ ptin = vecHP1;
+ hptout = vecHP2;
+ lptout = vecLP2;
+ curlen = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
+
+ WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno],
+ &inst->lower_state[filtno], curlen);
+
+ // Energy in 3000 Hz - 4000 Hz
+ curlen = WEBRTC_SPL_RSHIFT_W16(curlen, 1);
+ WebRtcVad_LogOfEnergy(vecHP2, &out_vector[5], &power, kOffsetVector[5], curlen);
+
+ // Energy in 2000 Hz - 3000 Hz
+ WebRtcVad_LogOfEnergy(vecLP2, &out_vector[4], &power, kOffsetVector[4], curlen);
+
+ // Split at 1000 Hz and downsample
+ filtno = 2;
+ ptin = vecLP1;
+ hptout = vecHP2;
+ lptout = vecLP2;
+ curlen = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
+ WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno],
+ &inst->lower_state[filtno], curlen);
+
+ // Energy in 1000 Hz - 2000 Hz
+ curlen = WEBRTC_SPL_RSHIFT_W16(curlen, 1);
+ WebRtcVad_LogOfEnergy(vecHP2, &out_vector[3], &power, kOffsetVector[3], curlen);
+
+ // Split at 500 Hz
+ filtno = 3;
+ ptin = vecLP2;
+ hptout = vecHP1;
+ lptout = vecLP1;
+
+ WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno],
+ &inst->lower_state[filtno], curlen);
+
+ // Energy in 500 Hz - 1000 Hz
+ curlen = WEBRTC_SPL_RSHIFT_W16(curlen, 1);
+ WebRtcVad_LogOfEnergy(vecHP1, &out_vector[2], &power, kOffsetVector[2], curlen);
+ // Split at 250 Hz
+ filtno = 4;
+ ptin = vecLP1;
+ hptout = vecHP2;
+ lptout = vecLP2;
+
+ WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno],
+ &inst->lower_state[filtno], curlen);
+
+ // Energy in 250 Hz - 500 Hz
+ curlen = WEBRTC_SPL_RSHIFT_W16(curlen, 1);
+ WebRtcVad_LogOfEnergy(vecHP2, &out_vector[1], &power, kOffsetVector[1], curlen);
+
+ // Remove DC and LFs
+ WebRtcVad_HpOutput(vecLP2, curlen, vecHP1, inst->hp_filter_state);
+
+ // Power in 80 Hz - 250 Hz
+ WebRtcVad_LogOfEnergy(vecHP1, &out_vector[0], &power, kOffsetVector[0], curlen);
+
+ return power;
+}
+
+void WebRtcVad_LogOfEnergy(WebRtc_Word16 *vector,
+ WebRtc_Word16 *enerlogval,
+ WebRtc_Word16 *power,
+ WebRtc_Word16 offset,
+ int vector_length)
+{
+ WebRtc_Word16 enerSum = 0;
+ WebRtc_Word16 zeros, frac, log2;
+ WebRtc_Word32 energy;
+
+ int shfts = 0, shfts2;
+
+ energy = WebRtcSpl_Energy(vector, vector_length, &shfts);
+
+ if (energy > 0)
+ {
+
+ shfts2 = 16 - WebRtcSpl_NormW32(energy);
+ shfts += shfts2;
+ // "shfts" is the total number of right shifts that has been done to enerSum.
+ enerSum = (WebRtc_Word16)WEBRTC_SPL_SHIFT_W32(energy, -shfts2);
+
+ // Find:
+ // 160*log10(enerSum*2^shfts) = 160*log10(2)*log2(enerSum*2^shfts) =
+ // 160*log10(2)*(log2(enerSum) + log2(2^shfts)) =
+ // 160*log10(2)*(log2(enerSum) + shfts)
+
+ zeros = WebRtcSpl_NormU32(enerSum);
+ frac = (WebRtc_Word16)(((WebRtc_UWord32)((WebRtc_Word32)(enerSum) << zeros)
+ & 0x7FFFFFFF) >> 21);
+ log2 = (WebRtc_Word16)(((31 - zeros) << 10) + frac);
+
+ *enerlogval = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(kLogConst, log2, 19)
+ + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(shfts, kLogConst, 9);
+
+ if (*enerlogval < 0)
+ {
+ *enerlogval = 0;
+ }
+ } else
+ {
+ *enerlogval = 0;
+ shfts = -15;
+ enerSum = 0;
+ }
+
+ *enerlogval += offset;
+
+ // Total power in frame
+ if (*power <= MIN_ENERGY)
+ {
+ if (shfts > 0)
+ {
+ *power += MIN_ENERGY + 1;
+ } else if (WEBRTC_SPL_SHIFT_W16(enerSum, shfts) > MIN_ENERGY)
+ {
+ *power += MIN_ENERGY + 1;
+ } else
+ {
+ *power += WEBRTC_SPL_SHIFT_W16(enerSum, shfts);
+ }
+ }
+}
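
The identity exploited by WebRtcVad_LogOfEnergy is 160*log10(energy) = 160*log10(2)*(log2(enerSum) + shfts), with kLogConst holding 160*log10(2) in Q9; since the output is in Q4, the result is 10*log10(energy) in Q4. A floating-point reference for cross-checking (a sketch, not bit-exact):

#include <math.h>

// Floating-point counterpart of WebRtcVad_LogOfEnergy: 10*log10 of the
// frame energy in Q4, clamped at zero, plus the band's offset.
static short log_of_energy_ref(const short *vector, int vector_length,
                               short offset)
{
    double energy = 0.0;
    int i;
    for (i = 0; i < vector_length; i++)
        energy += (double)vector[i] * vector[i];
    if (energy <= 0.0)
        return offset;
    double logval = 160.0 * log10(energy);  // == 10*log10(energy) in Q4
    if (logval < 0.0)
        logval = 0.0;
    return (short)logval + offset;
}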
diff --git a/src/common_audio/vad/main/source/vad_filterbank.h b/src/common_audio/vad/main/source/vad_filterbank.h
new file mode 100644
index 0000000000..a5507ead65
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_filterbank.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes the description of the internal VAD call
+ * WebRtcVad_GaussianProbability.
+ */
+
+#ifndef WEBRTC_VAD_FILTERBANK_H_
+#define WEBRTC_VAD_FILTERBANK_H_
+
+#include "vad_core.h"
+
+/****************************************************************************
+ * WebRtcVad_HpOutput(...)
+ *
+ * This function removes DC from the lowest frequency band
+ *
+ * Input:
+ * - in_vector : Samples in the frequency interval 0 - 250 Hz
+ * - in_vector_length : Length of input and output vector
+ * - filter_state : Current state of the filter
+ *
+ * Output:
+ * - out_vector : Samples in the frequency interval 80 - 250 Hz
+ * - filter_state : Updated state of the filter
+ *
+ */
+void WebRtcVad_HpOutput(WebRtc_Word16* in_vector,
+ WebRtc_Word16 in_vector_length,
+ WebRtc_Word16* out_vector,
+ WebRtc_Word16* filter_state);
+
+/****************************************************************************
+ * WebRtcVad_Allpass(...)
+ *
+ * This function is used before splitting a speech signal into
+ * different frequency bands
+ *
+ * Note! Do NOT let the arrays in_vector and out_vector correspond to the same address.
+ *
+ * Input:
+ * - in_vector : (Q0)
+ * - filter_coefficients : (Q15)
+ * - vector_length : Length of input and output vector
+ * - filter_state : Current state of the filter (Q(-1))
+ *
+ * Output:
+ * - out_vector : Output speech signal (Q(-1))
+ * - filter_state : Updated state of the filter (Q(-1))
+ *
+ */
+void WebRtcVad_Allpass(WebRtc_Word16* in_vector,
+ WebRtc_Word16* outw16,
+ WebRtc_Word16 filter_coefficients,
+ int vector_length,
+ WebRtc_Word16* filter_state);
+
+/****************************************************************************
+ * WebRtcVad_SplitFilter(...)
+ *
+ * This function is used to split a speech signal into two
+ * frequency bands, each downsampled by a factor of two
+ *
+ * Input:
+ * - in_vector : Input signal to be split into two frequency bands.
+ * - upper_state : Current state of the upper filter
+ * - lower_state : Current state of the lower filter
+ * - in_vector_length : Length of input vector
+ *
+ * Output:
+ * - out_vector_hp : Upper half of the spectrum
+ * - out_vector_lp : Lower half of the spectrum
+ * - upper_state : Updated state of the upper filter
+ * - lower_state : Updated state of the lower filter
+ *
+ */
+void WebRtcVad_SplitFilter(WebRtc_Word16* in_vector,
+ WebRtc_Word16* out_vector_hp,
+ WebRtc_Word16* out_vector_lp,
+ WebRtc_Word16* upper_state,
+ WebRtc_Word16* lower_state,
+ int in_vector_length);
+
+/****************************************************************************
+ * WebRtcVad_get_features(...)
+ *
+ * This function is used to get the logarithm of the power of each of the
+ * 6 frequency bands used by the VAD:
+ * 80 Hz - 250 Hz
+ * 250 Hz - 500 Hz
+ * 500 Hz - 1000 Hz
+ * 1000 Hz - 2000 Hz
+ * 2000 Hz - 3000 Hz
+ * 3000 Hz - 4000 Hz
+ *
+ * Input:
+ * - inst : Pointer to VAD instance
+ * - in_vector : Input speech signal
+ * - frame_size : Frame size, in number of samples
+ *
+ * Output:
+ * - out_vector : 10*log10(power in each freq. band), Q4
+ *
+ * Return: total power in the signal (NOTE! This value is not exact since it
+ *         is only used in a comparison.)
+ */
+WebRtc_Word16 WebRtcVad_get_features(VadInstT* inst,
+ WebRtc_Word16* in_vector,
+ int frame_size,
+ WebRtc_Word16* out_vector);
+
+/****************************************************************************
+ * WebRtcVad_LogOfEnergy(...)
+ *
+ * This function is used to get the logarithm of the power of one frequency band.
+ *
+ * Input:
+ * - vector : Input speech samples for one frequency band
+ * - offset : Offset value for the current frequency band
+ * - vector_length : Length of input vector
+ *
+ * Output:
+ * - enerlogval : 10*log10(energy);
+ *      - power         : Updated total power in the speech frame. NOTE! This value
+ * is not exact since it is only used in a comparison.
+ *
+ */
+void WebRtcVad_LogOfEnergy(WebRtc_Word16* vector,
+ WebRtc_Word16* enerlogval,
+ WebRtc_Word16* power,
+ WebRtc_Word16 offset,
+ int vector_length);
+
+#endif // WEBRTC_VAD_FILTERBANK_H_
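
The six band edges documented above come from the binary split-filter tree in vad_filterbank.c; each WebRtcVad_SplitFilter call halves both the band and the sample rate, and the final high-pass removes DC and low frequencies below 80 Hz:

8 kHz input (0 - 4000 Hz)
|- split @ 2000 Hz -- high -- split @ 3000 Hz -- high: 3000 - 4000 Hz
|                                             `- low:  2000 - 3000 Hz
`- low -- split @ 1000 Hz -- high: 1000 - 2000 Hz
          `- low -- split @ 500 Hz -- high: 500 - 1000 Hz
                    `- low -- split @ 250 Hz -- high: 250 - 500 Hz
                              `- low -- HP @ 80 Hz: 80 - 250 Hz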
diff --git a/src/common_audio/vad/main/source/vad_gmm.c b/src/common_audio/vad/main/source/vad_gmm.c
new file mode 100644
index 0000000000..23d12fb335
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_gmm.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file includes the implementation of the internal VAD call
+ * WebRtcVad_GaussianProbability. For function description, see vad_gmm.h.
+ */
+
+#include "vad_gmm.h"
+#include "signal_processing_library.h"
+#include "vad_const.h"
+
+WebRtc_Word32 WebRtcVad_GaussianProbability(WebRtc_Word16 in_sample,
+ WebRtc_Word16 mean,
+ WebRtc_Word16 std,
+ WebRtc_Word16 *delta)
+{
+ WebRtc_Word16 tmp16, tmpDiv, tmpDiv2, expVal, tmp16_1, tmp16_2;
+ WebRtc_Word32 tmp32, y32;
+
+ // Calculate tmpDiv=1/std, in Q10
+ tmp32 = (WebRtc_Word32)WEBRTC_SPL_RSHIFT_W16(std,1) + (WebRtc_Word32)131072; // 1 in Q17
+ tmpDiv = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32, std); // Q17/Q7 = Q10
+
+ // Calculate tmpDiv2=1/std^2, in Q14
+ tmp16 = WEBRTC_SPL_RSHIFT_W16(tmpDiv, 2); // From Q10 to Q8
+ tmpDiv2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16, tmp16, 2); // (Q8 * Q8)>>2 = Q14
+
+ tmp16 = WEBRTC_SPL_LSHIFT_W16(in_sample, 3); // Q7
+ tmp16 = tmp16 - mean; // Q7 - Q7 = Q7
+
+ // To be used later, when updating noise/speech model
+ // delta = (x-m)/std^2, in Q11
+ *delta = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmpDiv2, tmp16, 10); //(Q14*Q7)>>10 = Q11
+
+ // Calculate tmp32=(x-m)^2/(2*std^2), in Q10
+ tmp32 = (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT(*delta, tmp16, 9); // One shift for /2
+
+ // Calculate expVal ~= exp(-(x-m)^2/(2*std^2)) ~= exp2(-log2(exp(1))*tmp32)
+ if (tmp32 < kCompVar)
+ {
+        // Calculate tmp16 = log2(exp(1)) * tmp32, in Q10
+ tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((WebRtc_Word16)tmp32,
+ kLog10Const, 12);
+ tmp16 = -tmp16;
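+        // 2^tmp16 (tmp16 <= 0, Q10) is formed from an integer and a
+        // fractional part: the mantissa 2^frac is approximated linearly
+        // as 1 + frac (0x0400 supplies the implicit one in Q10), and the
+        // integer part becomes a right shift below.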
+ tmp16_2 = (WebRtc_Word16)(0x0400 | (tmp16 & 0x03FF));
+ tmp16_1 = (WebRtc_Word16)(tmp16 ^ 0xFFFF);
+ tmp16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W16(tmp16_1, 10);
+ tmp16 += 1;
+        // expVal ~= 2^(-log2(e) * tmp32), in Q10
+ expVal = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32((WebRtc_Word32)tmp16_2, tmp16);
+
+ } else
+ {
+ expVal = 0;
+ }
+
+ // Calculate y32=(1/std)*exp(-(x-m)^2/(2*std^2)), in Q20
+ y32 = WEBRTC_SPL_MUL_16_16(tmpDiv, expVal); // Q10 * Q10 = Q20
+
+ return y32; // Q20
+}
diff --git a/src/common_audio/vad/main/source/vad_gmm.h b/src/common_audio/vad/main/source/vad_gmm.h
new file mode 100644
index 0000000000..e0747fb7e5
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_gmm.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes the description of the internal VAD call
+ * WebRtcVad_GaussianProbability.
+ */
+
+#ifndef WEBRTC_VAD_GMM_H_
+#define WEBRTC_VAD_GMM_H_
+
+#include "typedefs.h"
+
+/****************************************************************************
+ * WebRtcVad_GaussianProbability(...)
+ *
+ * This function calculates the probability for the value 'in_sample', given that in_sample
+ * comes from a normal distribution with mean 'mean' and standard deviation 'std'.
+ *
+ * Input:
+ * - in_sample : Input sample in Q4
+ * - mean : mean value in the statistical model, Q7
+ * - std : standard deviation, Q7
+ *
+ * Output:
+ *
+ * - delta : Value used when updating the model, Q11
+ *
+ * Return:
+ * - out : out = 1/std * exp(-(x-m)^2/(2*std^2));
+ * Probability for x.
+ *
+ */
+WebRtc_Word32 WebRtcVad_GaussianProbability(WebRtc_Word16 in_sample,
+ WebRtc_Word16 mean,
+ WebRtc_Word16 std,
+ WebRtc_Word16 *delta);
+
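+/* Usage sketch (hypothetical Q-format values): probability of the feature
+ * value 4.0 (64 in Q4) under a Gaussian with mean 4.0 (512 in Q7) and
+ * standard deviation 1.0 (128 in Q7); the result is returned in Q20.
+ *
+ *   WebRtc_Word16 delta;
+ *   WebRtc_Word32 prob_q20 =
+ *       WebRtcVad_GaussianProbability(64, 512, 128, &delta);
+ */
+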
+#endif // WEBRTC_VAD_GMM_H_
diff --git a/src/common_audio/vad/main/source/vad_sp.c b/src/common_audio/vad/main/source/vad_sp.c
new file mode 100644
index 0000000000..f347ab5904
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_sp.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file includes the implementation of the VAD internal calls for Downsampling and
+ * FindMinimum.
+ * For function call descriptions, see vad_sp.h.
+ */
+
+#include "vad_sp.h"
+#include "vad_defines.h"
+#include "vad_const.h"
+#include "signal_processing_library.h"
+
+// Downsampling filter based on the splitting filter and the allpass functions
+// in vad_filterbank.c
+void WebRtcVad_Downsampling(WebRtc_Word16* signal_in,
+ WebRtc_Word16* signal_out,
+ WebRtc_Word32* filter_state,
+ int inlen)
+{
+ WebRtc_Word16 tmp16_1, tmp16_2;
+ WebRtc_Word32 tmp32_1, tmp32_2;
+ int n, halflen;
+
+ // Downsampling by 2 and get two branches
+ halflen = WEBRTC_SPL_RSHIFT_W16(inlen, 1);
+
+ tmp32_1 = filter_state[0];
+ tmp32_2 = filter_state[1];
+
+ // Filter coefficients in Q13, filter state in Q0
+ for (n = 0; n < halflen; n++)
+ {
+ // All-pass filtering upper branch
+ tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32_1, 1)
+ + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[0]),
+ *signal_in, 14);
+ *signal_out = tmp16_1;
+ tmp32_1 = (WebRtc_Word32)(*signal_in++)
+ - (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[0]), tmp16_1, 12);
+
+ // All-pass filtering lower branch
+ tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32_2, 1)
+ + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[1]),
+ *signal_in, 14);
+ *signal_out++ += tmp16_2;
+ tmp32_2 = (WebRtc_Word32)(*signal_in++)
+ - (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[1]), tmp16_2, 12);
+ }
+ filter_state[0] = tmp32_1;
+ filter_state[1] = tmp32_2;
+}
+
+WebRtc_Word16 WebRtcVad_FindMinimum(VadInstT* inst,
+ WebRtc_Word16 x,
+ int n)
+{
+ int i, j, k, II = -1, offset;
+ WebRtc_Word16 meanV, alpha;
+ WebRtc_Word32 tmp32, tmp32_1;
+ WebRtc_Word16 *valptr, *idxptr, *p1, *p2, *p3;
+
+ // Offset to beginning of the 16 minimum values in memory
+ offset = WEBRTC_SPL_LSHIFT_W16(n, 4);
+
+ // Pointer to memory for the 16 minimum values and the age of each value
+ idxptr = &inst->index_vector[offset];
+ valptr = &inst->low_value_vector[offset];
+
+    // Each value in low_value_vector ages by one frame per call. Update
+    // the age stored in index_vector, and discard values older than 100
+    // frames.
+ for (i = 0; i < 16; i++)
+ {
+ p3 = idxptr + i;
+ if (*p3 != 100)
+ {
+ *p3 += 1;
+ } else
+ {
+ p1 = valptr + i + 1;
+ p2 = p3 + 1;
+ for (j = i; j < 16; j++)
+ {
+ *(valptr + j) = *p1++;
+ *(idxptr + j) = *p2++;
+ }
+ *(idxptr + 15) = 101;
+ *(valptr + 15) = 10000;
+ }
+ }
+
+    // Check if x is smaller than any of the 16 values in low_value_vector.
+    // If so, find its insertion position (the list is kept sorted).
+ if (x < *(valptr + 7))
+ {
+ if (x < *(valptr + 3))
+ {
+ if (x < *(valptr + 1))
+ {
+ if (x < *valptr)
+ {
+ II = 0;
+ } else
+ {
+ II = 1;
+ }
+ } else if (x < *(valptr + 2))
+ {
+ II = 2;
+ } else
+ {
+ II = 3;
+ }
+ } else if (x < *(valptr + 5))
+ {
+ if (x < *(valptr + 4))
+ {
+ II = 4;
+ } else
+ {
+ II = 5;
+ }
+ } else if (x < *(valptr + 6))
+ {
+ II = 6;
+ } else
+ {
+ II = 7;
+ }
+ } else if (x < *(valptr + 15))
+ {
+ if (x < *(valptr + 11))
+ {
+ if (x < *(valptr + 9))
+ {
+ if (x < *(valptr + 8))
+ {
+ II = 8;
+ } else
+ {
+ II = 9;
+ }
+ } else if (x < *(valptr + 10))
+ {
+ II = 10;
+ } else
+ {
+ II = 11;
+ }
+ } else if (x < *(valptr + 13))
+ {
+ if (x < *(valptr + 12))
+ {
+ II = 12;
+ } else
+ {
+ II = 13;
+ }
+ } else if (x < *(valptr + 14))
+ {
+ II = 14;
+ } else
+ {
+ II = 15;
+ }
+ }
+
+    // Insert the new minimum at its position and shift larger values up.
+ if (II > -1)
+ {
+ for (i = 15; i > II; i--)
+ {
+ k = i - 1;
+ *(valptr + i) = *(valptr + k);
+ *(idxptr + i) = *(idxptr + k);
+ }
+ *(valptr + II) = x;
+ *(idxptr + II) = 1;
+ }
+
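+    // Pick the raw minimum estimate: once at least three frames have been
+    // seen, use the third lowest value (the median of the five lowest);
+    // before that, fall back to the lowest value or a constant.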
+ meanV = 0;
+ if ((inst->frame_counter) > 4)
+ {
+ j = 5;
+ } else
+ {
+ j = inst->frame_counter;
+ }
+
+ if (j > 2)
+ {
+ meanV = *(valptr + 2);
+ } else if (j > 0)
+ {
+ meanV = *valptr;
+ } else
+ {
+ meanV = 1600;
+ }
+
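+    // Smooth with a one-pole filter, approximately
+    // mean_value = alpha * mean_value + (1 - alpha) * meanV, tracking
+    // fast (alpha = 0.2) when the minimum drops and slowly (alpha = 0.99)
+    // when it rises.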
+ if (inst->frame_counter > 0)
+ {
+ if (meanV < inst->mean_value[n])
+ {
+ alpha = (WebRtc_Word16)ALPHA1; // 0.2 in Q15
+ } else
+ {
+ alpha = (WebRtc_Word16)ALPHA2; // 0.99 in Q15
+ }
+ } else
+ {
+ alpha = 0;
+ }
+
+ tmp32 = WEBRTC_SPL_MUL_16_16((alpha+1), inst->mean_value[n]);
+ tmp32_1 = WEBRTC_SPL_MUL_16_16(WEBRTC_SPL_WORD16_MAX - alpha, meanV);
+ tmp32 += tmp32_1;
+ tmp32 += 16384;
+ inst->mean_value[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 15);
+
+ return inst->mean_value[n];
+}
diff --git a/src/common_audio/vad/main/source/vad_sp.h b/src/common_audio/vad/main/source/vad_sp.h
new file mode 100644
index 0000000000..ae15c11ad6
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_sp.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes the VAD internal calls for Downsampling and FindMinimum.
+ * Specific function calls are given below.
+ */
+
+#ifndef WEBRTC_VAD_SP_H_
+#define WEBRTC_VAD_SP_H_
+
+#include "vad_core.h"
+
+/****************************************************************************
+ * WebRtcVad_Downsampling(...)
+ *
+ * Downsamples the signal by a factor of 2, e.g. 32 kHz -> 16 kHz or
+ * 16 kHz -> 8 kHz.
+ *
+ * Input:
+ * - signal_in : Input signal
+ * - in_length : Length of input signal in samples
+ *
+ * Input & Output:
+ * - filter_state : Filter state for first all-pass filters
+ *
+ * Output:
+ *      - signal_out        : Downsampled signal (of length in_length / 2)
+ */
+void WebRtcVad_Downsampling(WebRtc_Word16* signal_in,
+ WebRtc_Word16* signal_out,
+ WebRtc_Word32* filter_state,
+ int in_length);
+
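+/* Usage sketch: decimate a 10 ms frame from 32 kHz (320 samples) to
+ * 16 kHz (160 samples); the two-element state is zeroed once and then
+ * carried between calls.
+ *
+ *   WebRtc_Word16 in[320], out[160];
+ *   WebRtc_Word32 state[2] = {0, 0};
+ *   WebRtcVad_Downsampling(in, out, state, 320);
+ */
+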
+/****************************************************************************
+ * WebRtcVad_FindMinimum(...)
+ *
+ * Finds the five lowest values of the feature in a window covering the
+ * last 100 frames, and returns a smoothed mean of these values.
+ *
+ * Input:
+ * - feature_value : Feature value
+ * - channel : Channel number
+ *
+ * Input & Output:
+ * - inst : State information
+ *
+ * Output:
+ * return value : Weighted minimum value for a moving window.
+ */
+WebRtc_Word16 WebRtcVad_FindMinimum(VadInstT* inst, WebRtc_Word16 feature_value, int channel);
+
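+/* Usage sketch (channel index illustrative): track the smoothed minimum
+ * of the channel-0 feature across successive calls on the same instance.
+ *
+ *   WebRtc_Word16 smoothed_min = WebRtcVad_FindMinimum(inst, feature, 0);
+ */
+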
+#endif // WEBRTC_VAD_SP_H_
diff --git a/src/common_audio/vad/main/source/webrtc_vad.c b/src/common_audio/vad/main/source/webrtc_vad.c
new file mode 100644
index 0000000000..dcfbda1128
--- /dev/null
+++ b/src/common_audio/vad/main/source/webrtc_vad.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file includes the VAD API calls. For a specific function call description,
+ * see webrtc_vad.h
+ */
+
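+/* Typical calling sequence (sketch; error handling omitted, frame
+ * contents assumed to come from the capture device):
+ *
+ *   VadInst* handle = NULL;
+ *   WebRtc_Word16 frame[160];          // 10 ms at 16 kHz
+ *   WebRtcVad_Create(&handle);
+ *   WebRtcVad_Init(handle);
+ *   WebRtcVad_set_mode(handle, 0);     // 0..3, 0 = default high quality
+ *   // 1 = active speech, 0 = non-speech, -1 = error
+ *   WebRtc_Word16 active = WebRtcVad_Process(handle, 16000, frame, 160);
+ *   WebRtcVad_Free(handle);
+ */
+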
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc_vad.h"
+#include "vad_core.h"
+
+static const int kInitCheck = 42;
+
+WebRtc_Word16 WebRtcVad_get_version(char *version, size_t size_bytes)
+{
+ const char my_version[] = "VAD 1.2.0";
+
+ if (version == NULL)
+ {
+ return -1;
+ }
+
+ if (size_bytes < sizeof(my_version))
+ {
+ return -1;
+ }
+
+ memcpy(version, my_version, sizeof(my_version));
+ return 0;
+}
+
+WebRtc_Word16 WebRtcVad_AssignSize(int *size_in_bytes)
+{
+ *size_in_bytes = sizeof(VadInstT) * 2 / sizeof(WebRtc_Word16);
+ return 0;
+}
+
+WebRtc_Word16 WebRtcVad_Assign(VadInst **vad_inst, void *vad_inst_addr)
+{
+
+ if (vad_inst == NULL)
+ {
+ return -1;
+ }
+
+ if (vad_inst_addr != NULL)
+ {
+ *vad_inst = (VadInst*)vad_inst_addr;
+ return 0;
+ } else
+ {
+ return -1;
+ }
+}
+
+WebRtc_Word16 WebRtcVad_Create(VadInst **vad_inst)
+{
+
+ VadInstT *vad_ptr = NULL;
+
+ if (vad_inst == NULL)
+ {
+ return -1;
+ }
+
+ *vad_inst = NULL;
+
+ vad_ptr = (VadInstT *)malloc(sizeof(VadInstT));
+ *vad_inst = (VadInst *)vad_ptr;
+
+ if (vad_ptr == NULL)
+ {
+ return -1;
+ }
+
+ vad_ptr->init_flag = 0;
+
+ return 0;
+}
+
+WebRtc_Word16 WebRtcVad_Free(VadInst *vad_inst)
+{
+
+ if (vad_inst == NULL)
+ {
+ return -1;
+ }
+
+ free(vad_inst);
+ return 0;
+}
+
+WebRtc_Word16 WebRtcVad_Init(VadInst *vad_inst)
+{
+ short mode = 0; // Default high quality
+
+ if (vad_inst == NULL)
+ {
+ return -1;
+ }
+
+ return WebRtcVad_InitCore((VadInstT*)vad_inst, mode);
+}
+
+WebRtc_Word16 WebRtcVad_set_mode(VadInst *vad_inst, WebRtc_Word16 mode)
+{
+ VadInstT* vad_ptr;
+
+ if (vad_inst == NULL)
+ {
+ return -1;
+ }
+
+ vad_ptr = (VadInstT*)vad_inst;
+ if (vad_ptr->init_flag != kInitCheck)
+ {
+ return -1;
+ }
+
+ return WebRtcVad_set_mode_core((VadInstT*)vad_inst, mode);
+}
+
+WebRtc_Word16 WebRtcVad_Process(VadInst *vad_inst,
+ WebRtc_Word16 fs,
+ WebRtc_Word16 *speech_frame,
+ WebRtc_Word16 frame_length)
+{
+ WebRtc_Word16 vad;
+ VadInstT* vad_ptr;
+
+ if (vad_inst == NULL)
+ {
+ return -1;
+ }
+
+ vad_ptr = (VadInstT*)vad_inst;
+ if (vad_ptr->init_flag != kInitCheck)
+ {
+ return -1;
+ }
+
+ if (speech_frame == NULL)
+ {
+ return -1;
+ }
+
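+    // Only frames of 10, 20 or 30 ms are accepted at each sampling rate.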
+ if (fs == 32000)
+ {
+ if ((frame_length != 320) && (frame_length != 640) && (frame_length != 960))
+ {
+ return -1;
+ }
+ vad = WebRtcVad_CalcVad32khz((VadInstT*)vad_inst, speech_frame, frame_length);
+
+ } else if (fs == 16000)
+ {
+ if ((frame_length != 160) && (frame_length != 320) && (frame_length != 480))
+ {
+ return -1;
+ }
+ vad = WebRtcVad_CalcVad16khz((VadInstT*)vad_inst, speech_frame, frame_length);
+
+ } else if (fs == 8000)
+ {
+ if ((frame_length != 80) && (frame_length != 160) && (frame_length != 240))
+ {
+ return -1;
+ }
+ vad = WebRtcVad_CalcVad8khz((VadInstT*)vad_inst, speech_frame, frame_length);
+
+ } else
+ {
+ return -1; // Not a supported sampling frequency
+ }
+
+ if (vad > 0)
+ {
+ return 1;
+ } else if (vad == 0)
+ {
+ return 0;
+ } else
+ {
+ return -1;
+ }
+}
diff --git a/src/common_audio/vad/main/test/unit_test/unit_test.cc b/src/common_audio/vad/main/test/unit_test/unit_test.cc
new file mode 100644
index 0000000000..8ac793e44e
--- /dev/null
+++ b/src/common_audio/vad/main/test/unit_test/unit_test.cc
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file includes the implementation of the VAD unit tests.
+ */
+
+#include <cstring>
+#include "unit_test.h"
+#include "webrtc_vad.h"
+
+
+class VadEnvironment : public ::testing::Environment {
+ public:
+ virtual void SetUp() {
+ }
+
+ virtual void TearDown() {
+ }
+};
+
+VadTest::VadTest()
+{
+}
+
+void VadTest::SetUp() {
+}
+
+void VadTest::TearDown() {
+}
+
+TEST_F(VadTest, ApiTest) {
+ VadInst *vad_inst;
+ int i, j, k;
+ short zeros[960];
+ short speech[960];
+ char version[32];
+
+ // Valid test cases
+ int fs[3] = {8000, 16000, 32000};
+ int nMode[4] = {0, 1, 2, 3};
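+    // Frame lengths of 10, 20 and 30 ms for 8, 16 and 32 kHz, respectively.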
+    int framelen[3][3] = {{80, 160, 240},
+                          {160, 320, 480},
+                          {320, 640, 960}};
+ int vad_counter = 0;
+
+ memset(zeros, 0, sizeof(short) * 960);
+ memset(speech, 1, sizeof(short) * 960);
+ speech[13] = 1374;
+ speech[73] = -3747;
+
+ // WebRtcVad_get_version()
+    EXPECT_EQ(0, WebRtcVad_get_version(version, sizeof(version)));
+ //printf("API Test for %s\n", version);
+
+ // Null instance tests
+ EXPECT_EQ(-1, WebRtcVad_Create(NULL));
+ EXPECT_EQ(-1, WebRtcVad_Init(NULL));
+ EXPECT_EQ(-1, WebRtcVad_Assign(NULL, NULL));
+ EXPECT_EQ(-1, WebRtcVad_Free(NULL));
+ EXPECT_EQ(-1, WebRtcVad_set_mode(NULL, nMode[0]));
+ EXPECT_EQ(-1, WebRtcVad_Process(NULL, fs[0], speech, framelen[0][0]));
+
+
+ EXPECT_EQ(WebRtcVad_Create(&vad_inst), 0);
+
+ // Not initialized tests
+ EXPECT_EQ(-1, WebRtcVad_Process(vad_inst, fs[0], speech, framelen[0][0]));
+ EXPECT_EQ(-1, WebRtcVad_set_mode(vad_inst, nMode[0]));
+
+ // WebRtcVad_Init() tests
+ EXPECT_EQ(WebRtcVad_Init(vad_inst), 0);
+
+ // WebRtcVad_set_mode() tests
+ EXPECT_EQ(-1, WebRtcVad_set_mode(vad_inst, -1));
+ EXPECT_EQ(-1, WebRtcVad_set_mode(vad_inst, 4));
+
+ for (i = 0; i < sizeof(nMode)/sizeof(nMode[0]); i++) {
+ EXPECT_EQ(WebRtcVad_set_mode(vad_inst, nMode[i]), 0);
+ }
+
+ // WebRtcVad_Process() tests
+ EXPECT_EQ(-1, WebRtcVad_Process(vad_inst, fs[0], NULL, framelen[0][0]));
+ EXPECT_EQ(-1, WebRtcVad_Process(vad_inst, 12000, speech, framelen[0][0]));
+ EXPECT_EQ(-1, WebRtcVad_Process(vad_inst, fs[0], speech, framelen[1][1]));
+ EXPECT_EQ(WebRtcVad_Process(vad_inst, fs[0], zeros, framelen[0][0]), 0);
+ for (i = 0; i < sizeof(fs)/sizeof(fs[0]); i++) {
+ for (j = 0; j < sizeof(framelen[0])/sizeof(framelen[0][0]); j++) {
+ for (k = 0; k < sizeof(nMode)/sizeof(nMode[0]); k++) {
+ EXPECT_EQ(WebRtcVad_set_mode(vad_inst, nMode[k]), 0);
+// printf("%d\n", WebRtcVad_Process(vad_inst, fs[i], speech, framelen[i][j]));
+ if (vad_counter < 9)
+ {
+ EXPECT_EQ(WebRtcVad_Process(vad_inst, fs[i], speech, framelen[i][j]), 1);
+ } else
+ {
+ EXPECT_EQ(WebRtcVad_Process(vad_inst, fs[i], speech, framelen[i][j]), 0);
+ }
+ vad_counter++;
+ }
+ }
+ }
+
+ EXPECT_EQ(0, WebRtcVad_Free(vad_inst));
+
+}
+
+int main(int argc, char** argv) {
+ ::testing::InitGoogleTest(&argc, argv);
+ VadEnvironment* env = new VadEnvironment;
+ ::testing::AddGlobalTestEnvironment(env);
+
+ return RUN_ALL_TESTS();
+}
diff --git a/src/common_audio/vad/main/test/unit_test/unit_test.h b/src/common_audio/vad/main/test/unit_test/unit_test.h
new file mode 100644
index 0000000000..62dac11de4
--- /dev/null
+++ b/src/common_audio/vad/main/test/unit_test/unit_test.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes the declaration of the VAD unit test.
+ */
+
+#ifndef WEBRTC_VAD_UNIT_TEST_H_
+#define WEBRTC_VAD_UNIT_TEST_H_
+
+#include <gtest/gtest.h>
+
+class VadTest : public ::testing::Test {
+ protected:
+ VadTest();
+ virtual void SetUp();
+ virtual void TearDown();
+};
+
+#endif // WEBRTC_VAD_UNIT_TEST_H_