Diffstat (limited to 'src/common_audio/vad/main')
-rw-r--r--  src/common_audio/vad/main/interface/webrtc_vad.h         159
-rw-r--r--  src/common_audio/vad/main/source/Android.mk                64
-rw-r--r--  src/common_audio/vad/main/source/vad.gyp                   51
-rw-r--r--  src/common_audio/vad/main/source/vad_const.c               80
-rw-r--r--  src/common_audio/vad/main/source/vad_const.h               59
-rw-r--r--  src/common_audio/vad/main/source/vad_core.c               685
-rw-r--r--  src/common_audio/vad/main/source/vad_core.h               132
-rw-r--r--  src/common_audio/vad/main/source/vad_defines.h             95
-rw-r--r--  src/common_audio/vad/main/source/vad_filterbank.c         267
-rw-r--r--  src/common_audio/vad/main/source/vad_filterbank.h         143
-rw-r--r--  src/common_audio/vad/main/source/vad_gmm.c                 70
-rw-r--r--  src/common_audio/vad/main/source/vad_gmm.h                 47
-rw-r--r--  src/common_audio/vad/main/source/vad_sp.c                 231
-rw-r--r--  src/common_audio/vad/main/source/vad_sp.h                  60
-rw-r--r--  src/common_audio/vad/main/source/webrtc_vad.c             197
-rw-r--r--  src/common_audio/vad/main/test/unit_test/unit_test.cc     123
-rw-r--r--  src/common_audio/vad/main/test/unit_test/unit_test.h       28
17 files changed, 2491 insertions, 0 deletions
diff --git a/src/common_audio/vad/main/interface/webrtc_vad.h b/src/common_audio/vad/main/interface/webrtc_vad.h
new file mode 100644
index 0000000000..6e3eb74ab5
--- /dev/null
+++ b/src/common_audio/vad/main/interface/webrtc_vad.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes the VAD API calls. Specific function calls are given below.
+ */
+
+#ifndef WEBRTC_VAD_WEBRTC_VAD_H_
+#define WEBRTC_VAD_WEBRTC_VAD_H_
+
+#include "typedefs.h"
+
+typedef struct WebRtcVadInst VadInst;
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/****************************************************************************
+ * WebRtcVad_get_version(...)
+ *
+ * This function returns the version number of the code.
+ *
+ * Output:
+ * - version : Pointer to a buffer where the version info will
+ * be stored.
+ * Input:
+ * - size_bytes : Size of the buffer.
+ *
+ */
+WebRtc_Word16 WebRtcVad_get_version(char *version, size_t size_bytes);
+
+/****************************************************************************
+ * WebRtcVad_AssignSize(...)
+ *
+ * This function gets the size needed for storing a VAD instance.
+ *
+ * Input/Output:
+ * - size_in_bytes : Pointer to integer where the size is returned
+ *
+ * Return value : 0
+ */
+WebRtc_Word16 WebRtcVad_AssignSize(int *size_in_bytes);
+
+/****************************************************************************
+ * WebRtcVad_Assign(...)
+ *
+ * This function assigns memory for a VAD instance.
+ *
+ * Input:
+ *      - vad_inst_addr : Address of the memory where the instance is assigned
+ * Output:
+ * - vad_inst : Pointer to the instance that should be created
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+WebRtc_Word16 WebRtcVad_Assign(VadInst **vad_inst, void *vad_inst_addr);
+
+/****************************************************************************
+ * WebRtcVad_Create(...)
+ *
+ * This function creates an instance of the VAD structure
+ *
+ * Input:
+ * - vad_inst : Pointer to VAD instance that should be created
+ *
+ * Output:
+ * - vad_inst : Pointer to created VAD instance
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+WebRtc_Word16 WebRtcVad_Create(VadInst **vad_inst);
+
+/****************************************************************************
+ * WebRtcVad_Free(...)
+ *
+ * This function frees the dynamic memory of a specified VAD instance
+ *
+ * Input:
+ * - vad_inst : Pointer to VAD instance that should be freed
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+WebRtc_Word16 WebRtcVad_Free(VadInst *vad_inst);
+
+/****************************************************************************
+ * WebRtcVad_Init(...)
+ *
+ * This function initializes a VAD instance
+ *
+ * Input:
+ * - vad_inst : Instance that should be initialized
+ *
+ * Output:
+ * - vad_inst : Initialized instance
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+WebRtc_Word16 WebRtcVad_Init(VadInst *vad_inst);
+
+/****************************************************************************
+ * WebRtcVad_set_mode(...)
+ *
+ * This function sets the aggressiveness mode of a VAD instance
+ *
+ * Input:
+ * - vad_inst : VAD instance
+ * - mode : Aggressiveness setting (0, 1, 2, or 3)
+ *
+ * Output:
+ *      - vad_inst      : Instance with the new mode set
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+WebRtc_Word16 WebRtcVad_set_mode(VadInst *vad_inst, WebRtc_Word16 mode);
+
+/****************************************************************************
+ * WebRtcVad_Process(...)
+ *
+ * This function calculates a VAD decision for the inserted speech frame
+ *
+ * Input:
+ *        - vad_inst     : VAD Instance. Needs to be initialized before the call.
+ * - fs : sampling frequency (Hz): 8000, 16000, or 32000
+ * - speech_frame : Pointer to speech frame buffer
+ * - frame_length : Length of speech frame buffer in number of samples
+ *
+ * Output:
+ * - vad_inst : Updated VAD instance
+ *
+ * Return value : 1 - Active Voice
+ * 0 - Non-active Voice
+ * -1 - Error
+ */
+WebRtc_Word16 WebRtcVad_Process(VadInst *vad_inst,
+ WebRtc_Word16 fs,
+ WebRtc_Word16 *speech_frame,
+ WebRtc_Word16 frame_length);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // WEBRTC_VAD_WEBRTC_VAD_H_
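
Taken together, these calls form a create/init/set_mode/process/free lifecycle. A minimal usage sketch of the API above (classify_frame is an illustrative helper, not part of the API; 80 samples is one 10 ms frame at 8 kHz):

#include "webrtc_vad.h"

// Classify one 10 ms frame at 8 kHz (80 samples).
// Returns 1 for active voice, 0 for non-active voice, -1 on error.
WebRtc_Word16 classify_frame(WebRtc_Word16 *frame80)
{
    VadInst *vad = NULL;
    WebRtc_Word16 decision = -1;

    if (WebRtcVad_Create(&vad) == 0 && WebRtcVad_Init(vad) == 0)
    {
        // Aggressiveness 0 (quality) .. 3 (very aggressive)
        if (WebRtcVad_set_mode(vad, 2) == 0)
        {
            decision = WebRtcVad_Process(vad, 8000, frame80, 80);
        }
    }
    WebRtcVad_Free(vad);
    return decision;
}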
diff --git a/src/common_audio/vad/main/source/Android.mk b/src/common_audio/vad/main/source/Android.mk
new file mode 100644
index 0000000000..f52df935d1
--- /dev/null
+++ b/src/common_audio/vad/main/source/Android.mk
@@ -0,0 +1,64 @@
+# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_ARM_MODE := arm
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+LOCAL_MODULE := libwebrtc_vad
+LOCAL_MODULE_TAGS := optional
+LOCAL_GENERATED_SOURCES :=
+LOCAL_SRC_FILES := webrtc_vad.c \
+ vad_const.c \
+ vad_core.c \
+ vad_filterbank.c \
+ vad_gmm.c \
+ vad_sp.c
+
+# Flags passed to both C and C++ files.
+MY_CFLAGS :=
+MY_CFLAGS_C :=
+MY_DEFS := '-DNO_TCMALLOC' \
+ '-DNO_HEAPCHECKER' \
+ '-DWEBRTC_TARGET_PC' \
+ '-DWEBRTC_LINUX'
+ifeq ($(TARGET_ARCH),arm)
+MY_DEFS += \
+ '-DWEBRTC_ANDROID' \
+ '-DANDROID'
+endif
+LOCAL_CFLAGS := $(MY_CFLAGS_C) $(MY_CFLAGS) $(MY_DEFS)
+
+# Include paths placed before CFLAGS/CPPFLAGS
+LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../.. \
+ $(LOCAL_PATH)/../interface \
+ $(LOCAL_PATH)/../../../signal_processing_library/main/interface
+
+# Flags passed to only C++ (and not C) files.
+LOCAL_CPPFLAGS :=
+
+LOCAL_LDFLAGS :=
+
+LOCAL_STATIC_LIBRARIES :=
+
+LOCAL_SHARED_LIBRARIES := libdl \
+ libstlport
+LOCAL_ADDITIONAL_DEPENDENCIES :=
+
+ifeq ($(TARGET_OS)-$(TARGET_SIMULATOR),linux-true)
+LOCAL_LDLIBS += -ldl -lpthread
+endif
+
+ifneq ($(TARGET_SIMULATOR),true)
+LOCAL_SHARED_LIBRARIES += libdl
+endif
+
+include external/stlport/libstlport.mk
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/common_audio/vad/main/source/vad.gyp b/src/common_audio/vad/main/source/vad.gyp
new file mode 100644
index 0000000000..754b684d5b
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad.gyp
@@ -0,0 +1,51 @@
+# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+{
+ 'includes': [
+ '../../../../common_settings.gypi', # Common settings
+ ],
+ 'targets': [
+ {
+ 'target_name': 'vad',
+ 'type': '<(library)',
+ 'dependencies': [
+ '../../../signal_processing_library/main/source/spl.gyp:spl',
+ ],
+ 'include_dirs': [
+ '../interface',
+ ],
+ 'direct_dependent_settings': {
+ 'include_dirs': [
+ '../interface',
+ ],
+ },
+ 'sources': [
+ '../interface/webrtc_vad.h',
+ 'webrtc_vad.c',
+ 'vad_const.c',
+ 'vad_const.h',
+ 'vad_defines.h',
+ 'vad_core.c',
+ 'vad_core.h',
+ 'vad_filterbank.c',
+ 'vad_filterbank.h',
+ 'vad_gmm.c',
+ 'vad_gmm.h',
+ 'vad_sp.c',
+ 'vad_sp.h',
+ ],
+ },
+ ],
+}
+
+# Local Variables:
+# tab-width:2
+# indent-tabs-mode:nil
+# End:
+# vim: set expandtab tabstop=2 shiftwidth=2:
diff --git a/src/common_audio/vad/main/source/vad_const.c b/src/common_audio/vad/main/source/vad_const.c
new file mode 100644
index 0000000000..47b6a4b8ca
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_const.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This file includes the constant values used internally in VAD.
+ */
+
+#include "vad_const.h"
+
+// Spectrum Weighting
+const WebRtc_Word16 kSpectrumWeight[6] = {6, 8, 10, 12, 14, 16};
+
+const WebRtc_Word16 kCompVar = 22005;
+
+// Constant 160*log10(2) in Q9
+const WebRtc_Word16 kLogConst = 24660;
+
+// Constant log2(exp(1)) in Q12
+const WebRtc_Word16 kLog10Const = 5909;
+
+// Q15
+const WebRtc_Word16 kNoiseUpdateConst = 655;
+const WebRtc_Word16 kSpeechUpdateConst = 6554;
+
+// Q8
+const WebRtc_Word16 kBackEta = 154;
+
+// Coefficients used by WebRtcVad_HpOutput, Q14
+const WebRtc_Word16 kHpZeroCoefs[3] = {6631, -13262, 6631};
+const WebRtc_Word16 kHpPoleCoefs[3] = {16384, -7756, 5620};
+
+// Allpass filter coefficients, upper and lower, in Q15
+// Upper: 0.64, Lower: 0.17
+const WebRtc_Word16 kAllPassCoefsQ15[2] = {20972, 5571};
+const WebRtc_Word16 kAllPassCoefsQ13[2] = {5243, 1392}; // Q13
+
+// Minimum difference between the two models, Q5
+const WebRtc_Word16 kMinimumDifference[6] = {544, 544, 576, 576, 576, 576};
+
+// Upper limit of mean value for speech model, Q7
+const WebRtc_Word16 kMaximumSpeech[6] = {11392, 11392, 11520, 11520, 11520, 11520};
+
+// Minimum value for mean value
+const WebRtc_Word16 kMinimumMean[2] = {640, 768};
+
+// Upper limit of mean value for noise model, Q7
+const WebRtc_Word16 kMaximumNoise[6] = {9216, 9088, 8960, 8832, 8704, 8576};
+
+// Adjustment for division with two in WebRtcVad_SplitFilter
+const WebRtc_Word16 kOffsetVector[6] = {368, 368, 272, 176, 176, 176};
+
+// Start values for the Gaussian models, Q7
+// Weights for the two Gaussians for the six channels (noise)
+const WebRtc_Word16 kNoiseDataWeights[12] = {34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103};
+
+// Weights for the two Gaussians for the six channels (speech)
+const WebRtc_Word16 kSpeechDataWeights[12] = {48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81};
+
+// Means for the two Gaussians for the six channels (noise)
+const WebRtc_Word16 kNoiseDataMeans[12] = {6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863,
+ 7820, 7266, 5020, 4362};
+
+// Means for the two Gaussians for the six channels (speech)
+const WebRtc_Word16 kSpeechDataMeans[12] = {8306, 10085, 10078, 11823, 11843, 6309, 9473,
+ 9571, 10879, 7581, 8180, 7483};
+
+// Stds for the two Gaussians for the six channels (noise)
+const WebRtc_Word16 kNoiseDataStds[12] = {378, 1064, 493, 582, 688, 593, 474, 697, 475, 688,
+ 421, 455};
+
+// Stds for the two Gaussians for the six channels (speech)
+const WebRtc_Word16 kSpeechDataStds[12] = {555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540,
+ 1079, 850};
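
The Q-format constants above follow directly from their stated definitions, e.g. kLogConst = round(160*log10(2)*2^9) = 24660 and kAllPassCoefsQ15 = round({0.64, 0.17}*2^15) = {20972, 5571}. A hypothetical stand-alone sanity check (C99, links with -lm):

#include <math.h>
#include <stdio.h>

int main(void)
{
    // 160*log10(2) in Q9
    printf("kLogConst        = %ld\n", lround(160.0 * log10(2.0) * (1 << 9)));
    // log2(exp(1)) in Q12
    printf("kLog10Const      = %ld\n", lround(log2(exp(1.0)) * (1 << 12)));
    // 0.64 and 0.17 in Q15
    printf("kAllPassCoefsQ15 = {%ld, %ld}\n",
           lround(0.64 * (1 << 15)), lround(0.17 * (1 << 15)));
    return 0;  // Prints 24660, 5909, {20972, 5571}
}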
diff --git a/src/common_audio/vad/main/source/vad_const.h b/src/common_audio/vad/main/source/vad_const.h
new file mode 100644
index 0000000000..89804379be
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_const.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes the declarations of the internally used constants.
+ */
+
+#ifndef WEBRTC_VAD_CONST_H_
+#define WEBRTC_VAD_CONST_H_
+
+#include "typedefs.h"
+
+// TODO(ajm): give these internal-linkage by moving to the appropriate file
+// where possible, and otherwise tag with WebRtcVad_.
+
+// Spectrum Weighting
+extern const WebRtc_Word16 kSpectrumWeight[];
+extern const WebRtc_Word16 kCompVar;
+// Logarithm constant
+extern const WebRtc_Word16 kLogConst;
+extern const WebRtc_Word16 kLog10Const;
+// Q15
+extern const WebRtc_Word16 kNoiseUpdateConst;
+extern const WebRtc_Word16 kSpeechUpdateConst;
+// Q8
+extern const WebRtc_Word16 kBackEta;
+// Coefficients used by WebRtcVad_HpOutput, Q14
+extern const WebRtc_Word16 kHpZeroCoefs[];
+extern const WebRtc_Word16 kHpPoleCoefs[];
+// Allpass filter coefficients, upper and lower, in Q15 resp. Q13
+extern const WebRtc_Word16 kAllPassCoefsQ15[];
+extern const WebRtc_Word16 kAllPassCoefsQ13[];
+// Minimum difference between the two models, Q5
+extern const WebRtc_Word16 kMinimumDifference[];
+// Maximum value when updating the speech model, Q7
+extern const WebRtc_Word16 kMaximumSpeech[];
+// Minimum value for mean value
+extern const WebRtc_Word16 kMinimumMean[];
+// Upper limit of mean value for noise model, Q7
+extern const WebRtc_Word16 kMaximumNoise[];
+// Adjustment for division with two in WebRtcVad_SplitFilter
+extern const WebRtc_Word16 kOffsetVector[];
+// Start values for the Gaussian models, Q7
+extern const WebRtc_Word16 kNoiseDataWeights[];
+extern const WebRtc_Word16 kSpeechDataWeights[];
+extern const WebRtc_Word16 kNoiseDataMeans[];
+extern const WebRtc_Word16 kSpeechDataMeans[];
+extern const WebRtc_Word16 kNoiseDataStds[];
+extern const WebRtc_Word16 kSpeechDataStds[];
+
+#endif // WEBRTC_VAD_CONST_H_
diff --git a/src/common_audio/vad/main/source/vad_core.c b/src/common_audio/vad/main/source/vad_core.c
new file mode 100644
index 0000000000..e8829993d5
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_core.c
@@ -0,0 +1,685 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file includes the implementation of the core functionality in VAD.
+ * For function description, see vad_core.h.
+ */
+
+#include "vad_core.h"
+#include "vad_const.h"
+#include "vad_defines.h"
+#include "vad_filterbank.h"
+#include "vad_gmm.h"
+#include "vad_sp.h"
+#include "signal_processing_library.h"
+
+static const int kInitCheck = 42;
+
+// Initialize VAD
+int WebRtcVad_InitCore(VadInstT *inst, short mode)
+{
+ int i;
+
+ // Initialization of struct
+ inst->vad = 1;
+ inst->frame_counter = 0;
+ inst->over_hang = 0;
+ inst->num_of_speech = 0;
+
+ // Initialization of downsampling filter state
+ inst->downsampling_filter_states[0] = 0;
+ inst->downsampling_filter_states[1] = 0;
+ inst->downsampling_filter_states[2] = 0;
+ inst->downsampling_filter_states[3] = 0;
+
+ // Read initial PDF parameters
+ for (i = 0; i < NUM_TABLE_VALUES; i++)
+ {
+ inst->noise_means[i] = kNoiseDataMeans[i];
+ inst->speech_means[i] = kSpeechDataMeans[i];
+ inst->noise_stds[i] = kNoiseDataStds[i];
+ inst->speech_stds[i] = kSpeechDataStds[i];
+ }
+
+ // Index and Minimum value vectors are initialized
+ for (i = 0; i < 16 * NUM_CHANNELS; i++)
+ {
+ inst->low_value_vector[i] = 10000;
+ inst->index_vector[i] = 0;
+ }
+
+ for (i = 0; i < 5; i++)
+ {
+ inst->upper_state[i] = 0;
+ inst->lower_state[i] = 0;
+ }
+
+ for (i = 0; i < 4; i++)
+ {
+ inst->hp_filter_state[i] = 0;
+ }
+
+    // Init mean value memory, for the FindMinimum function
+ inst->mean_value[0] = 1600;
+ inst->mean_value[1] = 1600;
+ inst->mean_value[2] = 1600;
+ inst->mean_value[3] = 1600;
+ inst->mean_value[4] = 1600;
+ inst->mean_value[5] = 1600;
+
+ if (mode == 0)
+ {
+ // Quality mode
+ inst->over_hang_max_1[0] = OHMAX1_10MS_Q; // Overhang short speech burst
+ inst->over_hang_max_1[1] = OHMAX1_20MS_Q; // Overhang short speech burst
+ inst->over_hang_max_1[2] = OHMAX1_30MS_Q; // Overhang short speech burst
+ inst->over_hang_max_2[0] = OHMAX2_10MS_Q; // Overhang long speech burst
+ inst->over_hang_max_2[1] = OHMAX2_20MS_Q; // Overhang long speech burst
+ inst->over_hang_max_2[2] = OHMAX2_30MS_Q; // Overhang long speech burst
+
+ inst->individual[0] = INDIVIDUAL_10MS_Q;
+ inst->individual[1] = INDIVIDUAL_20MS_Q;
+ inst->individual[2] = INDIVIDUAL_30MS_Q;
+
+ inst->total[0] = TOTAL_10MS_Q;
+ inst->total[1] = TOTAL_20MS_Q;
+ inst->total[2] = TOTAL_30MS_Q;
+ } else if (mode == 1)
+ {
+ // Low bitrate mode
+ inst->over_hang_max_1[0] = OHMAX1_10MS_LBR; // Overhang short speech burst
+ inst->over_hang_max_1[1] = OHMAX1_20MS_LBR; // Overhang short speech burst
+ inst->over_hang_max_1[2] = OHMAX1_30MS_LBR; // Overhang short speech burst
+ inst->over_hang_max_2[0] = OHMAX2_10MS_LBR; // Overhang long speech burst
+ inst->over_hang_max_2[1] = OHMAX2_20MS_LBR; // Overhang long speech burst
+ inst->over_hang_max_2[2] = OHMAX2_30MS_LBR; // Overhang long speech burst
+
+ inst->individual[0] = INDIVIDUAL_10MS_LBR;
+ inst->individual[1] = INDIVIDUAL_20MS_LBR;
+ inst->individual[2] = INDIVIDUAL_30MS_LBR;
+
+ inst->total[0] = TOTAL_10MS_LBR;
+ inst->total[1] = TOTAL_20MS_LBR;
+ inst->total[2] = TOTAL_30MS_LBR;
+ } else if (mode == 2)
+ {
+ // Aggressive mode
+ inst->over_hang_max_1[0] = OHMAX1_10MS_AGG; // Overhang short speech burst
+ inst->over_hang_max_1[1] = OHMAX1_20MS_AGG; // Overhang short speech burst
+ inst->over_hang_max_1[2] = OHMAX1_30MS_AGG; // Overhang short speech burst
+ inst->over_hang_max_2[0] = OHMAX2_10MS_AGG; // Overhang long speech burst
+ inst->over_hang_max_2[1] = OHMAX2_20MS_AGG; // Overhang long speech burst
+ inst->over_hang_max_2[2] = OHMAX2_30MS_AGG; // Overhang long speech burst
+
+ inst->individual[0] = INDIVIDUAL_10MS_AGG;
+ inst->individual[1] = INDIVIDUAL_20MS_AGG;
+ inst->individual[2] = INDIVIDUAL_30MS_AGG;
+
+ inst->total[0] = TOTAL_10MS_AGG;
+ inst->total[1] = TOTAL_20MS_AGG;
+ inst->total[2] = TOTAL_30MS_AGG;
+ } else
+ {
+ // Very aggressive mode
+ inst->over_hang_max_1[0] = OHMAX1_10MS_VAG; // Overhang short speech burst
+ inst->over_hang_max_1[1] = OHMAX1_20MS_VAG; // Overhang short speech burst
+ inst->over_hang_max_1[2] = OHMAX1_30MS_VAG; // Overhang short speech burst
+ inst->over_hang_max_2[0] = OHMAX2_10MS_VAG; // Overhang long speech burst
+ inst->over_hang_max_2[1] = OHMAX2_20MS_VAG; // Overhang long speech burst
+ inst->over_hang_max_2[2] = OHMAX2_30MS_VAG; // Overhang long speech burst
+
+ inst->individual[0] = INDIVIDUAL_10MS_VAG;
+ inst->individual[1] = INDIVIDUAL_20MS_VAG;
+ inst->individual[2] = INDIVIDUAL_30MS_VAG;
+
+ inst->total[0] = TOTAL_10MS_VAG;
+ inst->total[1] = TOTAL_20MS_VAG;
+ inst->total[2] = TOTAL_30MS_VAG;
+ }
+
+ inst->init_flag = kInitCheck;
+
+ return 0;
+}
+
+// Set aggressiveness mode
+int WebRtcVad_set_mode_core(VadInstT *inst, short mode)
+{
+
+ if (mode == 0)
+ {
+ // Quality mode
+ inst->over_hang_max_1[0] = OHMAX1_10MS_Q; // Overhang short speech burst
+ inst->over_hang_max_1[1] = OHMAX1_20MS_Q; // Overhang short speech burst
+ inst->over_hang_max_1[2] = OHMAX1_30MS_Q; // Overhang short speech burst
+ inst->over_hang_max_2[0] = OHMAX2_10MS_Q; // Overhang long speech burst
+ inst->over_hang_max_2[1] = OHMAX2_20MS_Q; // Overhang long speech burst
+ inst->over_hang_max_2[2] = OHMAX2_30MS_Q; // Overhang long speech burst
+
+ inst->individual[0] = INDIVIDUAL_10MS_Q;
+ inst->individual[1] = INDIVIDUAL_20MS_Q;
+ inst->individual[2] = INDIVIDUAL_30MS_Q;
+
+ inst->total[0] = TOTAL_10MS_Q;
+ inst->total[1] = TOTAL_20MS_Q;
+ inst->total[2] = TOTAL_30MS_Q;
+ } else if (mode == 1)
+ {
+ // Low bitrate mode
+ inst->over_hang_max_1[0] = OHMAX1_10MS_LBR; // Overhang short speech burst
+ inst->over_hang_max_1[1] = OHMAX1_20MS_LBR; // Overhang short speech burst
+ inst->over_hang_max_1[2] = OHMAX1_30MS_LBR; // Overhang short speech burst
+ inst->over_hang_max_2[0] = OHMAX2_10MS_LBR; // Overhang long speech burst
+ inst->over_hang_max_2[1] = OHMAX2_20MS_LBR; // Overhang long speech burst
+ inst->over_hang_max_2[2] = OHMAX2_30MS_LBR; // Overhang long speech burst
+
+ inst->individual[0] = INDIVIDUAL_10MS_LBR;
+ inst->individual[1] = INDIVIDUAL_20MS_LBR;
+ inst->individual[2] = INDIVIDUAL_30MS_LBR;
+
+ inst->total[0] = TOTAL_10MS_LBR;
+ inst->total[1] = TOTAL_20MS_LBR;
+ inst->total[2] = TOTAL_30MS_LBR;
+ } else if (mode == 2)
+ {
+ // Aggressive mode
+ inst->over_hang_max_1[0] = OHMAX1_10MS_AGG; // Overhang short speech burst
+ inst->over_hang_max_1[1] = OHMAX1_20MS_AGG; // Overhang short speech burst
+ inst->over_hang_max_1[2] = OHMAX1_30MS_AGG; // Overhang short speech burst
+ inst->over_hang_max_2[0] = OHMAX2_10MS_AGG; // Overhang long speech burst
+ inst->over_hang_max_2[1] = OHMAX2_20MS_AGG; // Overhang long speech burst
+ inst->over_hang_max_2[2] = OHMAX2_30MS_AGG; // Overhang long speech burst
+
+ inst->individual[0] = INDIVIDUAL_10MS_AGG;
+ inst->individual[1] = INDIVIDUAL_20MS_AGG;
+ inst->individual[2] = INDIVIDUAL_30MS_AGG;
+
+ inst->total[0] = TOTAL_10MS_AGG;
+ inst->total[1] = TOTAL_20MS_AGG;
+ inst->total[2] = TOTAL_30MS_AGG;
+ } else if (mode == 3)
+ {
+ // Very aggressive mode
+ inst->over_hang_max_1[0] = OHMAX1_10MS_VAG; // Overhang short speech burst
+ inst->over_hang_max_1[1] = OHMAX1_20MS_VAG; // Overhang short speech burst
+ inst->over_hang_max_1[2] = OHMAX1_30MS_VAG; // Overhang short speech burst
+ inst->over_hang_max_2[0] = OHMAX2_10MS_VAG; // Overhang long speech burst
+ inst->over_hang_max_2[1] = OHMAX2_20MS_VAG; // Overhang long speech burst
+ inst->over_hang_max_2[2] = OHMAX2_30MS_VAG; // Overhang long speech burst
+
+ inst->individual[0] = INDIVIDUAL_10MS_VAG;
+ inst->individual[1] = INDIVIDUAL_20MS_VAG;
+ inst->individual[2] = INDIVIDUAL_30MS_VAG;
+
+ inst->total[0] = TOTAL_10MS_VAG;
+ inst->total[1] = TOTAL_20MS_VAG;
+ inst->total[2] = TOTAL_30MS_VAG;
+ } else
+ {
+ return -1;
+ }
+
+ return 0;
+}
+
+// Calculate the VAD decision by first extracting feature values and then
+// calculating the probability for both speech and background noise.
+
+WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT *inst, WebRtc_Word16 *speech_frame,
+ int frame_length)
+{
+ WebRtc_Word16 len, vad;
+    WebRtc_Word16 speechWB[480]; // Speech frame downsampled to 16 kHz (from 960 samples, 30 ms in SWB)
+    WebRtc_Word16 speechNB[240]; // Speech frame downsampled to 8 kHz (from 480 samples, 30 ms in WB)
+
+
+ // Downsample signal 32->16->8 before doing VAD
+ WebRtcVad_Downsampling(speech_frame, speechWB, &(inst->downsampling_filter_states[2]),
+ frame_length);
+ len = WEBRTC_SPL_RSHIFT_W16(frame_length, 1);
+
+ WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states, len);
+ len = WEBRTC_SPL_RSHIFT_W16(len, 1);
+
+ // Do VAD on an 8 kHz signal
+ vad = WebRtcVad_CalcVad8khz(inst, speechNB, len);
+
+ return vad;
+}
+
+WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT *inst, WebRtc_Word16 *speech_frame,
+ int frame_length)
+{
+ WebRtc_Word16 len, vad;
+    WebRtc_Word16 speechNB[240]; // Speech frame downsampled to 8 kHz (from 480 samples, 30 ms in WB)
+
+ // Wideband: Downsample signal before doing VAD
+ WebRtcVad_Downsampling(speech_frame, speechNB, inst->downsampling_filter_states,
+ frame_length);
+
+ len = WEBRTC_SPL_RSHIFT_W16(frame_length, 1);
+ vad = WebRtcVad_CalcVad8khz(inst, speechNB, len);
+
+ return vad;
+}
+
+WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT *inst, WebRtc_Word16 *speech_frame,
+ int frame_length)
+{
+ WebRtc_Word16 feature_vector[NUM_CHANNELS], total_power;
+
+ // Get power in the bands
+ total_power = WebRtcVad_get_features(inst, speech_frame, frame_length, feature_vector);
+
+ // Make a VAD
+ inst->vad = WebRtcVad_GmmProbability(inst, feature_vector, total_power, frame_length);
+
+ return inst->vad;
+}
+
+// Calculate probability for both speech and background noise, and perform a
+// hypothesis-test.
+WebRtc_Word16 WebRtcVad_GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
+ WebRtc_Word16 total_power, int frame_length)
+{
+ int n, k;
+ WebRtc_Word16 backval;
+ WebRtc_Word16 h0, h1;
+ WebRtc_Word16 ratvec, xval;
+ WebRtc_Word16 vadflag;
+ WebRtc_Word16 shifts0, shifts1;
+ WebRtc_Word16 tmp16, tmp16_1, tmp16_2;
+ WebRtc_Word16 diff, nr, pos;
+ WebRtc_Word16 nmk, nmk2, nmk3, smk, smk2, nsk, ssk;
+ WebRtc_Word16 delt, ndelt;
+ WebRtc_Word16 maxspe, maxmu;
+ WebRtc_Word16 deltaN[NUM_TABLE_VALUES], deltaS[NUM_TABLE_VALUES];
+ WebRtc_Word16 ngprvec[NUM_TABLE_VALUES], sgprvec[NUM_TABLE_VALUES];
+ WebRtc_Word32 h0test, h1test;
+ WebRtc_Word32 tmp32_1, tmp32_2;
+ WebRtc_Word32 dotVal;
+ WebRtc_Word32 nmid, smid;
+ WebRtc_Word32 probn[NUM_MODELS], probs[NUM_MODELS];
+ WebRtc_Word16 *nmean1ptr, *nmean2ptr, *smean1ptr, *smean2ptr, *nstd1ptr, *nstd2ptr,
+ *sstd1ptr, *sstd2ptr;
+ WebRtc_Word16 overhead1, overhead2, individualTest, totalTest;
+
+ // Set the thresholds to different values based on frame length
+ if (frame_length == 80)
+ {
+ // 80 input samples
+ overhead1 = inst->over_hang_max_1[0];
+ overhead2 = inst->over_hang_max_2[0];
+ individualTest = inst->individual[0];
+ totalTest = inst->total[0];
+ } else if (frame_length == 160)
+ {
+ // 160 input samples
+ overhead1 = inst->over_hang_max_1[1];
+ overhead2 = inst->over_hang_max_2[1];
+ individualTest = inst->individual[1];
+ totalTest = inst->total[1];
+ } else
+ {
+ // 240 input samples
+ overhead1 = inst->over_hang_max_1[2];
+ overhead2 = inst->over_hang_max_2[2];
+ individualTest = inst->individual[2];
+ totalTest = inst->total[2];
+ }
+
+ if (total_power > MIN_ENERGY)
+ { // If signal present at all
+
+ // Set pointers to the gaussian parameters
+ nmean1ptr = &inst->noise_means[0];
+ nmean2ptr = &inst->noise_means[NUM_CHANNELS];
+ smean1ptr = &inst->speech_means[0];
+ smean2ptr = &inst->speech_means[NUM_CHANNELS];
+ nstd1ptr = &inst->noise_stds[0];
+ nstd2ptr = &inst->noise_stds[NUM_CHANNELS];
+ sstd1ptr = &inst->speech_stds[0];
+ sstd2ptr = &inst->speech_stds[NUM_CHANNELS];
+
+ vadflag = 0;
+ dotVal = 0;
+ for (n = 0; n < NUM_CHANNELS; n++)
+ { // For all channels
+
+ pos = WEBRTC_SPL_LSHIFT_W16(n, 1);
+ xval = feature_vector[n];
+
+ // Probability for Noise, Q7 * Q20 = Q27
+ tmp32_1 = WebRtcVad_GaussianProbability(xval, *nmean1ptr++, *nstd1ptr++,
+ &deltaN[pos]);
+ probn[0] = (WebRtc_Word32)(kNoiseDataWeights[n] * tmp32_1);
+ tmp32_1 = WebRtcVad_GaussianProbability(xval, *nmean2ptr++, *nstd2ptr++,
+ &deltaN[pos + 1]);
+ probn[1] = (WebRtc_Word32)(kNoiseDataWeights[n + NUM_CHANNELS] * tmp32_1);
+ h0test = probn[0] + probn[1]; // Q27
+ h0 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(h0test, 12); // Q15
+
+ // Probability for Speech
+ tmp32_1 = WebRtcVad_GaussianProbability(xval, *smean1ptr++, *sstd1ptr++,
+ &deltaS[pos]);
+ probs[0] = (WebRtc_Word32)(kSpeechDataWeights[n] * tmp32_1);
+ tmp32_1 = WebRtcVad_GaussianProbability(xval, *smean2ptr++, *sstd2ptr++,
+ &deltaS[pos + 1]);
+ probs[1] = (WebRtc_Word32)(kSpeechDataWeights[n + NUM_CHANNELS] * tmp32_1);
+ h1test = probs[0] + probs[1]; // Q27
+ h1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(h1test, 12); // Q15
+
+ // Get likelihood ratio. Approximate log2(H1/H0) with shifts0 - shifts1
+ shifts0 = WebRtcSpl_NormW32(h0test);
+ shifts1 = WebRtcSpl_NormW32(h1test);
+
+ if ((h0test > 0) && (h1test > 0))
+ {
+ ratvec = shifts0 - shifts1;
+ } else if (h1test > 0)
+ {
+ ratvec = 31 - shifts1;
+ } else if (h0test > 0)
+ {
+ ratvec = shifts0 - 31;
+ } else
+ {
+ ratvec = 0;
+ }
+
+ // VAD decision with spectrum weighting
+ dotVal += WEBRTC_SPL_MUL_16_16(ratvec, kSpectrumWeight[n]);
+
+ // Individual channel test
+ if ((ratvec << 2) > individualTest)
+ {
+ vadflag = 1;
+ }
+
+ // Probabilities used when updating model
+ if (h0 > 0)
+ {
+ tmp32_1 = probn[0] & 0xFFFFF000; // Q27
+ tmp32_2 = WEBRTC_SPL_LSHIFT_W32(tmp32_1, 2); // Q29
+ ngprvec[pos] = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_2, h0);
+ ngprvec[pos + 1] = 16384 - ngprvec[pos];
+ } else
+ {
+ ngprvec[pos] = 16384;
+ ngprvec[pos + 1] = 0;
+ }
+
+ // Probabilities used when updating model
+ if (h1 > 0)
+ {
+ tmp32_1 = probs[0] & 0xFFFFF000;
+ tmp32_2 = WEBRTC_SPL_LSHIFT_W32(tmp32_1, 2);
+ sgprvec[pos] = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_2, h1);
+ sgprvec[pos + 1] = 16384 - sgprvec[pos];
+ } else
+ {
+ sgprvec[pos] = 0;
+ sgprvec[pos + 1] = 0;
+ }
+ }
+
+ // Overall test
+ if (dotVal >= totalTest)
+ {
+ vadflag |= 1;
+ }
+
+ // Set pointers to the means and standard deviations.
+ nmean1ptr = &inst->noise_means[0];
+ smean1ptr = &inst->speech_means[0];
+ nstd1ptr = &inst->noise_stds[0];
+ sstd1ptr = &inst->speech_stds[0];
+
+ maxspe = 12800;
+
+ // Update the model's parameters
+ for (n = 0; n < NUM_CHANNELS; n++)
+ {
+
+ pos = WEBRTC_SPL_LSHIFT_W16(n, 1);
+
+ // Get min value in past which is used for long term correction
+ backval = WebRtcVad_FindMinimum(inst, feature_vector[n], n); // Q4
+
+            // Compute the "global" mean, i.e. the weighted sum of the two means
+ nmid = WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n], *nmean1ptr); // Q7 * Q7
+ nmid += WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n+NUM_CHANNELS],
+ *(nmean1ptr+NUM_CHANNELS));
+ tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(nmid, 6); // Q8
+
+ for (k = 0; k < NUM_MODELS; k++)
+ {
+
+ nr = pos + k;
+
+ nmean2ptr = nmean1ptr + k * NUM_CHANNELS;
+ smean2ptr = smean1ptr + k * NUM_CHANNELS;
+ nstd2ptr = nstd1ptr + k * NUM_CHANNELS;
+ sstd2ptr = sstd1ptr + k * NUM_CHANNELS;
+ nmk = *nmean2ptr;
+ smk = *smean2ptr;
+ nsk = *nstd2ptr;
+ ssk = *sstd2ptr;
+
+ // Update noise mean vector if the frame consists of noise only
+ nmk2 = nmk;
+ if (!vadflag)
+ {
+ // deltaN = (x-mu)/sigma^2
+ // ngprvec[k] = probn[k]/(probn[0] + probn[1])
+
+ delt = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(ngprvec[nr],
+ deltaN[nr], 11); // Q14*Q11
+ nmk2 = nmk + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delt,
+ kNoiseUpdateConst,
+ 22); // Q7+(Q14*Q15>>22)
+ }
+
+ // Long term correction of the noise mean
+ ndelt = WEBRTC_SPL_LSHIFT_W16(backval, 4);
+ ndelt -= tmp16_1; // Q8 - Q8
+ nmk3 = nmk2 + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(ndelt,
+ kBackEta,
+ 9); // Q7+(Q8*Q8)>>9
+
+                // Control that the noise mean does not drift too much
+ tmp16 = WEBRTC_SPL_LSHIFT_W16(k+5, 7);
+ if (nmk3 < tmp16)
+ nmk3 = tmp16;
+ tmp16 = WEBRTC_SPL_LSHIFT_W16(72+k-n, 7);
+ if (nmk3 > tmp16)
+ nmk3 = tmp16;
+ *nmean2ptr = nmk3;
+
+ if (vadflag)
+ {
+ // Update speech mean vector:
+ // deltaS = (x-mu)/sigma^2
+                    // sgprvec[k] = probs[k]/(probs[0] + probs[1])
+
+ delt = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(sgprvec[nr],
+ deltaS[nr],
+ 11); // (Q14*Q11)>>11=Q14
+ tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delt,
+ kSpeechUpdateConst,
+ 21) + 1;
+ smk2 = smk + (tmp16 >> 1); // Q7 + (Q14 * Q15 >> 22)
+
+                    // Control that the speech mean does not drift too much
+ maxmu = maxspe + 640;
+ if (smk2 < kMinimumMean[k])
+ smk2 = kMinimumMean[k];
+ if (smk2 > maxmu)
+ smk2 = maxmu;
+
+ *smean2ptr = smk2;
+
+ // (Q7>>3) = Q4
+ tmp16 = WEBRTC_SPL_RSHIFT_W16((smk + 4), 3);
+
+ tmp16 = feature_vector[n] - tmp16; // Q4
+ tmp32_1 = WEBRTC_SPL_MUL_16_16_RSFT(deltaS[nr], tmp16, 3);
+ tmp32_2 = tmp32_1 - (WebRtc_Word32)4096; // Q12
+ tmp16 = WEBRTC_SPL_RSHIFT_W16((sgprvec[nr]), 2);
+ tmp32_1 = (WebRtc_Word32)(tmp16 * tmp32_2);// (Q15>>3)*(Q14>>2)=Q12*Q12=Q24
+
+ tmp32_2 = WEBRTC_SPL_RSHIFT_W32(tmp32_1, 4); // Q20
+
+ // 0.1 * Q20 / Q7 = Q13
+ if (tmp32_2 > 0)
+ tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_2, ssk * 10);
+ else
+ {
+ tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(-tmp32_2, ssk * 10);
+ tmp16 = -tmp16;
+ }
+ // divide by 4 giving an update factor of 0.025
+ tmp16 += 128; // Rounding
+ ssk += WEBRTC_SPL_RSHIFT_W16(tmp16, 8);
+ // Division with 8 plus Q7
+ if (ssk < MIN_STD)
+ ssk = MIN_STD;
+ *sstd2ptr = ssk;
+ } else
+ {
+ // Update GMM variance vectors
+ // deltaN * (feature_vector[n] - nmk) - 1, Q11 * Q4
+ tmp16 = feature_vector[n] - WEBRTC_SPL_RSHIFT_W16(nmk, 3);
+
+ // (Q15>>3) * (Q14>>2) = Q12 * Q12 = Q24
+ tmp32_1 = WEBRTC_SPL_MUL_16_16_RSFT(deltaN[nr], tmp16, 3) - 4096;
+ tmp16 = WEBRTC_SPL_RSHIFT_W16((ngprvec[nr]+2), 2);
+ tmp32_2 = (WebRtc_Word32)(tmp16 * tmp32_1);
+ tmp32_1 = WEBRTC_SPL_RSHIFT_W32(tmp32_2, 14);
+ // Q20 * approx 0.001 (2^-10=0.0009766)
+
+ // Q20 / Q7 = Q13
+                    if (tmp32_1 > 0)
+                        tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_1, nsk);
+ else
+ {
+ tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(-tmp32_1, nsk);
+ tmp16 = -tmp16;
+ }
+ tmp16 += 32; // Rounding
+ nsk += WEBRTC_SPL_RSHIFT_W16(tmp16, 6);
+
+ if (nsk < MIN_STD)
+ nsk = MIN_STD;
+
+ *nstd2ptr = nsk;
+ }
+ }
+
+ // Separate models if they are too close - nmid in Q14
+ nmid = WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n], *nmean1ptr);
+ nmid += WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n+NUM_CHANNELS], *nmean2ptr);
+
+ // smid in Q14
+ smid = WEBRTC_SPL_MUL_16_16(kSpeechDataWeights[n], *smean1ptr);
+ smid += WEBRTC_SPL_MUL_16_16(kSpeechDataWeights[n+NUM_CHANNELS], *smean2ptr);
+
+ // diff = "global" speech mean - "global" noise mean
+ diff = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(smid, 9);
+ tmp16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(nmid, 9);
+ diff -= tmp16;
+
+ if (diff < kMinimumDifference[n])
+ {
+
+ tmp16 = kMinimumDifference[n] - diff; // Q5
+
+ // tmp16_1 = ~0.8 * (kMinimumDifference - diff) in Q7
+ // tmp16_2 = ~0.2 * (kMinimumDifference - diff) in Q7
+ tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(13, tmp16, 2);
+ tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(3, tmp16, 2);
+
+ // First Gauss, speech model
+ tmp16 = tmp16_1 + *smean1ptr;
+ *smean1ptr = tmp16;
+ smid = WEBRTC_SPL_MUL_16_16(tmp16, kSpeechDataWeights[n]);
+
+ // Second Gauss, speech model
+ tmp16 = tmp16_1 + *smean2ptr;
+ *smean2ptr = tmp16;
+ smid += WEBRTC_SPL_MUL_16_16(tmp16, kSpeechDataWeights[n+NUM_CHANNELS]);
+
+ // First Gauss, noise model
+ tmp16 = *nmean1ptr - tmp16_2;
+ *nmean1ptr = tmp16;
+
+ nmid = WEBRTC_SPL_MUL_16_16(tmp16, kNoiseDataWeights[n]);
+
+ // Second Gauss, noise model
+ tmp16 = *nmean2ptr - tmp16_2;
+ *nmean2ptr = tmp16;
+ nmid += WEBRTC_SPL_MUL_16_16(tmp16, kNoiseDataWeights[n+NUM_CHANNELS]);
+ }
+
+            // Control that the speech & noise means do not drift too much
+ maxspe = kMaximumSpeech[n];
+ tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(smid, 7);
+ if (tmp16_2 > maxspe)
+ { // Upper limit of speech model
+ tmp16_2 -= maxspe;
+
+ *smean1ptr -= tmp16_2;
+ *smean2ptr -= tmp16_2;
+ }
+
+ tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(nmid, 7);
+ if (tmp16_2 > kMaximumNoise[n])
+ {
+ tmp16_2 -= kMaximumNoise[n];
+
+ *nmean1ptr -= tmp16_2;
+ *nmean2ptr -= tmp16_2;
+ }
+
+            // Step to the next channel's Gaussian parameters
+            nmean1ptr++;
+            smean1ptr++;
+            nstd1ptr++;
+            sstd1ptr++;
+ }
+ inst->frame_counter++;
+ } else
+ {
+ vadflag = 0;
+ }
+
+ // Hangover smoothing
+ if (!vadflag)
+ {
+ if (inst->over_hang > 0)
+ {
+ vadflag = 2 + inst->over_hang;
+ inst->over_hang = inst->over_hang - 1;
+ }
+ inst->num_of_speech = 0;
+ } else
+ {
+ inst->num_of_speech = inst->num_of_speech + 1;
+ if (inst->num_of_speech > NSP_MAX)
+ {
+ inst->num_of_speech = NSP_MAX;
+ inst->over_hang = overhead2;
+ } else
+ inst->over_hang = overhead1;
+ }
+ return vadflag;
+}
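
A detail worth pausing on in GmmProbability above: the likelihood ratio is never actually divided out. WebRtcSpl_NormW32 returns how many left shifts a positive 32-bit value allows before overflow, i.e. 30 - floor(log2(x)), so shifts0 - shifts1 equals floor(log2(h1test)) - floor(log2(h0test)), an integer approximation of log2(H1/H0). A self-contained illustration (norm32 is a stand-in for WebRtcSpl_NormW32, assuming positive input):

#include <stdint.h>
#include <stdio.h>

// Stand-in for WebRtcSpl_NormW32 on positive values: the number of
// left shifts possible until bit 30 is set, i.e. 30 - floor(log2(a)).
static int norm32(int32_t a)
{
    uint32_t u = (uint32_t)a;
    int n = 0;
    while (n < 31 && (u & 0x40000000u) == 0)
    {
        u <<= 1;
        n++;
    }
    return n;
}

int main(void)
{
    int32_t h0test = 3 << 20;   // noise likelihood, Q27
    int32_t h1test = 96 << 20;  // speech likelihood, Q27: ratio is 32
    printf("approx log2(h1/h0) = %d\n", norm32(h0test) - norm32(h1test));  // 5
    return 0;
}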
diff --git a/src/common_audio/vad/main/source/vad_core.h b/src/common_audio/vad/main/source/vad_core.h
new file mode 100644
index 0000000000..544caf5ab3
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_core.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes the descriptions of the core VAD calls.
+ */
+
+#ifndef WEBRTC_VAD_CORE_H_
+#define WEBRTC_VAD_CORE_H_
+
+#include "typedefs.h"
+#include "vad_defines.h"
+
+typedef struct VadInstT_
+{
+
+ WebRtc_Word16 vad;
+ WebRtc_Word32 downsampling_filter_states[4];
+ WebRtc_Word16 noise_means[NUM_TABLE_VALUES];
+ WebRtc_Word16 speech_means[NUM_TABLE_VALUES];
+ WebRtc_Word16 noise_stds[NUM_TABLE_VALUES];
+ WebRtc_Word16 speech_stds[NUM_TABLE_VALUES];
+ WebRtc_Word32 frame_counter;
+ WebRtc_Word16 over_hang; // Over Hang
+ WebRtc_Word16 num_of_speech;
+ WebRtc_Word16 index_vector[16 * NUM_CHANNELS];
+ WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS];
+ WebRtc_Word16 mean_value[NUM_CHANNELS];
+ WebRtc_Word16 upper_state[5];
+ WebRtc_Word16 lower_state[5];
+ WebRtc_Word16 hp_filter_state[4];
+ WebRtc_Word16 over_hang_max_1[3];
+ WebRtc_Word16 over_hang_max_2[3];
+ WebRtc_Word16 individual[3];
+ WebRtc_Word16 total[3];
+
+ short init_flag;
+
+} VadInstT;
+
+/****************************************************************************
+ * WebRtcVad_InitCore(...)
+ *
+ * This function initializes a VAD instance
+ *
+ * Input:
+ * - inst : Instance that should be initialized
+ * - mode : Aggressiveness degree
+ * 0 (High quality) - 3 (Highly aggressive)
+ *
+ * Output:
+ * - inst : Initialized instance
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+int WebRtcVad_InitCore(VadInstT* inst, short mode);
+
+/****************************************************************************
+ * WebRtcVad_set_mode_core(...)
+ *
+ * This function changes the VAD settings
+ *
+ * Input:
+ * - inst : VAD instance
+ * - mode : Aggressiveness degree
+ * 0 (High quality) - 3 (Highly aggressive)
+ *
+ * Output:
+ * - inst : Changed instance
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+
+int WebRtcVad_set_mode_core(VadInstT* inst, short mode);
+
+/****************************************************************************
+ * WebRtcVad_CalcVad32khz(...)
+ * WebRtcVad_CalcVad16khz(...)
+ * WebRtcVad_CalcVad8khz(...)
+ *
+ * Calculate probability for active speech and make VAD decision.
+ *
+ * Input:
+ * - inst : Instance that should be initialized
+ * - speech_frame : Input speech frame
+ * - frame_length : Number of input samples
+ *
+ * Output:
+ * - inst : Updated filter states etc.
+ *
+ * Return value : VAD decision
+ * 0 - No active speech
+ * 1-6 - Active speech
+ */
+WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT* inst, WebRtc_Word16* speech_frame,
+ int frame_length);
+WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT* inst, WebRtc_Word16* speech_frame,
+ int frame_length);
+WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT* inst, WebRtc_Word16* speech_frame,
+ int frame_length);
+
+/****************************************************************************
+ * WebRtcVad_GmmProbability(...)
+ *
+ * This function calculates the probabilities for background noise and
+ * speech using Gaussian Mixture Models. A hypothesis-test is performed to decide
+ * which type of signal is most probable.
+ *
+ * Input:
+ * - inst : Pointer to VAD instance
+ * - feature_vector : Feature vector = log10(energy in frequency band)
+ * - total_power : Total power in frame.
+ * - frame_length : Number of input samples
+ *
+ * Output:
+ * VAD decision : 0 - noise, 1 - speech
+ *
+ */
+WebRtc_Word16 WebRtcVad_GmmProbability(VadInstT* inst, WebRtc_Word16* feature_vector,
+ WebRtc_Word16 total_power, int frame_length);
+
+#endif // WEBRTC_VAD_CORE_H_
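
In floating point, the per-channel test that WebRtcVad_GmmProbability performs reduces to comparing two-component Gaussian mixtures under the noise (H0) and speech (H1) models. A sketch of that test with illustrative parameters (the real code uses the trained Q7 tables from vad_const.c and fixed-point arithmetic throughout):

#include <math.h>

// Two-component GMM density: sum over k of w[k] * N(x; m[k], s[k]).
static double gmm2(double x, const double w[2], const double m[2],
                   const double s[2])
{
    double p = 0.0;
    int k;
    for (k = 0; k < 2; k++)
        p += w[k] * exp(-(x - m[k]) * (x - m[k]) / (2.0 * s[k] * s[k])) / s[k];
    return p;
}

// Returns nonzero if the speech hypothesis wins for one channel's
// feature value; threshold plays the role of inst->individual[].
static int channel_is_speech(double x,
                             const double wn[2], const double mn[2],
                             const double sn[2],  // noise model
                             const double ws[2], const double ms[2],
                             const double ss[2],  // speech model
                             double threshold)
{
    double h0 = gmm2(x, wn, mn, sn);
    double h1 = gmm2(x, ws, ms, ss);
    return h0 > 0.0 && h1 > 0.0 && log2(h1 / h0) > threshold;
}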
diff --git a/src/common_audio/vad/main/source/vad_defines.h b/src/common_audio/vad/main/source/vad_defines.h
new file mode 100644
index 0000000000..b33af2ef7d
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_defines.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes the macros used in VAD.
+ */
+
+#ifndef WEBRTC_VAD_DEFINES_H_
+#define WEBRTC_VAD_DEFINES_H_
+
+#define NUM_CHANNELS 6 // Six frequency bands
+#define NUM_MODELS 2 // Number of Gaussian models
+#define NUM_TABLE_VALUES (NUM_CHANNELS * NUM_MODELS)
+
+#define MIN_ENERGY 10
+#define ALPHA1 6553 // 0.2 in Q15
+#define ALPHA2 32439 // 0.99 in Q15
+#define NSP_MAX 6 // Maximum number of consecutive VAD=1 frames counted
+#define MIN_STD 384 // Minimum standard deviation
+// Mode 0, Quality thresholds - Different thresholds for the different frame lengths
+#define INDIVIDUAL_10MS_Q 24
+#define INDIVIDUAL_20MS_Q 21 // (log10(2)*66)<<2 ~=16
+#define INDIVIDUAL_30MS_Q 24
+
+#define TOTAL_10MS_Q 57
+#define TOTAL_20MS_Q 48
+#define TOTAL_30MS_Q 57
+
+#define OHMAX1_10MS_Q 8 // Max Overhang 1
+#define OHMAX2_10MS_Q 14 // Max Overhang 2
+#define OHMAX1_20MS_Q 4 // Max Overhang 1
+#define OHMAX2_20MS_Q 7 // Max Overhang 2
+#define OHMAX1_30MS_Q 3
+#define OHMAX2_30MS_Q 5
+
+// Mode 1, Low bitrate thresholds - Different thresholds for the different frame lengths
+#define INDIVIDUAL_10MS_LBR 37
+#define INDIVIDUAL_20MS_LBR 32
+#define INDIVIDUAL_30MS_LBR 37
+
+#define TOTAL_10MS_LBR 100
+#define TOTAL_20MS_LBR 80
+#define TOTAL_30MS_LBR 100
+
+#define OHMAX1_10MS_LBR 8 // Max Overhang 1
+#define OHMAX2_10MS_LBR 14 // Max Overhang 2
+#define OHMAX1_20MS_LBR 4
+#define OHMAX2_20MS_LBR 7
+
+#define OHMAX1_30MS_LBR 3
+#define OHMAX2_30MS_LBR 5
+
+// Mode 2, Aggressive thresholds - Different thresholds for the different frame lengths
+#define INDIVIDUAL_10MS_AGG 82
+#define INDIVIDUAL_20MS_AGG 78
+#define INDIVIDUAL_30MS_AGG 82
+
+#define TOTAL_10MS_AGG 285 //580
+#define TOTAL_20MS_AGG 260
+#define TOTAL_30MS_AGG 285
+
+#define OHMAX1_10MS_AGG 6 // Max Overhang 1
+#define OHMAX2_10MS_AGG 9 // Max Overhang 2
+#define OHMAX1_20MS_AGG 3
+#define OHMAX2_20MS_AGG 5
+
+#define OHMAX1_30MS_AGG 2
+#define OHMAX2_30MS_AGG 3
+
+// Mode 3, Very aggressive thresholds - Different thresholds for the different frame lengths
+#define INDIVIDUAL_10MS_VAG 94
+#define INDIVIDUAL_20MS_VAG 94
+#define INDIVIDUAL_30MS_VAG 94
+
+#define TOTAL_10MS_VAG 1100 //1700
+#define TOTAL_20MS_VAG 1050
+#define TOTAL_30MS_VAG 1100
+
+#define OHMAX1_10MS_VAG 6 // Max Overhang 1
+#define OHMAX2_10MS_VAG 9 // Max Overhang 2
+#define OHMAX1_20MS_VAG 3
+#define OHMAX2_20MS_VAG 5
+
+#define OHMAX1_30MS_VAG 2
+#define OHMAX2_30MS_VAG 3
+
+#endif // WEBRTC_VAD_DEFINES_H_
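
The three values in each threshold group above correspond to 10, 20, and 30 ms frames, i.e. 80, 160, and 240 samples at the internal 8 kHz rate, which is how vad_core.c selects among them. The mapping, as a hypothetical helper:

// Map an 8 kHz frame length in samples to the index used for
// inst->individual[], inst->total[] and the overhang arrays:
// 80 -> 0 (10 ms), 160 -> 1 (20 ms), 240 -> 2 (30 ms).
static int frame_length_index(int frame_length)
{
    return frame_length / 80 - 1;
}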
diff --git a/src/common_audio/vad/main/source/vad_filterbank.c b/src/common_audio/vad/main/source/vad_filterbank.c
new file mode 100644
index 0000000000..11392c917a
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_filterbank.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file includes the implementation of the internal filterbank associated functions.
+ * For function description, see vad_filterbank.h.
+ */
+
+#include "vad_filterbank.h"
+#include "vad_defines.h"
+#include "vad_const.h"
+#include "signal_processing_library.h"
+
+void WebRtcVad_HpOutput(WebRtc_Word16 *in_vector,
+ WebRtc_Word16 in_vector_length,
+ WebRtc_Word16 *out_vector,
+ WebRtc_Word16 *filter_state)
+{
+ WebRtc_Word16 i, *pi, *outPtr;
+ WebRtc_Word32 tmpW32;
+
+ pi = &in_vector[0];
+ outPtr = &out_vector[0];
+
+ // The sum of the absolute values of the impulse response:
+ // The zero/pole-filter has a max amplification of a single sample of: 1.4546
+ // Impulse response: 0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194
+ // The all-zero section has a max amplification of a single sample of: 1.6189
+ // Impulse response: 0.4047 -0.8094 0.4047 0 0 0
+ // The all-pole section has a max amplification of a single sample of: 1.9931
+ // Impulse response: 1.0000 0.4734 -0.1189 -0.2187 -0.0627 0.04532
+
+ for (i = 0; i < in_vector_length; i++)
+ {
+ // all-zero section (filter coefficients in Q14)
+ tmpW32 = (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[0], (*pi));
+ tmpW32 += (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[1], filter_state[0]);
+ tmpW32 += (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[2], filter_state[1]); // Q14
+ filter_state[1] = filter_state[0];
+ filter_state[0] = *pi++;
+
+ // all-pole section
+ tmpW32 -= (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpPoleCoefs[1], filter_state[2]); // Q14
+ tmpW32 -= (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpPoleCoefs[2], filter_state[3]);
+ filter_state[3] = filter_state[2];
+ filter_state[2] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32 (tmpW32, 14);
+ *outPtr++ = filter_state[2];
+ }
+}
+
+void WebRtcVad_Allpass(WebRtc_Word16 *in_vector,
+ WebRtc_Word16 *out_vector,
+ WebRtc_Word16 filter_coefficients,
+ int vector_length,
+ WebRtc_Word16 *filter_state)
+{
+ // The filter can only cause overflow (in the w16 output variable)
+    // if more than 4 consecutive input samples are of maximum value and
+    // have the same sign as the impulse response's first taps.
+ // First 6 taps of the impulse response: 0.6399 0.5905 -0.3779
+ // 0.2418 -0.1547 0.0990
+
+ int n;
+ WebRtc_Word16 tmp16;
+ WebRtc_Word32 tmp32, in32, state32;
+
+ state32 = WEBRTC_SPL_LSHIFT_W32(((WebRtc_Word32)(*filter_state)), 16); // Q31
+
+ for (n = 0; n < vector_length; n++)
+ {
+
+ tmp32 = state32 + WEBRTC_SPL_MUL_16_16(filter_coefficients, (*in_vector));
+ tmp16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 16);
+ *out_vector++ = tmp16;
+ in32 = WEBRTC_SPL_LSHIFT_W32(((WebRtc_Word32)(*in_vector)), 14);
+ state32 = in32 - WEBRTC_SPL_MUL_16_16(filter_coefficients, tmp16);
+ state32 = WEBRTC_SPL_LSHIFT_W32(state32, 1);
+ in_vector += 2;
+ }
+
+ *filter_state = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(state32, 16);
+}
+
+void WebRtcVad_SplitFilter(WebRtc_Word16 *in_vector,
+ WebRtc_Word16 *out_vector_hp,
+ WebRtc_Word16 *out_vector_lp,
+ WebRtc_Word16 *upper_state,
+ WebRtc_Word16 *lower_state,
+ int in_vector_length)
+{
+ WebRtc_Word16 tmpOut;
+ int k, halflen;
+
+ // Downsampling by 2 and get two branches
+ halflen = WEBRTC_SPL_RSHIFT_W16(in_vector_length, 1);
+
+ // All-pass filtering upper branch
+ WebRtcVad_Allpass(&in_vector[0], out_vector_hp, kAllPassCoefsQ15[0], halflen, upper_state);
+
+ // All-pass filtering lower branch
+ WebRtcVad_Allpass(&in_vector[1], out_vector_lp, kAllPassCoefsQ15[1], halflen, lower_state);
+
+ // Make LP and HP signals
+ for (k = 0; k < halflen; k++)
+ {
+ tmpOut = *out_vector_hp;
+ *out_vector_hp++ -= *out_vector_lp;
+ *out_vector_lp++ += tmpOut;
+ }
+}
+
+WebRtc_Word16 WebRtcVad_get_features(VadInstT *inst,
+ WebRtc_Word16 *in_vector,
+ int frame_size,
+ WebRtc_Word16 *out_vector)
+{
+ int curlen, filtno;
+ WebRtc_Word16 vecHP1[120], vecLP1[120];
+ WebRtc_Word16 vecHP2[60], vecLP2[60];
+ WebRtc_Word16 *ptin;
+ WebRtc_Word16 *hptout, *lptout;
+ WebRtc_Word16 power = 0;
+
+ // Split at 2000 Hz and downsample
+ filtno = 0;
+ ptin = in_vector;
+ hptout = vecHP1;
+ lptout = vecLP1;
+ curlen = frame_size;
+ WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno],
+ &inst->lower_state[filtno], curlen);
+
+ // Split at 3000 Hz and downsample
+ filtno = 1;
+ ptin = vecHP1;
+ hptout = vecHP2;
+ lptout = vecLP2;
+ curlen = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
+
+ WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno],
+ &inst->lower_state[filtno], curlen);
+
+ // Energy in 3000 Hz - 4000 Hz
+ curlen = WEBRTC_SPL_RSHIFT_W16(curlen, 1);
+ WebRtcVad_LogOfEnergy(vecHP2, &out_vector[5], &power, kOffsetVector[5], curlen);
+
+ // Energy in 2000 Hz - 3000 Hz
+ WebRtcVad_LogOfEnergy(vecLP2, &out_vector[4], &power, kOffsetVector[4], curlen);
+
+ // Split at 1000 Hz and downsample
+ filtno = 2;
+ ptin = vecLP1;
+ hptout = vecHP2;
+ lptout = vecLP2;
+ curlen = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
+ WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno],
+ &inst->lower_state[filtno], curlen);
+
+ // Energy in 1000 Hz - 2000 Hz
+ curlen = WEBRTC_SPL_RSHIFT_W16(curlen, 1);
+ WebRtcVad_LogOfEnergy(vecHP2, &out_vector[3], &power, kOffsetVector[3], curlen);
+
+ // Split at 500 Hz
+ filtno = 3;
+ ptin = vecLP2;
+ hptout = vecHP1;
+ lptout = vecLP1;
+
+ WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno],
+ &inst->lower_state[filtno], curlen);
+
+ // Energy in 500 Hz - 1000 Hz
+ curlen = WEBRTC_SPL_RSHIFT_W16(curlen, 1);
+ WebRtcVad_LogOfEnergy(vecHP1, &out_vector[2], &power, kOffsetVector[2], curlen);
+ // Split at 250 Hz
+ filtno = 4;
+ ptin = vecLP1;
+ hptout = vecHP2;
+ lptout = vecLP2;
+
+ WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno],
+ &inst->lower_state[filtno], curlen);
+
+ // Energy in 250 Hz - 500 Hz
+ curlen = WEBRTC_SPL_RSHIFT_W16(curlen, 1);
+ WebRtcVad_LogOfEnergy(vecHP2, &out_vector[1], &power, kOffsetVector[1], curlen);
+
+ // Remove DC and LFs
+ WebRtcVad_HpOutput(vecLP2, curlen, vecHP1, inst->hp_filter_state);
+
+ // Power in 80 Hz - 250 Hz
+ WebRtcVad_LogOfEnergy(vecHP1, &out_vector[0], &power, kOffsetVector[0], curlen);
+
+ return power;
+}
+
+void WebRtcVad_LogOfEnergy(WebRtc_Word16 *vector,
+ WebRtc_Word16 *enerlogval,
+ WebRtc_Word16 *power,
+ WebRtc_Word16 offset,
+ int vector_length)
+{
+ WebRtc_Word16 enerSum = 0;
+ WebRtc_Word16 zeros, frac, log2;
+ WebRtc_Word32 energy;
+
+ int shfts = 0, shfts2;
+
+ energy = WebRtcSpl_Energy(vector, vector_length, &shfts);
+
+ if (energy > 0)
+ {
+
+ shfts2 = 16 - WebRtcSpl_NormW32(energy);
+ shfts += shfts2;
+ // "shfts" is the total number of right shifts that has been done to enerSum.
+ enerSum = (WebRtc_Word16)WEBRTC_SPL_SHIFT_W32(energy, -shfts2);
+
+ // Find:
+ // 160*log10(enerSum*2^shfts) = 160*log10(2)*log2(enerSum*2^shfts) =
+ // 160*log10(2)*(log2(enerSum) + log2(2^shfts)) =
+ // 160*log10(2)*(log2(enerSum) + shfts)
+
+ zeros = WebRtcSpl_NormU32(enerSum);
+ frac = (WebRtc_Word16)(((WebRtc_UWord32)((WebRtc_Word32)(enerSum) << zeros)
+ & 0x7FFFFFFF) >> 21);
+ log2 = (WebRtc_Word16)(((31 - zeros) << 10) + frac);
+
+ *enerlogval = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(kLogConst, log2, 19)
+ + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(shfts, kLogConst, 9);
+
+ if (*enerlogval < 0)
+ {
+ *enerlogval = 0;
+ }
+ } else
+ {
+ *enerlogval = 0;
+ shfts = -15;
+ enerSum = 0;
+ }
+
+ *enerlogval += offset;
+
+ // Total power in frame
+ if (*power <= MIN_ENERGY)
+ {
+ if (shfts > 0)
+ {
+ *power += MIN_ENERGY + 1;
+ } else if (WEBRTC_SPL_SHIFT_W16(enerSum, shfts) > MIN_ENERGY)
+ {
+ *power += MIN_ENERGY + 1;
+ } else
+ {
+ *power += WEBRTC_SPL_SHIFT_W16(enerSum, shfts);
+ }
+ }
+}
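
The identity exploited by WebRtcVad_LogOfEnergy is 160*log10(energy) = 160*log10(2)*(log2(enerSum) + shfts), with kLogConst holding 160*log10(2) in Q9; since the output is in Q4, the result is 10*log10(energy) in Q4. A floating-point reference for cross-checking (a sketch, not bit-exact):

#include <math.h>

// Floating-point counterpart of WebRtcVad_LogOfEnergy: 10*log10 of the
// frame energy in Q4, clamped at zero, plus the band's offset.
static short log_of_energy_ref(const short *vector, int vector_length,
                               short offset)
{
    double energy = 0.0;
    int i;
    for (i = 0; i < vector_length; i++)
        energy += (double)vector[i] * vector[i];
    if (energy <= 0.0)
        return offset;
    double logval = 160.0 * log10(energy);  // == 10*log10(energy) in Q4
    if (logval < 0.0)
        logval = 0.0;
    return (short)logval + offset;
}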
diff --git a/src/common_audio/vad/main/source/vad_filterbank.h b/src/common_audio/vad/main/source/vad_filterbank.h
new file mode 100644
index 0000000000..a5507ead65
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_filterbank.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes the description of the internal VAD call
+ * WebRtcVad_GaussianProbability.
+ */
+
+#ifndef WEBRTC_VAD_FILTERBANK_H_
+#define WEBRTC_VAD_FILTERBANK_H_
+
+#include "vad_core.h"
+
+/****************************************************************************
+ * WebRtcVad_HpOutput(...)
+ *
+ * This function removes DC from the lowest frequency band
+ *
+ * Input:
+ * - in_vector : Samples in the frequency interval 0 - 250 Hz
+ * - in_vector_length : Length of input and output vector
+ * - filter_state : Current state of the filter
+ *
+ * Output:
+ * - out_vector : Samples in the frequency interval 80 - 250 Hz
+ * - filter_state : Updated state of the filter
+ *
+ */
+void WebRtcVad_HpOutput(WebRtc_Word16* in_vector,
+ WebRtc_Word16 in_vector_length,
+ WebRtc_Word16* out_vector,
+ WebRtc_Word16* filter_state);
+
+/****************************************************************************
+ * WebRtcVad_Allpass(...)
+ *
+ * This function is used before splitting a speech signal into
+ * different frequency bands
+ *
+ * Note! Do NOT let the arrays in_vector and out_vector correspond to the same address.
+ *
+ * Input:
+ * - in_vector : (Q0)
+ * - filter_coefficients : (Q15)
+ * - vector_length : Length of input and output vector
+ * - filter_state : Current state of the filter (Q(-1))
+ *
+ * Output:
+ * - out_vector : Output speech signal (Q(-1))
+ * - filter_state : Updated state of the filter (Q(-1))
+ *
+ */
+void WebRtcVad_Allpass(WebRtc_Word16* in_vector,
+ WebRtc_Word16* outw16,
+ WebRtc_Word16 filter_coefficients,
+ int vector_length,
+ WebRtc_Word16* filter_state);
+
+/****************************************************************************
+ * WebRtcVad_SplitFilter(...)
+ *
+ * This function is used to split a speech signal into two
+ * frequency bands, each downsampled by a factor of two
+ *
+ * Input:
+ * - in_vector : Input signal to be split into two frequency bands.
+ * - upper_state : Current state of the upper filter
+ * - lower_state : Current state of the lower filter
+ * - in_vector_length : Length of input vector
+ *
+ * Output:
+ * - out_vector_hp : Upper half of the spectrum
+ * - out_vector_lp : Lower half of the spectrum
+ * - upper_state : Updated state of the upper filter
+ * - lower_state : Updated state of the lower filter
+ *
+ */
+void WebRtcVad_SplitFilter(WebRtc_Word16* in_vector,
+ WebRtc_Word16* out_vector_hp,
+ WebRtc_Word16* out_vector_lp,
+ WebRtc_Word16* upper_state,
+ WebRtc_Word16* lower_state,
+ int in_vector_length);
+
+/****************************************************************************
+ * WebRtcVad_get_features(...)
+ *
+ * This function is used to get the logarithm of the power of each of the
+ * 6 frequency bands used by the VAD:
+ * 80 Hz - 250 Hz
+ * 250 Hz - 500 Hz
+ * 500 Hz - 1000 Hz
+ * 1000 Hz - 2000 Hz
+ * 2000 Hz - 3000 Hz
+ * 3000 Hz - 4000 Hz
+ *
+ * Input:
+ * - inst : Pointer to VAD instance
+ * - in_vector : Input speech signal
+ * - frame_size : Frame size, in number of samples
+ *
+ * Output:
+ * - out_vector : 10*log10(power in each freq. band), Q4
+ *
+ * Return: total power in the signal (NOTE! This value is not exact since it
+ *         is only used in a comparison.)
+ */
+WebRtc_Word16 WebRtcVad_get_features(VadInstT* inst,
+ WebRtc_Word16* in_vector,
+ int frame_size,
+ WebRtc_Word16* out_vector);
+
+/****************************************************************************
+ * WebRtcVad_LogOfEnergy(...)
+ *
+ * This function is used to get the logarithm of the power of one frequency band.
+ *
+ * Input:
+ * - vector : Input speech samples for one frequency band
+ * - offset : Offset value for the current frequency band
+ * - vector_length : Length of input vector
+ *
+ * Output:
+ * - enerlogval : 10*log10(energy);
+ *      - power         : Updated total power in the speech frame. NOTE! This value
+ * is not exact since it is only used in a comparison.
+ *
+ */
+void WebRtcVad_LogOfEnergy(WebRtc_Word16* vector,
+ WebRtc_Word16* enerlogval,
+ WebRtc_Word16* power,
+ WebRtc_Word16 offset,
+ int vector_length);
+
+#endif // WEBRTC_VAD_FILTERBANK_H_
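
The six band edges documented above come from the binary split-filter tree in vad_filterbank.c; each WebRtcVad_SplitFilter call halves both the band and the sample rate, and the final high-pass removes DC and low frequencies below 80 Hz:

8 kHz input (0 - 4000 Hz)
|- split @ 2000 Hz -- high -- split @ 3000 Hz -- high: 3000 - 4000 Hz
|                                             `- low:  2000 - 3000 Hz
`- low -- split @ 1000 Hz -- high: 1000 - 2000 Hz
          `- low -- split @ 500 Hz -- high: 500 - 1000 Hz
                    `- low -- split @ 250 Hz -- high: 250 - 500 Hz
                              `- low -- HP @ 80 Hz: 80 - 250 Hz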
diff --git a/src/common_audio/vad/main/source/vad_gmm.c b/src/common_audio/vad/main/source/vad_gmm.c
new file mode 100644
index 0000000000..23d12fb335
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_gmm.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file includes the implementation of the internal VAD call
+ * WebRtcVad_GaussianProbability. For function description, see vad_gmm.h.
+ */
+
+#include "vad_gmm.h"
+#include "signal_processing_library.h"
+#include "vad_const.h"
+
+WebRtc_Word32 WebRtcVad_GaussianProbability(WebRtc_Word16 in_sample,
+ WebRtc_Word16 mean,
+ WebRtc_Word16 std,
+ WebRtc_Word16 *delta)
+{
+ WebRtc_Word16 tmp16, tmpDiv, tmpDiv2, expVal, tmp16_1, tmp16_2;
+ WebRtc_Word32 tmp32, y32;
+
+ // Calculate tmpDiv=1/std, in Q10
+ tmp32 = (WebRtc_Word32)WEBRTC_SPL_RSHIFT_W16(std,1) + (WebRtc_Word32)131072; // 1 in Q17
+ tmpDiv = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32, std); // Q17/Q7 = Q10
+
+ // Calculate tmpDiv2=1/std^2, in Q14
+ tmp16 = WEBRTC_SPL_RSHIFT_W16(tmpDiv, 2); // From Q10 to Q8
+ tmpDiv2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16, tmp16, 2); // (Q8 * Q8)>>2 = Q14
+
+ tmp16 = WEBRTC_SPL_LSHIFT_W16(in_sample, 3); // Q7
+ tmp16 = tmp16 - mean; // Q7 - Q7 = Q7
+
+ // To be used later, when updating noise/speech model
+ // delta = (x-m)/std^2, in Q11
+ *delta = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmpDiv2, tmp16, 10); //(Q14*Q7)>>10 = Q11
+
+ // Calculate tmp32=(x-m)^2/(2*std^2), in Q10
+ tmp32 = (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT(*delta, tmp16, 9); // One shift for /2
+
+ // Calculate expVal ~= exp(-(x-m)^2/(2*std^2)) ~= exp2(-log2(exp(1))*tmp32)
+ if (tmp32 < kCompVar)
+ {
+        // Calculate tmp16 = log2(exp(1)) * tmp32, in Q10
+ tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((WebRtc_Word16)tmp32,
+ kLog10Const, 12);
+ tmp16 = -tmp16;
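+        // 2^tmp16 (tmp16 <= 0, Q10) is formed from an integer and a
+        // fractional part: the mantissa 2^frac is approximated linearly
+        // as 1 + frac (0x0400 supplies the implicit one in Q10), and the
+        // integer part becomes a right shift below.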
+ tmp16_2 = (WebRtc_Word16)(0x0400 | (tmp16 & 0x03FF));
+ tmp16_1 = (WebRtc_Word16)(tmp16 ^ 0xFFFF);
+ tmp16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W16(tmp16_1, 10);
+ tmp16 += 1;
+        // expVal ~= 2^(-log2(e) * tmp32), in Q10
+ expVal = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32((WebRtc_Word32)tmp16_2, tmp16);
+
+ } else
+ {
+ expVal = 0;
+ }
+
+ // Calculate y32=(1/std)*exp(-(x-m)^2/(2*std^2)), in Q20
+ y32 = WEBRTC_SPL_MUL_16_16(tmpDiv, expVal); // Q10 * Q10 = Q20
+
+ return y32; // Q20
+}
diff --git a/src/common_audio/vad/main/source/vad_gmm.h b/src/common_audio/vad/main/source/vad_gmm.h
new file mode 100644
index 0000000000..e0747fb7e5
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_gmm.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes the description of the internal VAD call
+ * WebRtcVad_GaussianProbability.
+ */
+
+#ifndef WEBRTC_VAD_GMM_H_
+#define WEBRTC_VAD_GMM_H_
+
+#include "typedefs.h"
+
+/****************************************************************************
+ * WebRtcVad_GaussianProbability(...)
+ *
+ * This function calculates the probability for the value 'in_sample', given that in_sample
+ * comes from a normal distribution with mean 'mean' and standard deviation 'std'.
+ *
+ * Input:
+ * - in_sample : Input sample in Q4
+ * - mean : mean value in the statistical model, Q7
+ * - std : standard deviation, Q7
+ *
+ * Output:
+ *
+ * - delta : Value used when updating the model, Q11
+ *
+ * Return:
+ * - out : out = 1/std * exp(-(x-m)^2/(2*std^2));
+ * Probability for x.
+ *
+ */
+WebRtc_Word32 WebRtcVad_GaussianProbability(WebRtc_Word16 in_sample,
+ WebRtc_Word16 mean,
+ WebRtc_Word16 std,
+ WebRtc_Word16 *delta);
+
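+/* Usage sketch (hypothetical Q-format values): probability of the feature
+ * value 4.0 (64 in Q4) under a Gaussian with mean 4.0 (512 in Q7) and
+ * standard deviation 1.0 (128 in Q7); the result is returned in Q20.
+ *
+ *   WebRtc_Word16 delta;
+ *   WebRtc_Word32 prob_q20 =
+ *       WebRtcVad_GaussianProbability(64, 512, 128, &delta);
+ */
+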
+#endif // WEBRTC_VAD_GMM_H_
diff --git a/src/common_audio/vad/main/source/vad_sp.c b/src/common_audio/vad/main/source/vad_sp.c
new file mode 100644
index 0000000000..f347ab5904
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_sp.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file includes the implementation of the VAD internal calls for Downsampling and
+ * FindMinimum.
+ * For function call descriptions, see vad_sp.h.
+ */
+
+#include "vad_sp.h"
+#include "vad_defines.h"
+#include "vad_const.h"
+#include "signal_processing_library.h"
+
+// Downsampling filter based on the splitting filter and the allpass functions
+// in vad_filterbank.c
+void WebRtcVad_Downsampling(WebRtc_Word16* signal_in,
+ WebRtc_Word16* signal_out,
+ WebRtc_Word32* filter_state,
+ int inlen)
+{
+ WebRtc_Word16 tmp16_1, tmp16_2;
+ WebRtc_Word32 tmp32_1, tmp32_2;
+ int n, halflen;
+
+ // Downsampling by 2 and get two branches
+ halflen = WEBRTC_SPL_RSHIFT_W16(inlen, 1);
+
+ tmp32_1 = filter_state[0];
+ tmp32_2 = filter_state[1];
+
+ // Filter coefficients in Q13, filter state in Q0
+ for (n = 0; n < halflen; n++)
+ {
+ // All-pass filtering upper branch
+ tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32_1, 1)
+ + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[0]),
+ *signal_in, 14);
+ *signal_out = tmp16_1;
+ tmp32_1 = (WebRtc_Word32)(*signal_in++)
+ - (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[0]), tmp16_1, 12);
+
+ // All-pass filtering lower branch
+ tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32_2, 1)
+ + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[1]),
+ *signal_in, 14);
+ *signal_out++ += tmp16_2;
+ tmp32_2 = (WebRtc_Word32)(*signal_in++)
+ - (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[1]), tmp16_2, 12);
+ }
+ filter_state[0] = tmp32_1;
+ filter_state[1] = tmp32_2;
+}
+
+WebRtc_Word16 WebRtcVad_FindMinimum(VadInstT* inst,
+ WebRtc_Word16 x,
+ int n)
+{
+ int i, j, k, II = -1, offset;
+ WebRtc_Word16 meanV, alpha;
+ WebRtc_Word32 tmp32, tmp32_1;
+ WebRtc_Word16 *valptr, *idxptr, *p1, *p2, *p3;
+
+ // Offset to beginning of the 16 minimum values in memory
+ offset = WEBRTC_SPL_LSHIFT_W16(n, 4);
+
+ // Pointer to memory for the 16 minimum values and the age of each value
+ idxptr = &inst->index_vector[offset];
+ valptr = &inst->low_value_vector[offset];
+
+    // Each value in low_value_vector ages by one frame per call. Update
+    // the age stored in index_vector, and discard values older than 100
+    // frames.
+ for (i = 0; i < 16; i++)
+ {
+ p3 = idxptr + i;
+ if (*p3 != 100)
+ {
+ *p3 += 1;
+ } else
+ {
+ p1 = valptr + i + 1;
+ p2 = p3 + 1;
+ for (j = i; j < 16; j++)
+ {
+ *(valptr + j) = *p1++;
+ *(idxptr + j) = *p2++;
+ }
+ *(idxptr + 15) = 101;
+ *(valptr + 15) = 10000;
+ }
+ }
+
+    // Check if x is smaller than any of the 16 values in low_value_vector.
+    // If so, find its insertion position (the list is kept sorted).
+ if (x < *(valptr + 7))
+ {
+ if (x < *(valptr + 3))
+ {
+ if (x < *(valptr + 1))
+ {
+ if (x < *valptr)
+ {
+ II = 0;
+ } else
+ {
+ II = 1;
+ }
+ } else if (x < *(valptr + 2))
+ {
+ II = 2;
+ } else
+ {
+ II = 3;
+ }
+ } else if (x < *(valptr + 5))
+ {
+ if (x < *(valptr + 4))
+ {
+ II = 4;
+ } else
+ {
+ II = 5;
+ }
+ } else if (x < *(valptr + 6))
+ {
+ II = 6;
+ } else
+ {
+ II = 7;
+ }
+ } else if (x < *(valptr + 15))
+ {
+ if (x < *(valptr + 11))
+ {
+ if (x < *(valptr + 9))
+ {
+ if (x < *(valptr + 8))
+ {
+ II = 8;
+ } else
+ {
+ II = 9;
+ }
+ } else if (x < *(valptr + 10))
+ {
+ II = 10;
+ } else
+ {
+ II = 11;
+ }
+ } else if (x < *(valptr + 13))
+ {
+ if (x < *(valptr + 12))
+ {
+ II = 12;
+ } else
+ {
+ II = 13;
+ }
+ } else if (x < *(valptr + 14))
+ {
+ II = 14;
+ } else
+ {
+ II = 15;
+ }
+ }
+
+    // Insert the new minimum at its position and shift larger values up.
+ if (II > -1)
+ {
+ for (i = 15; i > II; i--)
+ {
+ k = i - 1;
+ *(valptr + i) = *(valptr + k);
+ *(idxptr + i) = *(idxptr + k);
+ }
+ *(valptr + II) = x;
+ *(idxptr + II) = 1;
+ }
+
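+    // Pick the raw minimum estimate: once at least three frames have been
+    // seen, use the third lowest value (the median of the five lowest);
+    // before that, fall back to the lowest value or a constant.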
+ meanV = 0;
+ if ((inst->frame_counter) > 4)
+ {
+ j = 5;
+ } else
+ {
+ j = inst->frame_counter;
+ }
+
+ if (j > 2)
+ {
+ meanV = *(valptr + 2);
+ } else if (j > 0)
+ {
+ meanV = *valptr;
+ } else
+ {
+ meanV = 1600;
+ }
+
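+    // Smooth with a one-pole filter, approximately
+    // mean_value = alpha * mean_value + (1 - alpha) * meanV, tracking
+    // fast (alpha = 0.2) when the minimum drops and slowly (alpha = 0.99)
+    // when it rises.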
+ if (inst->frame_counter > 0)
+ {
+ if (meanV < inst->mean_value[n])
+ {
+ alpha = (WebRtc_Word16)ALPHA1; // 0.2 in Q15
+ } else
+ {
+ alpha = (WebRtc_Word16)ALPHA2; // 0.99 in Q15
+ }
+ } else
+ {
+ alpha = 0;
+ }
+
+ tmp32 = WEBRTC_SPL_MUL_16_16((alpha+1), inst->mean_value[n]);
+ tmp32_1 = WEBRTC_SPL_MUL_16_16(WEBRTC_SPL_WORD16_MAX - alpha, meanV);
+ tmp32 += tmp32_1;
+ tmp32 += 16384;
+ inst->mean_value[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 15);
+
+ return inst->mean_value[n];
+}
diff --git a/src/common_audio/vad/main/source/vad_sp.h b/src/common_audio/vad/main/source/vad_sp.h
new file mode 100644
index 0000000000..ae15c11ad6
--- /dev/null
+++ b/src/common_audio/vad/main/source/vad_sp.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes the VAD internal calls for Downsampling and FindMinimum.
+ * Specific function calls are given below.
+ */
+
+#ifndef WEBRTC_VAD_SP_H_
+#define WEBRTC_VAD_SP_H_
+
+#include "vad_core.h"
+
+/****************************************************************************
+ * WebRtcVad_Downsampling(...)
+ *
+ * Downsamples the signal by a factor of 2, e.g. 32 kHz -> 16 kHz or
+ * 16 kHz -> 8 kHz.
+ *
+ * Input:
+ * - signal_in : Input signal
+ * - in_length : Length of input signal in samples
+ *
+ * Input & Output:
+ * - filter_state : Filter state for first all-pass filters
+ *
+ * Output:
+ *      - signal_out        : Downsampled signal (of length in_length / 2)
+ */
+void WebRtcVad_Downsampling(WebRtc_Word16* signal_in,
+ WebRtc_Word16* signal_out,
+ WebRtc_Word32* filter_state,
+ int in_length);
+
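+/* Usage sketch: decimate a 10 ms frame from 32 kHz (320 samples) to
+ * 16 kHz (160 samples); the two-element state is zeroed once and then
+ * carried between calls.
+ *
+ *   WebRtc_Word16 in[320], out[160];
+ *   WebRtc_Word32 state[2] = {0, 0};
+ *   WebRtcVad_Downsampling(in, out, state, 320);
+ */
+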
+/****************************************************************************
+ * WebRtcVad_FindMinimum(...)
+ *
+ * Finds the five lowest values of the feature in a window covering the
+ * last 100 frames, and returns a smoothed mean of these values.
+ *
+ * Input:
+ * - feature_value : Feature value
+ * - channel : Channel number
+ *
+ * Input & Output:
+ * - inst : State information
+ *
+ * Output:
+ * return value : Weighted minimum value for a moving window.
+ */
+WebRtc_Word16 WebRtcVad_FindMinimum(VadInstT* inst, WebRtc_Word16 feature_value, int channel);
+
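+/* Usage sketch (channel index illustrative): track the smoothed minimum
+ * of the channel-0 feature across successive calls on the same instance.
+ *
+ *   WebRtc_Word16 smoothed_min = WebRtcVad_FindMinimum(inst, feature, 0);
+ */
+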
+#endif // WEBRTC_VAD_SP_H_
diff --git a/src/common_audio/vad/main/source/webrtc_vad.c b/src/common_audio/vad/main/source/webrtc_vad.c
new file mode 100644
index 0000000000..dcfbda1128
--- /dev/null
+++ b/src/common_audio/vad/main/source/webrtc_vad.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file includes the VAD API calls. For a specific function call description,
+ * see webrtc_vad.h
+ */
+
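+/* Typical calling sequence (sketch; error handling omitted, frame
+ * contents assumed to come from the capture device):
+ *
+ *   VadInst* handle = NULL;
+ *   WebRtc_Word16 frame[160];          // 10 ms at 16 kHz
+ *   WebRtcVad_Create(&handle);
+ *   WebRtcVad_Init(handle);
+ *   WebRtcVad_set_mode(handle, 0);     // 0..3, 0 = default high quality
+ *   // 1 = active speech, 0 = non-speech, -1 = error
+ *   WebRtc_Word16 active = WebRtcVad_Process(handle, 16000, frame, 160);
+ *   WebRtcVad_Free(handle);
+ */
+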
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc_vad.h"
+#include "vad_core.h"
+
+static const int kInitCheck = 42;
+
+WebRtc_Word16 WebRtcVad_get_version(char *version, size_t size_bytes)
+{
+ const char my_version[] = "VAD 1.2.0";
+
+ if (version == NULL)
+ {
+ return -1;
+ }
+
+ if (size_bytes < sizeof(my_version))
+ {
+ return -1;
+ }
+
+ memcpy(version, my_version, sizeof(my_version));
+ return 0;
+}
+
+WebRtc_Word16 WebRtcVad_AssignSize(int *size_in_bytes)
+{
+ *size_in_bytes = sizeof(VadInstT) * 2 / sizeof(WebRtc_Word16);
+ return 0;
+}
+
+WebRtc_Word16 WebRtcVad_Assign(VadInst **vad_inst, void *vad_inst_addr)
+{
+
+ if (vad_inst == NULL)
+ {
+ return -1;
+ }
+
+ if (vad_inst_addr != NULL)
+ {
+ *vad_inst = (VadInst*)vad_inst_addr;
+ return 0;
+ } else
+ {
+ return -1;
+ }
+}
+
+WebRtc_Word16 WebRtcVad_Create(VadInst **vad_inst)
+{
+
+ VadInstT *vad_ptr = NULL;
+
+ if (vad_inst == NULL)
+ {
+ return -1;
+ }
+
+ *vad_inst = NULL;
+
+ vad_ptr = (VadInstT *)malloc(sizeof(VadInstT));
+ *vad_inst = (VadInst *)vad_ptr;
+
+ if (vad_ptr == NULL)
+ {
+ return -1;
+ }
+
+ vad_ptr->init_flag = 0;
+
+ return 0;
+}
+
+WebRtc_Word16 WebRtcVad_Free(VadInst *vad_inst)
+{
+
+ if (vad_inst == NULL)
+ {
+ return -1;
+ }
+
+ free(vad_inst);
+ return 0;
+}
+
+WebRtc_Word16 WebRtcVad_Init(VadInst *vad_inst)
+{
+ short mode = 0; // Default high quality
+
+ if (vad_inst == NULL)
+ {
+ return -1;
+ }
+
+ return WebRtcVad_InitCore((VadInstT*)vad_inst, mode);
+}
+
+WebRtc_Word16 WebRtcVad_set_mode(VadInst *vad_inst, WebRtc_Word16 mode)
+{
+ VadInstT* vad_ptr;
+
+ if (vad_inst == NULL)
+ {
+ return -1;
+ }
+
+ vad_ptr = (VadInstT*)vad_inst;
+ if (vad_ptr->init_flag != kInitCheck)
+ {
+ return -1;
+ }
+
+ return WebRtcVad_set_mode_core((VadInstT*)vad_inst, mode);
+}
+
+WebRtc_Word16 WebRtcVad_Process(VadInst *vad_inst,
+ WebRtc_Word16 fs,
+ WebRtc_Word16 *speech_frame,
+ WebRtc_Word16 frame_length)
+{
+ WebRtc_Word16 vad;
+ VadInstT* vad_ptr;
+
+ if (vad_inst == NULL)
+ {
+ return -1;
+ }
+
+ vad_ptr = (VadInstT*)vad_inst;
+ if (vad_ptr->init_flag != kInitCheck)
+ {
+ return -1;
+ }
+
+ if (speech_frame == NULL)
+ {
+ return -1;
+ }
+
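+    // Only frames of 10, 20 or 30 ms are accepted at each sampling rate.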
+ if (fs == 32000)
+ {
+ if ((frame_length != 320) && (frame_length != 640) && (frame_length != 960))
+ {
+ return -1;
+ }
+ vad = WebRtcVad_CalcVad32khz((VadInstT*)vad_inst, speech_frame, frame_length);
+
+ } else if (fs == 16000)
+ {
+ if ((frame_length != 160) && (frame_length != 320) && (frame_length != 480))
+ {
+ return -1;
+ }
+ vad = WebRtcVad_CalcVad16khz((VadInstT*)vad_inst, speech_frame, frame_length);
+
+ } else if (fs == 8000)
+ {
+ if ((frame_length != 80) && (frame_length != 160) && (frame_length != 240))
+ {
+ return -1;
+ }
+ vad = WebRtcVad_CalcVad8khz((VadInstT*)vad_inst, speech_frame, frame_length);
+
+ } else
+ {
+ return -1; // Not a supported sampling frequency
+ }
+
+ if (vad > 0)
+ {
+ return 1;
+ } else if (vad == 0)
+ {
+ return 0;
+ } else
+ {
+ return -1;
+ }
+}
diff --git a/src/common_audio/vad/main/test/unit_test/unit_test.cc b/src/common_audio/vad/main/test/unit_test/unit_test.cc
new file mode 100644
index 0000000000..8ac793e44e
--- /dev/null
+++ b/src/common_audio/vad/main/test/unit_test/unit_test.cc
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file includes the implementation of the VAD unit tests.
+ */
+
+#include <cstring>
+#include "unit_test.h"
+#include "webrtc_vad.h"
+
+
+class VadEnvironment : public ::testing::Environment {
+ public:
+ virtual void SetUp() {
+ }
+
+ virtual void TearDown() {
+ }
+};
+
+VadTest::VadTest()
+{
+}
+
+void VadTest::SetUp() {
+}
+
+void VadTest::TearDown() {
+}
+
+TEST_F(VadTest, ApiTest) {
+ VadInst *vad_inst;
+ int i, j, k;
+ short zeros[960];
+ short speech[960];
+ char version[32];
+
+ // Valid test cases
+ int fs[3] = {8000, 16000, 32000};
+ int nMode[4] = {0, 1, 2, 3};
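+    // Frame lengths of 10, 20 and 30 ms for 8, 16 and 32 kHz, respectively.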
+    int framelen[3][3] = {{80, 160, 240},
+                          {160, 320, 480},
+                          {320, 640, 960}};
+ int vad_counter = 0;
+
+ memset(zeros, 0, sizeof(short) * 960);
+ memset(speech, 1, sizeof(short) * 960);
+ speech[13] = 1374;
+ speech[73] = -3747;
+
+ // WebRtcVad_get_version()
+    EXPECT_EQ(0, WebRtcVad_get_version(version, sizeof(version)));
+ //printf("API Test for %s\n", version);
+
+ // Null instance tests
+ EXPECT_EQ(-1, WebRtcVad_Create(NULL));
+ EXPECT_EQ(-1, WebRtcVad_Init(NULL));
+ EXPECT_EQ(-1, WebRtcVad_Assign(NULL, NULL));
+ EXPECT_EQ(-1, WebRtcVad_Free(NULL));
+ EXPECT_EQ(-1, WebRtcVad_set_mode(NULL, nMode[0]));
+ EXPECT_EQ(-1, WebRtcVad_Process(NULL, fs[0], speech, framelen[0][0]));
+
+
+ EXPECT_EQ(WebRtcVad_Create(&vad_inst), 0);
+
+ // Not initialized tests
+ EXPECT_EQ(-1, WebRtcVad_Process(vad_inst, fs[0], speech, framelen[0][0]));
+ EXPECT_EQ(-1, WebRtcVad_set_mode(vad_inst, nMode[0]));
+
+ // WebRtcVad_Init() tests
+ EXPECT_EQ(WebRtcVad_Init(vad_inst), 0);
+
+ // WebRtcVad_set_mode() tests
+ EXPECT_EQ(-1, WebRtcVad_set_mode(vad_inst, -1));
+ EXPECT_EQ(-1, WebRtcVad_set_mode(vad_inst, 4));
+
+ for (i = 0; i < sizeof(nMode)/sizeof(nMode[0]); i++) {
+ EXPECT_EQ(WebRtcVad_set_mode(vad_inst, nMode[i]), 0);
+ }
+
+ // WebRtcVad_Process() tests
+ EXPECT_EQ(-1, WebRtcVad_Process(vad_inst, fs[0], NULL, framelen[0][0]));
+ EXPECT_EQ(-1, WebRtcVad_Process(vad_inst, 12000, speech, framelen[0][0]));
+ EXPECT_EQ(-1, WebRtcVad_Process(vad_inst, fs[0], speech, framelen[1][1]));
+ EXPECT_EQ(WebRtcVad_Process(vad_inst, fs[0], zeros, framelen[0][0]), 0);
+ for (i = 0; i < sizeof(fs)/sizeof(fs[0]); i++) {
+ for (j = 0; j < sizeof(framelen[0])/sizeof(framelen[0][0]); j++) {
+ for (k = 0; k < sizeof(nMode)/sizeof(nMode[0]); k++) {
+ EXPECT_EQ(WebRtcVad_set_mode(vad_inst, nMode[k]), 0);
+// printf("%d\n", WebRtcVad_Process(vad_inst, fs[i], speech, framelen[i][j]));
+ if (vad_counter < 9)
+ {
+ EXPECT_EQ(WebRtcVad_Process(vad_inst, fs[i], speech, framelen[i][j]), 1);
+ } else
+ {
+ EXPECT_EQ(WebRtcVad_Process(vad_inst, fs[i], speech, framelen[i][j]), 0);
+ }
+ vad_counter++;
+ }
+ }
+ }
+
+ EXPECT_EQ(0, WebRtcVad_Free(vad_inst));
+
+}
+
+int main(int argc, char** argv) {
+ ::testing::InitGoogleTest(&argc, argv);
+ VadEnvironment* env = new VadEnvironment;
+ ::testing::AddGlobalTestEnvironment(env);
+
+ return RUN_ALL_TESTS();
+}
diff --git a/src/common_audio/vad/main/test/unit_test/unit_test.h b/src/common_audio/vad/main/test/unit_test/unit_test.h
new file mode 100644
index 0000000000..62dac11de4
--- /dev/null
+++ b/src/common_audio/vad/main/test/unit_test/unit_test.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes the declaration of the VAD unit test.
+ */
+
+#ifndef WEBRTC_VAD_UNIT_TEST_H_
+#define WEBRTC_VAD_UNIT_TEST_H_
+
+#include <gtest/gtest.h>
+
+class VadTest : public ::testing::Test {
+ protected:
+ VadTest();
+ virtual void SetUp();
+ virtual void TearDown();
+};
+
+#endif // WEBRTC_VAD_UNIT_TEST_H_