aboutsummaryrefslogtreecommitdiff
path: root/src/common_audio/vad/main/source
diff options
context:
space:
mode:
Diffstat (limited to 'src/common_audio/vad/main/source')
-rw-r--r--src/common_audio/vad/main/source/Android.mk64
-rw-r--r--src/common_audio/vad/main/source/vad.gyp51
-rw-r--r--src/common_audio/vad/main/source/vad_const.c80
-rw-r--r--src/common_audio/vad/main/source/vad_const.h59
-rw-r--r--src/common_audio/vad/main/source/vad_core.c685
-rw-r--r--src/common_audio/vad/main/source/vad_core.h132
-rw-r--r--src/common_audio/vad/main/source/vad_defines.h95
-rw-r--r--src/common_audio/vad/main/source/vad_filterbank.c267
-rw-r--r--src/common_audio/vad/main/source/vad_filterbank.h143
-rw-r--r--src/common_audio/vad/main/source/vad_gmm.c70
-rw-r--r--src/common_audio/vad/main/source/vad_gmm.h47
-rw-r--r--src/common_audio/vad/main/source/vad_sp.c231
-rw-r--r--src/common_audio/vad/main/source/vad_sp.h60
-rw-r--r--src/common_audio/vad/main/source/webrtc_vad.c197
14 files changed, 0 insertions, 2181 deletions
diff --git a/src/common_audio/vad/main/source/Android.mk b/src/common_audio/vad/main/source/Android.mk
deleted file mode 100644
index f52df935d1..0000000000
--- a/src/common_audio/vad/main/source/Android.mk
+++ /dev/null
@@ -1,64 +0,0 @@
-# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
-#
-# Use of this source code is governed by a BSD-style license
-# that can be found in the LICENSE file in the root of the source
-# tree. An additional intellectual property rights grant can be found
-# in the file PATENTS. All contributing project authors may
-# be found in the AUTHORS file in the root of the source tree.
-
-LOCAL_PATH := $(call my-dir)
-
-include $(CLEAR_VARS)
-
-LOCAL_ARM_MODE := arm
-LOCAL_MODULE_CLASS := STATIC_LIBRARIES
-LOCAL_MODULE := libwebrtc_vad
-LOCAL_MODULE_TAGS := optional
-LOCAL_GENERATED_SOURCES :=
-LOCAL_SRC_FILES := webrtc_vad.c \
- vad_const.c \
- vad_core.c \
- vad_filterbank.c \
- vad_gmm.c \
- vad_sp.c
-
-# Flags passed to both C and C++ files.
-MY_CFLAGS :=
-MY_CFLAGS_C :=
-MY_DEFS := '-DNO_TCMALLOC' \
- '-DNO_HEAPCHECKER' \
- '-DWEBRTC_TARGET_PC' \
- '-DWEBRTC_LINUX'
-ifeq ($(TARGET_ARCH),arm)
-MY_DEFS += \
- '-DWEBRTC_ANDROID' \
- '-DANDROID'
-endif
-LOCAL_CFLAGS := $(MY_CFLAGS_C) $(MY_CFLAGS) $(MY_DEFS)
-
-# Include paths placed before CFLAGS/CPPFLAGS
-LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../.. \
- $(LOCAL_PATH)/../interface \
- $(LOCAL_PATH)/../../../signal_processing_library/main/interface
-
-# Flags passed to only C++ (and not C) files.
-LOCAL_CPPFLAGS :=
-
-LOCAL_LDFLAGS :=
-
-LOCAL_STATIC_LIBRARIES :=
-
-LOCAL_SHARED_LIBRARIES := libdl \
- libstlport
-LOCAL_ADDITIONAL_DEPENDENCIES :=
-
-ifeq ($(TARGET_OS)-$(TARGET_SIMULATOR),linux-true)
-LOCAL_LDLIBS += -ldl -lpthread
-endif
-
-ifneq ($(TARGET_SIMULATOR),true)
-LOCAL_SHARED_LIBRARIES += libdl
-endif
-
-include external/stlport/libstlport.mk
-include $(BUILD_STATIC_LIBRARY)
diff --git a/src/common_audio/vad/main/source/vad.gyp b/src/common_audio/vad/main/source/vad.gyp
deleted file mode 100644
index 754b684d5b..0000000000
--- a/src/common_audio/vad/main/source/vad.gyp
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
-#
-# Use of this source code is governed by a BSD-style license
-# that can be found in the LICENSE file in the root of the source
-# tree. An additional intellectual property rights grant can be found
-# in the file PATENTS. All contributing project authors may
-# be found in the AUTHORS file in the root of the source tree.
-
-{
- 'includes': [
- '../../../../common_settings.gypi', # Common settings
- ],
- 'targets': [
- {
- 'target_name': 'vad',
- 'type': '<(library)',
- 'dependencies': [
- '../../../signal_processing_library/main/source/spl.gyp:spl',
- ],
- 'include_dirs': [
- '../interface',
- ],
- 'direct_dependent_settings': {
- 'include_dirs': [
- '../interface',
- ],
- },
- 'sources': [
- '../interface/webrtc_vad.h',
- 'webrtc_vad.c',
- 'vad_const.c',
- 'vad_const.h',
- 'vad_defines.h',
- 'vad_core.c',
- 'vad_core.h',
- 'vad_filterbank.c',
- 'vad_filterbank.h',
- 'vad_gmm.c',
- 'vad_gmm.h',
- 'vad_sp.c',
- 'vad_sp.h',
- ],
- },
- ],
-}
-
-# Local Variables:
-# tab-width:2
-# indent-tabs-mode:nil
-# End:
-# vim: set expandtab tabstop=2 shiftwidth=2:
diff --git a/src/common_audio/vad/main/source/vad_const.c b/src/common_audio/vad/main/source/vad_const.c
deleted file mode 100644
index 47b6a4b8ca..0000000000
--- a/src/common_audio/vad/main/source/vad_const.c
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-/*
- * This file includes the constant values used internally in VAD.
- */
-
-#include "vad_const.h"
-
-// Spectrum Weighting
-const WebRtc_Word16 kSpectrumWeight[6] = {6, 8, 10, 12, 14, 16};
-
-const WebRtc_Word16 kCompVar = 22005;
-
-// Constant 160*log10(2) in Q9
-const WebRtc_Word16 kLogConst = 24660;
-
-// Constant log2(exp(1)) in Q12
-const WebRtc_Word16 kLog10Const = 5909;
-
-// Q15
-const WebRtc_Word16 kNoiseUpdateConst = 655;
-const WebRtc_Word16 kSpeechUpdateConst = 6554;
-
-// Q8
-const WebRtc_Word16 kBackEta = 154;
-
-// Coefficients used by WebRtcVad_HpOutput, Q14
-const WebRtc_Word16 kHpZeroCoefs[3] = {6631, -13262, 6631};
-const WebRtc_Word16 kHpPoleCoefs[3] = {16384, -7756, 5620};
-
-// Allpass filter coefficients, upper and lower, in Q15
-// Upper: 0.64, Lower: 0.17
-const WebRtc_Word16 kAllPassCoefsQ15[2] = {20972, 5571};
-const WebRtc_Word16 kAllPassCoefsQ13[2] = {5243, 1392}; // Q13
-
-// Minimum difference between the two models, Q5
-const WebRtc_Word16 kMinimumDifference[6] = {544, 544, 576, 576, 576, 576};
-
-// Upper limit of mean value for speech model, Q7
-const WebRtc_Word16 kMaximumSpeech[6] = {11392, 11392, 11520, 11520, 11520, 11520};
-
-// Minimum value for mean value
-const WebRtc_Word16 kMinimumMean[2] = {640, 768};
-
-// Upper limit of mean value for noise model, Q7
-const WebRtc_Word16 kMaximumNoise[6] = {9216, 9088, 8960, 8832, 8704, 8576};
-
-// Adjustment for division with two in WebRtcVad_SplitFilter
-const WebRtc_Word16 kOffsetVector[6] = {368, 368, 272, 176, 176, 176};
-
-// Start values for the Gaussian models, Q7
-// Weights for the two Gaussians for the six channels (noise)
-const WebRtc_Word16 kNoiseDataWeights[12] = {34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103};
-
-// Weights for the two Gaussians for the six channels (speech)
-const WebRtc_Word16 kSpeechDataWeights[12] = {48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81};
-
-// Means for the two Gaussians for the six channels (noise)
-const WebRtc_Word16 kNoiseDataMeans[12] = {6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863,
- 7820, 7266, 5020, 4362};
-
-// Means for the two Gaussians for the six channels (speech)
-const WebRtc_Word16 kSpeechDataMeans[12] = {8306, 10085, 10078, 11823, 11843, 6309, 9473,
- 9571, 10879, 7581, 8180, 7483};
-
-// Stds for the two Gaussians for the six channels (noise)
-const WebRtc_Word16 kNoiseDataStds[12] = {378, 1064, 493, 582, 688, 593, 474, 697, 475, 688,
- 421, 455};
-
-// Stds for the two Gaussians for the six channels (speech)
-const WebRtc_Word16 kSpeechDataStds[12] = {555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540,
- 1079, 850};
diff --git a/src/common_audio/vad/main/source/vad_const.h b/src/common_audio/vad/main/source/vad_const.h
deleted file mode 100644
index 89804379be..0000000000
--- a/src/common_audio/vad/main/source/vad_const.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/*
- * This header file includes the declarations of the internally used constants.
- */
-
-#ifndef WEBRTC_VAD_CONST_H_
-#define WEBRTC_VAD_CONST_H_
-
-#include "typedefs.h"
-
-// TODO(ajm): give these internal-linkage by moving to the appropriate file
-// where possible, and otherwise tag with WebRtcVad_.
-
-// Spectrum Weighting
-extern const WebRtc_Word16 kSpectrumWeight[];
-extern const WebRtc_Word16 kCompVar;
-// Logarithm constant
-extern const WebRtc_Word16 kLogConst;
-extern const WebRtc_Word16 kLog10Const;
-// Q15
-extern const WebRtc_Word16 kNoiseUpdateConst;
-extern const WebRtc_Word16 kSpeechUpdateConst;
-// Q8
-extern const WebRtc_Word16 kBackEta;
-// Coefficients used by WebRtcVad_HpOutput, Q14
-extern const WebRtc_Word16 kHpZeroCoefs[];
-extern const WebRtc_Word16 kHpPoleCoefs[];
-// Allpass filter coefficients, upper and lower, in Q15 resp. Q13
-extern const WebRtc_Word16 kAllPassCoefsQ15[];
-extern const WebRtc_Word16 kAllPassCoefsQ13[];
-// Minimum difference between the two models, Q5
-extern const WebRtc_Word16 kMinimumDifference[];
-// Maximum value when updating the speech model, Q7
-extern const WebRtc_Word16 kMaximumSpeech[];
-// Minimum value for mean value
-extern const WebRtc_Word16 kMinimumMean[];
-// Upper limit of mean value for noise model, Q7
-extern const WebRtc_Word16 kMaximumNoise[];
-// Adjustment for division with two in WebRtcVad_SplitFilter
-extern const WebRtc_Word16 kOffsetVector[];
-// Start values for the Gaussian models, Q7
-extern const WebRtc_Word16 kNoiseDataWeights[];
-extern const WebRtc_Word16 kSpeechDataWeights[];
-extern const WebRtc_Word16 kNoiseDataMeans[];
-extern const WebRtc_Word16 kSpeechDataMeans[];
-extern const WebRtc_Word16 kNoiseDataStds[];
-extern const WebRtc_Word16 kSpeechDataStds[];
-
-#endif // WEBRTC_VAD_CONST_H_
diff --git a/src/common_audio/vad/main/source/vad_core.c b/src/common_audio/vad/main/source/vad_core.c
deleted file mode 100644
index e8829993d5..0000000000
--- a/src/common_audio/vad/main/source/vad_core.c
+++ /dev/null
@@ -1,685 +0,0 @@
-/*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/*
- * This file includes the implementation of the core functionality in VAD.
- * For function description, see vad_core.h.
- */
-
-#include "vad_core.h"
-#include "vad_const.h"
-#include "vad_defines.h"
-#include "vad_filterbank.h"
-#include "vad_gmm.h"
-#include "vad_sp.h"
-#include "signal_processing_library.h"
-
-static const int kInitCheck = 42;
-
-// Initialize VAD
-int WebRtcVad_InitCore(VadInstT *inst, short mode)
-{
- int i;
-
- // Initialization of struct
- inst->vad = 1;
- inst->frame_counter = 0;
- inst->over_hang = 0;
- inst->num_of_speech = 0;
-
- // Initialization of downsampling filter state
- inst->downsampling_filter_states[0] = 0;
- inst->downsampling_filter_states[1] = 0;
- inst->downsampling_filter_states[2] = 0;
- inst->downsampling_filter_states[3] = 0;
-
- // Read initial PDF parameters
- for (i = 0; i < NUM_TABLE_VALUES; i++)
- {
- inst->noise_means[i] = kNoiseDataMeans[i];
- inst->speech_means[i] = kSpeechDataMeans[i];
- inst->noise_stds[i] = kNoiseDataStds[i];
- inst->speech_stds[i] = kSpeechDataStds[i];
- }
-
- // Index and Minimum value vectors are initialized
- for (i = 0; i < 16 * NUM_CHANNELS; i++)
- {
- inst->low_value_vector[i] = 10000;
- inst->index_vector[i] = 0;
- }
-
- for (i = 0; i < 5; i++)
- {
- inst->upper_state[i] = 0;
- inst->lower_state[i] = 0;
- }
-
- for (i = 0; i < 4; i++)
- {
- inst->hp_filter_state[i] = 0;
- }
-
- // Init mean value memory, for FindMin function
- inst->mean_value[0] = 1600;
- inst->mean_value[1] = 1600;
- inst->mean_value[2] = 1600;
- inst->mean_value[3] = 1600;
- inst->mean_value[4] = 1600;
- inst->mean_value[5] = 1600;
-
- if (mode == 0)
- {
- // Quality mode
- inst->over_hang_max_1[0] = OHMAX1_10MS_Q; // Overhang short speech burst
- inst->over_hang_max_1[1] = OHMAX1_20MS_Q; // Overhang short speech burst
- inst->over_hang_max_1[2] = OHMAX1_30MS_Q; // Overhang short speech burst
- inst->over_hang_max_2[0] = OHMAX2_10MS_Q; // Overhang long speech burst
- inst->over_hang_max_2[1] = OHMAX2_20MS_Q; // Overhang long speech burst
- inst->over_hang_max_2[2] = OHMAX2_30MS_Q; // Overhang long speech burst
-
- inst->individual[0] = INDIVIDUAL_10MS_Q;
- inst->individual[1] = INDIVIDUAL_20MS_Q;
- inst->individual[2] = INDIVIDUAL_30MS_Q;
-
- inst->total[0] = TOTAL_10MS_Q;
- inst->total[1] = TOTAL_20MS_Q;
- inst->total[2] = TOTAL_30MS_Q;
- } else if (mode == 1)
- {
- // Low bitrate mode
- inst->over_hang_max_1[0] = OHMAX1_10MS_LBR; // Overhang short speech burst
- inst->over_hang_max_1[1] = OHMAX1_20MS_LBR; // Overhang short speech burst
- inst->over_hang_max_1[2] = OHMAX1_30MS_LBR; // Overhang short speech burst
- inst->over_hang_max_2[0] = OHMAX2_10MS_LBR; // Overhang long speech burst
- inst->over_hang_max_2[1] = OHMAX2_20MS_LBR; // Overhang long speech burst
- inst->over_hang_max_2[2] = OHMAX2_30MS_LBR; // Overhang long speech burst
-
- inst->individual[0] = INDIVIDUAL_10MS_LBR;
- inst->individual[1] = INDIVIDUAL_20MS_LBR;
- inst->individual[2] = INDIVIDUAL_30MS_LBR;
-
- inst->total[0] = TOTAL_10MS_LBR;
- inst->total[1] = TOTAL_20MS_LBR;
- inst->total[2] = TOTAL_30MS_LBR;
- } else if (mode == 2)
- {
- // Aggressive mode
- inst->over_hang_max_1[0] = OHMAX1_10MS_AGG; // Overhang short speech burst
- inst->over_hang_max_1[1] = OHMAX1_20MS_AGG; // Overhang short speech burst
- inst->over_hang_max_1[2] = OHMAX1_30MS_AGG; // Overhang short speech burst
- inst->over_hang_max_2[0] = OHMAX2_10MS_AGG; // Overhang long speech burst
- inst->over_hang_max_2[1] = OHMAX2_20MS_AGG; // Overhang long speech burst
- inst->over_hang_max_2[2] = OHMAX2_30MS_AGG; // Overhang long speech burst
-
- inst->individual[0] = INDIVIDUAL_10MS_AGG;
- inst->individual[1] = INDIVIDUAL_20MS_AGG;
- inst->individual[2] = INDIVIDUAL_30MS_AGG;
-
- inst->total[0] = TOTAL_10MS_AGG;
- inst->total[1] = TOTAL_20MS_AGG;
- inst->total[2] = TOTAL_30MS_AGG;
- } else
- {
- // Very aggressive mode
- inst->over_hang_max_1[0] = OHMAX1_10MS_VAG; // Overhang short speech burst
- inst->over_hang_max_1[1] = OHMAX1_20MS_VAG; // Overhang short speech burst
- inst->over_hang_max_1[2] = OHMAX1_30MS_VAG; // Overhang short speech burst
- inst->over_hang_max_2[0] = OHMAX2_10MS_VAG; // Overhang long speech burst
- inst->over_hang_max_2[1] = OHMAX2_20MS_VAG; // Overhang long speech burst
- inst->over_hang_max_2[2] = OHMAX2_30MS_VAG; // Overhang long speech burst
-
- inst->individual[0] = INDIVIDUAL_10MS_VAG;
- inst->individual[1] = INDIVIDUAL_20MS_VAG;
- inst->individual[2] = INDIVIDUAL_30MS_VAG;
-
- inst->total[0] = TOTAL_10MS_VAG;
- inst->total[1] = TOTAL_20MS_VAG;
- inst->total[2] = TOTAL_30MS_VAG;
- }
-
- inst->init_flag = kInitCheck;
-
- return 0;
-}
-
-// Set aggressiveness mode
-int WebRtcVad_set_mode_core(VadInstT *inst, short mode)
-{
-
- if (mode == 0)
- {
- // Quality mode
- inst->over_hang_max_1[0] = OHMAX1_10MS_Q; // Overhang short speech burst
- inst->over_hang_max_1[1] = OHMAX1_20MS_Q; // Overhang short speech burst
- inst->over_hang_max_1[2] = OHMAX1_30MS_Q; // Overhang short speech burst
- inst->over_hang_max_2[0] = OHMAX2_10MS_Q; // Overhang long speech burst
- inst->over_hang_max_2[1] = OHMAX2_20MS_Q; // Overhang long speech burst
- inst->over_hang_max_2[2] = OHMAX2_30MS_Q; // Overhang long speech burst
-
- inst->individual[0] = INDIVIDUAL_10MS_Q;
- inst->individual[1] = INDIVIDUAL_20MS_Q;
- inst->individual[2] = INDIVIDUAL_30MS_Q;
-
- inst->total[0] = TOTAL_10MS_Q;
- inst->total[1] = TOTAL_20MS_Q;
- inst->total[2] = TOTAL_30MS_Q;
- } else if (mode == 1)
- {
- // Low bitrate mode
- inst->over_hang_max_1[0] = OHMAX1_10MS_LBR; // Overhang short speech burst
- inst->over_hang_max_1[1] = OHMAX1_20MS_LBR; // Overhang short speech burst
- inst->over_hang_max_1[2] = OHMAX1_30MS_LBR; // Overhang short speech burst
- inst->over_hang_max_2[0] = OHMAX2_10MS_LBR; // Overhang long speech burst
- inst->over_hang_max_2[1] = OHMAX2_20MS_LBR; // Overhang long speech burst
- inst->over_hang_max_2[2] = OHMAX2_30MS_LBR; // Overhang long speech burst
-
- inst->individual[0] = INDIVIDUAL_10MS_LBR;
- inst->individual[1] = INDIVIDUAL_20MS_LBR;
- inst->individual[2] = INDIVIDUAL_30MS_LBR;
-
- inst->total[0] = TOTAL_10MS_LBR;
- inst->total[1] = TOTAL_20MS_LBR;
- inst->total[2] = TOTAL_30MS_LBR;
- } else if (mode == 2)
- {
- // Aggressive mode
- inst->over_hang_max_1[0] = OHMAX1_10MS_AGG; // Overhang short speech burst
- inst->over_hang_max_1[1] = OHMAX1_20MS_AGG; // Overhang short speech burst
- inst->over_hang_max_1[2] = OHMAX1_30MS_AGG; // Overhang short speech burst
- inst->over_hang_max_2[0] = OHMAX2_10MS_AGG; // Overhang long speech burst
- inst->over_hang_max_2[1] = OHMAX2_20MS_AGG; // Overhang long speech burst
- inst->over_hang_max_2[2] = OHMAX2_30MS_AGG; // Overhang long speech burst
-
- inst->individual[0] = INDIVIDUAL_10MS_AGG;
- inst->individual[1] = INDIVIDUAL_20MS_AGG;
- inst->individual[2] = INDIVIDUAL_30MS_AGG;
-
- inst->total[0] = TOTAL_10MS_AGG;
- inst->total[1] = TOTAL_20MS_AGG;
- inst->total[2] = TOTAL_30MS_AGG;
- } else if (mode == 3)
- {
- // Very aggressive mode
- inst->over_hang_max_1[0] = OHMAX1_10MS_VAG; // Overhang short speech burst
- inst->over_hang_max_1[1] = OHMAX1_20MS_VAG; // Overhang short speech burst
- inst->over_hang_max_1[2] = OHMAX1_30MS_VAG; // Overhang short speech burst
- inst->over_hang_max_2[0] = OHMAX2_10MS_VAG; // Overhang long speech burst
- inst->over_hang_max_2[1] = OHMAX2_20MS_VAG; // Overhang long speech burst
- inst->over_hang_max_2[2] = OHMAX2_30MS_VAG; // Overhang long speech burst
-
- inst->individual[0] = INDIVIDUAL_10MS_VAG;
- inst->individual[1] = INDIVIDUAL_20MS_VAG;
- inst->individual[2] = INDIVIDUAL_30MS_VAG;
-
- inst->total[0] = TOTAL_10MS_VAG;
- inst->total[1] = TOTAL_20MS_VAG;
- inst->total[2] = TOTAL_30MS_VAG;
- } else
- {
- return -1;
- }
-
- return 0;
-}
-
-// Calculate VAD decision by first extracting feature values and then calculate
-// probability for both speech and background noise.
-
-WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT *inst, WebRtc_Word16 *speech_frame,
- int frame_length)
-{
- WebRtc_Word16 len, vad;
- WebRtc_Word16 speechWB[480]; // Downsampled speech frame: 960 samples (30ms in SWB)
- WebRtc_Word16 speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB)
-
-
- // Downsample signal 32->16->8 before doing VAD
- WebRtcVad_Downsampling(speech_frame, speechWB, &(inst->downsampling_filter_states[2]),
- frame_length);
- len = WEBRTC_SPL_RSHIFT_W16(frame_length, 1);
-
- WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states, len);
- len = WEBRTC_SPL_RSHIFT_W16(len, 1);
-
- // Do VAD on an 8 kHz signal
- vad = WebRtcVad_CalcVad8khz(inst, speechNB, len);
-
- return vad;
-}
-
-WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT *inst, WebRtc_Word16 *speech_frame,
- int frame_length)
-{
- WebRtc_Word16 len, vad;
- WebRtc_Word16 speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB)
-
- // Wideband: Downsample signal before doing VAD
- WebRtcVad_Downsampling(speech_frame, speechNB, inst->downsampling_filter_states,
- frame_length);
-
- len = WEBRTC_SPL_RSHIFT_W16(frame_length, 1);
- vad = WebRtcVad_CalcVad8khz(inst, speechNB, len);
-
- return vad;
-}
-
-WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT *inst, WebRtc_Word16 *speech_frame,
- int frame_length)
-{
- WebRtc_Word16 feature_vector[NUM_CHANNELS], total_power;
-
- // Get power in the bands
- total_power = WebRtcVad_get_features(inst, speech_frame, frame_length, feature_vector);
-
- // Make a VAD
- inst->vad = WebRtcVad_GmmProbability(inst, feature_vector, total_power, frame_length);
-
- return inst->vad;
-}
-
-// Calculate probability for both speech and background noise, and perform a
-// hypothesis-test.
-WebRtc_Word16 WebRtcVad_GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
- WebRtc_Word16 total_power, int frame_length)
-{
- int n, k;
- WebRtc_Word16 backval;
- WebRtc_Word16 h0, h1;
- WebRtc_Word16 ratvec, xval;
- WebRtc_Word16 vadflag;
- WebRtc_Word16 shifts0, shifts1;
- WebRtc_Word16 tmp16, tmp16_1, tmp16_2;
- WebRtc_Word16 diff, nr, pos;
- WebRtc_Word16 nmk, nmk2, nmk3, smk, smk2, nsk, ssk;
- WebRtc_Word16 delt, ndelt;
- WebRtc_Word16 maxspe, maxmu;
- WebRtc_Word16 deltaN[NUM_TABLE_VALUES], deltaS[NUM_TABLE_VALUES];
- WebRtc_Word16 ngprvec[NUM_TABLE_VALUES], sgprvec[NUM_TABLE_VALUES];
- WebRtc_Word32 h0test, h1test;
- WebRtc_Word32 tmp32_1, tmp32_2;
- WebRtc_Word32 dotVal;
- WebRtc_Word32 nmid, smid;
- WebRtc_Word32 probn[NUM_MODELS], probs[NUM_MODELS];
- WebRtc_Word16 *nmean1ptr, *nmean2ptr, *smean1ptr, *smean2ptr, *nstd1ptr, *nstd2ptr,
- *sstd1ptr, *sstd2ptr;
- WebRtc_Word16 overhead1, overhead2, individualTest, totalTest;
-
- // Set the thresholds to different values based on frame length
- if (frame_length == 80)
- {
- // 80 input samples
- overhead1 = inst->over_hang_max_1[0];
- overhead2 = inst->over_hang_max_2[0];
- individualTest = inst->individual[0];
- totalTest = inst->total[0];
- } else if (frame_length == 160)
- {
- // 160 input samples
- overhead1 = inst->over_hang_max_1[1];
- overhead2 = inst->over_hang_max_2[1];
- individualTest = inst->individual[1];
- totalTest = inst->total[1];
- } else
- {
- // 240 input samples
- overhead1 = inst->over_hang_max_1[2];
- overhead2 = inst->over_hang_max_2[2];
- individualTest = inst->individual[2];
- totalTest = inst->total[2];
- }
-
- if (total_power > MIN_ENERGY)
- { // If signal present at all
-
- // Set pointers to the gaussian parameters
- nmean1ptr = &inst->noise_means[0];
- nmean2ptr = &inst->noise_means[NUM_CHANNELS];
- smean1ptr = &inst->speech_means[0];
- smean2ptr = &inst->speech_means[NUM_CHANNELS];
- nstd1ptr = &inst->noise_stds[0];
- nstd2ptr = &inst->noise_stds[NUM_CHANNELS];
- sstd1ptr = &inst->speech_stds[0];
- sstd2ptr = &inst->speech_stds[NUM_CHANNELS];
-
- vadflag = 0;
- dotVal = 0;
- for (n = 0; n < NUM_CHANNELS; n++)
- { // For all channels
-
- pos = WEBRTC_SPL_LSHIFT_W16(n, 1);
- xval = feature_vector[n];
-
- // Probability for Noise, Q7 * Q20 = Q27
- tmp32_1 = WebRtcVad_GaussianProbability(xval, *nmean1ptr++, *nstd1ptr++,
- &deltaN[pos]);
- probn[0] = (WebRtc_Word32)(kNoiseDataWeights[n] * tmp32_1);
- tmp32_1 = WebRtcVad_GaussianProbability(xval, *nmean2ptr++, *nstd2ptr++,
- &deltaN[pos + 1]);
- probn[1] = (WebRtc_Word32)(kNoiseDataWeights[n + NUM_CHANNELS] * tmp32_1);
- h0test = probn[0] + probn[1]; // Q27
- h0 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(h0test, 12); // Q15
-
- // Probability for Speech
- tmp32_1 = WebRtcVad_GaussianProbability(xval, *smean1ptr++, *sstd1ptr++,
- &deltaS[pos]);
- probs[0] = (WebRtc_Word32)(kSpeechDataWeights[n] * tmp32_1);
- tmp32_1 = WebRtcVad_GaussianProbability(xval, *smean2ptr++, *sstd2ptr++,
- &deltaS[pos + 1]);
- probs[1] = (WebRtc_Word32)(kSpeechDataWeights[n + NUM_CHANNELS] * tmp32_1);
- h1test = probs[0] + probs[1]; // Q27
- h1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(h1test, 12); // Q15
-
- // Get likelihood ratio. Approximate log2(H1/H0) with shifts0 - shifts1
- shifts0 = WebRtcSpl_NormW32(h0test);
- shifts1 = WebRtcSpl_NormW32(h1test);
-
- if ((h0test > 0) && (h1test > 0))
- {
- ratvec = shifts0 - shifts1;
- } else if (h1test > 0)
- {
- ratvec = 31 - shifts1;
- } else if (h0test > 0)
- {
- ratvec = shifts0 - 31;
- } else
- {
- ratvec = 0;
- }
-
- // VAD decision with spectrum weighting
- dotVal += WEBRTC_SPL_MUL_16_16(ratvec, kSpectrumWeight[n]);
-
- // Individual channel test
- if ((ratvec << 2) > individualTest)
- {
- vadflag = 1;
- }
-
- // Probabilities used when updating model
- if (h0 > 0)
- {
- tmp32_1 = probn[0] & 0xFFFFF000; // Q27
- tmp32_2 = WEBRTC_SPL_LSHIFT_W32(tmp32_1, 2); // Q29
- ngprvec[pos] = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_2, h0);
- ngprvec[pos + 1] = 16384 - ngprvec[pos];
- } else
- {
- ngprvec[pos] = 16384;
- ngprvec[pos + 1] = 0;
- }
-
- // Probabilities used when updating model
- if (h1 > 0)
- {
- tmp32_1 = probs[0] & 0xFFFFF000;
- tmp32_2 = WEBRTC_SPL_LSHIFT_W32(tmp32_1, 2);
- sgprvec[pos] = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_2, h1);
- sgprvec[pos + 1] = 16384 - sgprvec[pos];
- } else
- {
- sgprvec[pos] = 0;
- sgprvec[pos + 1] = 0;
- }
- }
-
- // Overall test
- if (dotVal >= totalTest)
- {
- vadflag |= 1;
- }
-
- // Set pointers to the means and standard deviations.
- nmean1ptr = &inst->noise_means[0];
- smean1ptr = &inst->speech_means[0];
- nstd1ptr = &inst->noise_stds[0];
- sstd1ptr = &inst->speech_stds[0];
-
- maxspe = 12800;
-
- // Update the model's parameters
- for (n = 0; n < NUM_CHANNELS; n++)
- {
-
- pos = WEBRTC_SPL_LSHIFT_W16(n, 1);
-
- // Get min value in past which is used for long term correction
- backval = WebRtcVad_FindMinimum(inst, feature_vector[n], n); // Q4
-
- // Compute the "global" mean, that is the sum of the two means weighted
- nmid = WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n], *nmean1ptr); // Q7 * Q7
- nmid += WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n+NUM_CHANNELS],
- *(nmean1ptr+NUM_CHANNELS));
- tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(nmid, 6); // Q8
-
- for (k = 0; k < NUM_MODELS; k++)
- {
-
- nr = pos + k;
-
- nmean2ptr = nmean1ptr + k * NUM_CHANNELS;
- smean2ptr = smean1ptr + k * NUM_CHANNELS;
- nstd2ptr = nstd1ptr + k * NUM_CHANNELS;
- sstd2ptr = sstd1ptr + k * NUM_CHANNELS;
- nmk = *nmean2ptr;
- smk = *smean2ptr;
- nsk = *nstd2ptr;
- ssk = *sstd2ptr;
-
- // Update noise mean vector if the frame consists of noise only
- nmk2 = nmk;
- if (!vadflag)
- {
- // deltaN = (x-mu)/sigma^2
- // ngprvec[k] = probn[k]/(probn[0] + probn[1])
-
- delt = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(ngprvec[nr],
- deltaN[nr], 11); // Q14*Q11
- nmk2 = nmk + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delt,
- kNoiseUpdateConst,
- 22); // Q7+(Q14*Q15>>22)
- }
-
- // Long term correction of the noise mean
- ndelt = WEBRTC_SPL_LSHIFT_W16(backval, 4);
- ndelt -= tmp16_1; // Q8 - Q8
- nmk3 = nmk2 + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(ndelt,
- kBackEta,
- 9); // Q7+(Q8*Q8)>>9
-
- // Control that the noise mean does not drift to much
- tmp16 = WEBRTC_SPL_LSHIFT_W16(k+5, 7);
- if (nmk3 < tmp16)
- nmk3 = tmp16;
- tmp16 = WEBRTC_SPL_LSHIFT_W16(72+k-n, 7);
- if (nmk3 > tmp16)
- nmk3 = tmp16;
- *nmean2ptr = nmk3;
-
- if (vadflag)
- {
- // Update speech mean vector:
- // deltaS = (x-mu)/sigma^2
- // sgprvec[k] = probn[k]/(probn[0] + probn[1])
-
- delt = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(sgprvec[nr],
- deltaS[nr],
- 11); // (Q14*Q11)>>11=Q14
- tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delt,
- kSpeechUpdateConst,
- 21) + 1;
- smk2 = smk + (tmp16 >> 1); // Q7 + (Q14 * Q15 >> 22)
-
- // Control that the speech mean does not drift to much
- maxmu = maxspe + 640;
- if (smk2 < kMinimumMean[k])
- smk2 = kMinimumMean[k];
- if (smk2 > maxmu)
- smk2 = maxmu;
-
- *smean2ptr = smk2;
-
- // (Q7>>3) = Q4
- tmp16 = WEBRTC_SPL_RSHIFT_W16((smk + 4), 3);
-
- tmp16 = feature_vector[n] - tmp16; // Q4
- tmp32_1 = WEBRTC_SPL_MUL_16_16_RSFT(deltaS[nr], tmp16, 3);
- tmp32_2 = tmp32_1 - (WebRtc_Word32)4096; // Q12
- tmp16 = WEBRTC_SPL_RSHIFT_W16((sgprvec[nr]), 2);
- tmp32_1 = (WebRtc_Word32)(tmp16 * tmp32_2);// (Q15>>3)*(Q14>>2)=Q12*Q12=Q24
-
- tmp32_2 = WEBRTC_SPL_RSHIFT_W32(tmp32_1, 4); // Q20
-
- // 0.1 * Q20 / Q7 = Q13
- if (tmp32_2 > 0)
- tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_2, ssk * 10);
- else
- {
- tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(-tmp32_2, ssk * 10);
- tmp16 = -tmp16;
- }
- // divide by 4 giving an update factor of 0.025
- tmp16 += 128; // Rounding
- ssk += WEBRTC_SPL_RSHIFT_W16(tmp16, 8);
- // Division with 8 plus Q7
- if (ssk < MIN_STD)
- ssk = MIN_STD;
- *sstd2ptr = ssk;
- } else
- {
- // Update GMM variance vectors
- // deltaN * (feature_vector[n] - nmk) - 1, Q11 * Q4
- tmp16 = feature_vector[n] - WEBRTC_SPL_RSHIFT_W16(nmk, 3);
-
- // (Q15>>3) * (Q14>>2) = Q12 * Q12 = Q24
- tmp32_1 = WEBRTC_SPL_MUL_16_16_RSFT(deltaN[nr], tmp16, 3) - 4096;
- tmp16 = WEBRTC_SPL_RSHIFT_W16((ngprvec[nr]+2), 2);
- tmp32_2 = (WebRtc_Word32)(tmp16 * tmp32_1);
- tmp32_1 = WEBRTC_SPL_RSHIFT_W32(tmp32_2, 14);
- // Q20 * approx 0.001 (2^-10=0.0009766)
-
- // Q20 / Q7 = Q13
- tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_1, nsk);
- if (tmp32_1 > 0)
- tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_1, nsk);
- else
- {
- tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(-tmp32_1, nsk);
- tmp16 = -tmp16;
- }
- tmp16 += 32; // Rounding
- nsk += WEBRTC_SPL_RSHIFT_W16(tmp16, 6);
-
- if (nsk < MIN_STD)
- nsk = MIN_STD;
-
- *nstd2ptr = nsk;
- }
- }
-
- // Separate models if they are too close - nmid in Q14
- nmid = WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n], *nmean1ptr);
- nmid += WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n+NUM_CHANNELS], *nmean2ptr);
-
- // smid in Q14
- smid = WEBRTC_SPL_MUL_16_16(kSpeechDataWeights[n], *smean1ptr);
- smid += WEBRTC_SPL_MUL_16_16(kSpeechDataWeights[n+NUM_CHANNELS], *smean2ptr);
-
- // diff = "global" speech mean - "global" noise mean
- diff = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(smid, 9);
- tmp16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(nmid, 9);
- diff -= tmp16;
-
- if (diff < kMinimumDifference[n])
- {
-
- tmp16 = kMinimumDifference[n] - diff; // Q5
-
- // tmp16_1 = ~0.8 * (kMinimumDifference - diff) in Q7
- // tmp16_2 = ~0.2 * (kMinimumDifference - diff) in Q7
- tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(13, tmp16, 2);
- tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(3, tmp16, 2);
-
- // First Gauss, speech model
- tmp16 = tmp16_1 + *smean1ptr;
- *smean1ptr = tmp16;
- smid = WEBRTC_SPL_MUL_16_16(tmp16, kSpeechDataWeights[n]);
-
- // Second Gauss, speech model
- tmp16 = tmp16_1 + *smean2ptr;
- *smean2ptr = tmp16;
- smid += WEBRTC_SPL_MUL_16_16(tmp16, kSpeechDataWeights[n+NUM_CHANNELS]);
-
- // First Gauss, noise model
- tmp16 = *nmean1ptr - tmp16_2;
- *nmean1ptr = tmp16;
-
- nmid = WEBRTC_SPL_MUL_16_16(tmp16, kNoiseDataWeights[n]);
-
- // Second Gauss, noise model
- tmp16 = *nmean2ptr - tmp16_2;
- *nmean2ptr = tmp16;
- nmid += WEBRTC_SPL_MUL_16_16(tmp16, kNoiseDataWeights[n+NUM_CHANNELS]);
- }
-
- // Control that the speech & noise means do not drift to much
- maxspe = kMaximumSpeech[n];
- tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(smid, 7);
- if (tmp16_2 > maxspe)
- { // Upper limit of speech model
- tmp16_2 -= maxspe;
-
- *smean1ptr -= tmp16_2;
- *smean2ptr -= tmp16_2;
- }
-
- tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(nmid, 7);
- if (tmp16_2 > kMaximumNoise[n])
- {
- tmp16_2 -= kMaximumNoise[n];
-
- *nmean1ptr -= tmp16_2;
- *nmean2ptr -= tmp16_2;
- }
-
- *nmean1ptr++;
- *smean1ptr++;
- *nstd1ptr++;
- *sstd1ptr++;
- }
- inst->frame_counter++;
- } else
- {
- vadflag = 0;
- }
-
- // Hangover smoothing
- if (!vadflag)
- {
- if (inst->over_hang > 0)
- {
- vadflag = 2 + inst->over_hang;
- inst->over_hang = inst->over_hang - 1;
- }
- inst->num_of_speech = 0;
- } else
- {
- inst->num_of_speech = inst->num_of_speech + 1;
- if (inst->num_of_speech > NSP_MAX)
- {
- inst->num_of_speech = NSP_MAX;
- inst->over_hang = overhead2;
- } else
- inst->over_hang = overhead1;
- }
- return vadflag;
-}
diff --git a/src/common_audio/vad/main/source/vad_core.h b/src/common_audio/vad/main/source/vad_core.h
deleted file mode 100644
index 544caf5ab3..0000000000
--- a/src/common_audio/vad/main/source/vad_core.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/*
- * This header file includes the descriptions of the core VAD calls.
- */
-
-#ifndef WEBRTC_VAD_CORE_H_
-#define WEBRTC_VAD_CORE_H_
-
-#include "typedefs.h"
-#include "vad_defines.h"
-
-typedef struct VadInstT_
-{
-
- WebRtc_Word16 vad;
- WebRtc_Word32 downsampling_filter_states[4];
- WebRtc_Word16 noise_means[NUM_TABLE_VALUES];
- WebRtc_Word16 speech_means[NUM_TABLE_VALUES];
- WebRtc_Word16 noise_stds[NUM_TABLE_VALUES];
- WebRtc_Word16 speech_stds[NUM_TABLE_VALUES];
- WebRtc_Word32 frame_counter;
- WebRtc_Word16 over_hang; // Over Hang
- WebRtc_Word16 num_of_speech;
- WebRtc_Word16 index_vector[16 * NUM_CHANNELS];
- WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS];
- WebRtc_Word16 mean_value[NUM_CHANNELS];
- WebRtc_Word16 upper_state[5];
- WebRtc_Word16 lower_state[5];
- WebRtc_Word16 hp_filter_state[4];
- WebRtc_Word16 over_hang_max_1[3];
- WebRtc_Word16 over_hang_max_2[3];
- WebRtc_Word16 individual[3];
- WebRtc_Word16 total[3];
-
- short init_flag;
-
-} VadInstT;
-
-/****************************************************************************
- * WebRtcVad_InitCore(...)
- *
- * This function initializes a VAD instance
- *
- * Input:
- * - inst : Instance that should be initialized
- * - mode : Aggressiveness degree
- * 0 (High quality) - 3 (Highly aggressive)
- *
- * Output:
- * - inst : Initialized instance
- *
- * Return value : 0 - Ok
- * -1 - Error
- */
-int WebRtcVad_InitCore(VadInstT* inst, short mode);
-
-/****************************************************************************
- * WebRtcVad_set_mode_core(...)
- *
- * This function changes the VAD settings
- *
- * Input:
- * - inst : VAD instance
- * - mode : Aggressiveness degree
- * 0 (High quality) - 3 (Highly aggressive)
- *
- * Output:
- * - inst : Changed instance
- *
- * Return value : 0 - Ok
- * -1 - Error
- */
-
-int WebRtcVad_set_mode_core(VadInstT* inst, short mode);
-
-/****************************************************************************
- * WebRtcVad_CalcVad32khz(...)
- * WebRtcVad_CalcVad16khz(...)
- * WebRtcVad_CalcVad8khz(...)
- *
- * Calculate probability for active speech and make VAD decision.
- *
- * Input:
- * - inst : Instance that should be initialized
- * - speech_frame : Input speech frame
- * - frame_length : Number of input samples
- *
- * Output:
- * - inst : Updated filter states etc.
- *
- * Return value : VAD decision
- * 0 - No active speech
- * 1-6 - Active speech
- */
-WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT* inst, WebRtc_Word16* speech_frame,
- int frame_length);
-WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT* inst, WebRtc_Word16* speech_frame,
- int frame_length);
-WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT* inst, WebRtc_Word16* speech_frame,
- int frame_length);
-
-/****************************************************************************
- * WebRtcVad_GmmProbability(...)
- *
- * This function calculates the probabilities for background noise and
- * speech using Gaussian Mixture Models. A hypothesis-test is performed to decide
- * which type of signal is most probable.
- *
- * Input:
- * - inst : Pointer to VAD instance
- * - feature_vector : Feature vector = log10(energy in frequency band)
- * - total_power : Total power in frame.
- * - frame_length : Number of input samples
- *
- * Output:
- * VAD decision : 0 - noise, 1 - speech
- *
- */
-WebRtc_Word16 WebRtcVad_GmmProbability(VadInstT* inst, WebRtc_Word16* feature_vector,
- WebRtc_Word16 total_power, int frame_length);
-
-#endif // WEBRTC_VAD_CORE_H_
diff --git a/src/common_audio/vad/main/source/vad_defines.h b/src/common_audio/vad/main/source/vad_defines.h
deleted file mode 100644
index b33af2ef7d..0000000000
--- a/src/common_audio/vad/main/source/vad_defines.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/*
- * This header file includes the macros used in VAD.
- */
-
-#ifndef WEBRTC_VAD_DEFINES_H_
-#define WEBRTC_VAD_DEFINES_H_
-
-#define NUM_CHANNELS 6 // Eight frequency bands
-#define NUM_MODELS 2 // Number of Gaussian models
-#define NUM_TABLE_VALUES NUM_CHANNELS * NUM_MODELS
-
-#define MIN_ENERGY 10
-#define ALPHA1 6553 // 0.2 in Q15
-#define ALPHA2 32439 // 0.99 in Q15
-#define NSP_MAX 6 // Maximum number of VAD=1 frames in a row counted
-#define MIN_STD 384 // Minimum standard deviation
-// Mode 0, Quality thresholds - Different thresholds for the different frame lengths
-#define INDIVIDUAL_10MS_Q 24
-#define INDIVIDUAL_20MS_Q 21 // (log10(2)*66)<<2 ~=16
-#define INDIVIDUAL_30MS_Q 24
-
-#define TOTAL_10MS_Q 57
-#define TOTAL_20MS_Q 48
-#define TOTAL_30MS_Q 57
-
-#define OHMAX1_10MS_Q 8 // Max Overhang 1
-#define OHMAX2_10MS_Q 14 // Max Overhang 2
-#define OHMAX1_20MS_Q 4 // Max Overhang 1
-#define OHMAX2_20MS_Q 7 // Max Overhang 2
-#define OHMAX1_30MS_Q 3
-#define OHMAX2_30MS_Q 5
-
-// Mode 1, Low bitrate thresholds - Different thresholds for the different frame lengths
-#define INDIVIDUAL_10MS_LBR 37
-#define INDIVIDUAL_20MS_LBR 32
-#define INDIVIDUAL_30MS_LBR 37
-
-#define TOTAL_10MS_LBR 100
-#define TOTAL_20MS_LBR 80
-#define TOTAL_30MS_LBR 100
-
-#define OHMAX1_10MS_LBR 8 // Max Overhang 1
-#define OHMAX2_10MS_LBR 14 // Max Overhang 2
-#define OHMAX1_20MS_LBR 4
-#define OHMAX2_20MS_LBR 7
-
-#define OHMAX1_30MS_LBR 3
-#define OHMAX2_30MS_LBR 5
-
-// Mode 2, Very aggressive thresholds - Different thresholds for the different frame lengths
-#define INDIVIDUAL_10MS_AGG 82
-#define INDIVIDUAL_20MS_AGG 78
-#define INDIVIDUAL_30MS_AGG 82
-
-#define TOTAL_10MS_AGG 285 //580
-#define TOTAL_20MS_AGG 260
-#define TOTAL_30MS_AGG 285
-
-#define OHMAX1_10MS_AGG 6 // Max Overhang 1
-#define OHMAX2_10MS_AGG 9 // Max Overhang 2
-#define OHMAX1_20MS_AGG 3
-#define OHMAX2_20MS_AGG 5
-
-#define OHMAX1_30MS_AGG 2
-#define OHMAX2_30MS_AGG 3
-
-// Mode 3, Super aggressive thresholds - Different thresholds for the different frame lengths
-#define INDIVIDUAL_10MS_VAG 94
-#define INDIVIDUAL_20MS_VAG 94
-#define INDIVIDUAL_30MS_VAG 94
-
-#define TOTAL_10MS_VAG 1100 //1700
-#define TOTAL_20MS_VAG 1050
-#define TOTAL_30MS_VAG 1100
-
-#define OHMAX1_10MS_VAG 6 // Max Overhang 1
-#define OHMAX2_10MS_VAG 9 // Max Overhang 2
-#define OHMAX1_20MS_VAG 3
-#define OHMAX2_20MS_VAG 5
-
-#define OHMAX1_30MS_VAG 2
-#define OHMAX2_30MS_VAG 3
-
-#endif // WEBRTC_VAD_DEFINES_H_
diff --git a/src/common_audio/vad/main/source/vad_filterbank.c b/src/common_audio/vad/main/source/vad_filterbank.c
deleted file mode 100644
index 11392c917a..0000000000
--- a/src/common_audio/vad/main/source/vad_filterbank.c
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/*
- * This file includes the implementation of the internal filterbank associated functions.
- * For function description, see vad_filterbank.h.
- */
-
-#include "vad_filterbank.h"
-#include "vad_defines.h"
-#include "vad_const.h"
-#include "signal_processing_library.h"
-
-void WebRtcVad_HpOutput(WebRtc_Word16 *in_vector,
- WebRtc_Word16 in_vector_length,
- WebRtc_Word16 *out_vector,
- WebRtc_Word16 *filter_state)
-{
- WebRtc_Word16 i, *pi, *outPtr;
- WebRtc_Word32 tmpW32;
-
- pi = &in_vector[0];
- outPtr = &out_vector[0];
-
- // The sum of the absolute values of the impulse response:
- // The zero/pole-filter has a max amplification of a single sample of: 1.4546
- // Impulse response: 0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194
- // The all-zero section has a max amplification of a single sample of: 1.6189
- // Impulse response: 0.4047 -0.8094 0.4047 0 0 0
- // The all-pole section has a max amplification of a single sample of: 1.9931
- // Impulse response: 1.0000 0.4734 -0.1189 -0.2187 -0.0627 0.04532
-
- for (i = 0; i < in_vector_length; i++)
- {
- // all-zero section (filter coefficients in Q14)
- tmpW32 = (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[0], (*pi));
- tmpW32 += (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[1], filter_state[0]);
- tmpW32 += (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[2], filter_state[1]); // Q14
- filter_state[1] = filter_state[0];
- filter_state[0] = *pi++;
-
- // all-pole section
- tmpW32 -= (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpPoleCoefs[1], filter_state[2]); // Q14
- tmpW32 -= (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpPoleCoefs[2], filter_state[3]);
- filter_state[3] = filter_state[2];
- filter_state[2] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32 (tmpW32, 14);
- *outPtr++ = filter_state[2];
- }
-}
-
-void WebRtcVad_Allpass(WebRtc_Word16 *in_vector,
- WebRtc_Word16 *out_vector,
- WebRtc_Word16 filter_coefficients,
- int vector_length,
- WebRtc_Word16 *filter_state)
-{
- // The filter can only cause overflow (in the w16 output variable)
- // if more than 4 consecutive input numbers are of maximum value and
- // has the the same sign as the impulse responses first taps.
- // First 6 taps of the impulse response: 0.6399 0.5905 -0.3779
- // 0.2418 -0.1547 0.0990
-
- int n;
- WebRtc_Word16 tmp16;
- WebRtc_Word32 tmp32, in32, state32;
-
- state32 = WEBRTC_SPL_LSHIFT_W32(((WebRtc_Word32)(*filter_state)), 16); // Q31
-
- for (n = 0; n < vector_length; n++)
- {
-
- tmp32 = state32 + WEBRTC_SPL_MUL_16_16(filter_coefficients, (*in_vector));
- tmp16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 16);
- *out_vector++ = tmp16;
- in32 = WEBRTC_SPL_LSHIFT_W32(((WebRtc_Word32)(*in_vector)), 14);
- state32 = in32 - WEBRTC_SPL_MUL_16_16(filter_coefficients, tmp16);
- state32 = WEBRTC_SPL_LSHIFT_W32(state32, 1);
- in_vector += 2;
- }
-
- *filter_state = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(state32, 16);
-}
-
-void WebRtcVad_SplitFilter(WebRtc_Word16 *in_vector,
- WebRtc_Word16 *out_vector_hp,
- WebRtc_Word16 *out_vector_lp,
- WebRtc_Word16 *upper_state,
- WebRtc_Word16 *lower_state,
- int in_vector_length)
-{
- WebRtc_Word16 tmpOut;
- int k, halflen;
-
- // Downsampling by 2 and get two branches
- halflen = WEBRTC_SPL_RSHIFT_W16(in_vector_length, 1);
-
- // All-pass filtering upper branch
- WebRtcVad_Allpass(&in_vector[0], out_vector_hp, kAllPassCoefsQ15[0], halflen, upper_state);
-
- // All-pass filtering lower branch
- WebRtcVad_Allpass(&in_vector[1], out_vector_lp, kAllPassCoefsQ15[1], halflen, lower_state);
-
- // Make LP and HP signals
- for (k = 0; k < halflen; k++)
- {
- tmpOut = *out_vector_hp;
- *out_vector_hp++ -= *out_vector_lp;
- *out_vector_lp++ += tmpOut;
- }
-}
-
-WebRtc_Word16 WebRtcVad_get_features(VadInstT *inst,
- WebRtc_Word16 *in_vector,
- int frame_size,
- WebRtc_Word16 *out_vector)
-{
- int curlen, filtno;
- WebRtc_Word16 vecHP1[120], vecLP1[120];
- WebRtc_Word16 vecHP2[60], vecLP2[60];
- WebRtc_Word16 *ptin;
- WebRtc_Word16 *hptout, *lptout;
- WebRtc_Word16 power = 0;
-
- // Split at 2000 Hz and downsample
- filtno = 0;
- ptin = in_vector;
- hptout = vecHP1;
- lptout = vecLP1;
- curlen = frame_size;
- WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno],
- &inst->lower_state[filtno], curlen);
-
- // Split at 3000 Hz and downsample
- filtno = 1;
- ptin = vecHP1;
- hptout = vecHP2;
- lptout = vecLP2;
- curlen = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
-
- WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno],
- &inst->lower_state[filtno], curlen);
-
- // Energy in 3000 Hz - 4000 Hz
- curlen = WEBRTC_SPL_RSHIFT_W16(curlen, 1);
- WebRtcVad_LogOfEnergy(vecHP2, &out_vector[5], &power, kOffsetVector[5], curlen);
-
- // Energy in 2000 Hz - 3000 Hz
- WebRtcVad_LogOfEnergy(vecLP2, &out_vector[4], &power, kOffsetVector[4], curlen);
-
- // Split at 1000 Hz and downsample
- filtno = 2;
- ptin = vecLP1;
- hptout = vecHP2;
- lptout = vecLP2;
- curlen = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
- WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno],
- &inst->lower_state[filtno], curlen);
-
- // Energy in 1000 Hz - 2000 Hz
- curlen = WEBRTC_SPL_RSHIFT_W16(curlen, 1);
- WebRtcVad_LogOfEnergy(vecHP2, &out_vector[3], &power, kOffsetVector[3], curlen);
-
- // Split at 500 Hz
- filtno = 3;
- ptin = vecLP2;
- hptout = vecHP1;
- lptout = vecLP1;
-
- WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno],
- &inst->lower_state[filtno], curlen);
-
- // Energy in 500 Hz - 1000 Hz
- curlen = WEBRTC_SPL_RSHIFT_W16(curlen, 1);
- WebRtcVad_LogOfEnergy(vecHP1, &out_vector[2], &power, kOffsetVector[2], curlen);
- // Split at 250 Hz
- filtno = 4;
- ptin = vecLP1;
- hptout = vecHP2;
- lptout = vecLP2;
-
- WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno],
- &inst->lower_state[filtno], curlen);
-
- // Energy in 250 Hz - 500 Hz
- curlen = WEBRTC_SPL_RSHIFT_W16(curlen, 1);
- WebRtcVad_LogOfEnergy(vecHP2, &out_vector[1], &power, kOffsetVector[1], curlen);
-
- // Remove DC and LFs
- WebRtcVad_HpOutput(vecLP2, curlen, vecHP1, inst->hp_filter_state);
-
- // Power in 80 Hz - 250 Hz
- WebRtcVad_LogOfEnergy(vecHP1, &out_vector[0], &power, kOffsetVector[0], curlen);
-
- return power;
-}
-
-void WebRtcVad_LogOfEnergy(WebRtc_Word16 *vector,
- WebRtc_Word16 *enerlogval,
- WebRtc_Word16 *power,
- WebRtc_Word16 offset,
- int vector_length)
-{
- WebRtc_Word16 enerSum = 0;
- WebRtc_Word16 zeros, frac, log2;
- WebRtc_Word32 energy;
-
- int shfts = 0, shfts2;
-
- energy = WebRtcSpl_Energy(vector, vector_length, &shfts);
-
- if (energy > 0)
- {
-
- shfts2 = 16 - WebRtcSpl_NormW32(energy);
- shfts += shfts2;
- // "shfts" is the total number of right shifts that has been done to enerSum.
- enerSum = (WebRtc_Word16)WEBRTC_SPL_SHIFT_W32(energy, -shfts2);
-
- // Find:
- // 160*log10(enerSum*2^shfts) = 160*log10(2)*log2(enerSum*2^shfts) =
- // 160*log10(2)*(log2(enerSum) + log2(2^shfts)) =
- // 160*log10(2)*(log2(enerSum) + shfts)
-
- zeros = WebRtcSpl_NormU32(enerSum);
- frac = (WebRtc_Word16)(((WebRtc_UWord32)((WebRtc_Word32)(enerSum) << zeros)
- & 0x7FFFFFFF) >> 21);
- log2 = (WebRtc_Word16)(((31 - zeros) << 10) + frac);
-
- *enerlogval = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(kLogConst, log2, 19)
- + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(shfts, kLogConst, 9);
-
- if (*enerlogval < 0)
- {
- *enerlogval = 0;
- }
- } else
- {
- *enerlogval = 0;
- shfts = -15;
- enerSum = 0;
- }
-
- *enerlogval += offset;
-
- // Total power in frame
- if (*power <= MIN_ENERGY)
- {
- if (shfts > 0)
- {
- *power += MIN_ENERGY + 1;
- } else if (WEBRTC_SPL_SHIFT_W16(enerSum, shfts) > MIN_ENERGY)
- {
- *power += MIN_ENERGY + 1;
- } else
- {
- *power += WEBRTC_SPL_SHIFT_W16(enerSum, shfts);
- }
- }
-}
diff --git a/src/common_audio/vad/main/source/vad_filterbank.h b/src/common_audio/vad/main/source/vad_filterbank.h
deleted file mode 100644
index a5507ead65..0000000000
--- a/src/common_audio/vad/main/source/vad_filterbank.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/*
- * This header file includes the description of the internal VAD call
- * WebRtcVad_GaussianProbability.
- */
-
-#ifndef WEBRTC_VAD_FILTERBANK_H_
-#define WEBRTC_VAD_FILTERBANK_H_
-
-#include "vad_core.h"
-
-/****************************************************************************
- * WebRtcVad_HpOutput(...)
- *
- * This function removes DC from the lowest frequency band
- *
- * Input:
- * - in_vector : Samples in the frequency interval 0 - 250 Hz
- * - in_vector_length : Length of input and output vector
- * - filter_state : Current state of the filter
- *
- * Output:
- * - out_vector : Samples in the frequency interval 80 - 250 Hz
- * - filter_state : Updated state of the filter
- *
- */
-void WebRtcVad_HpOutput(WebRtc_Word16* in_vector,
- WebRtc_Word16 in_vector_length,
- WebRtc_Word16* out_vector,
- WebRtc_Word16* filter_state);
-
-/****************************************************************************
- * WebRtcVad_Allpass(...)
- *
- * This function is used when before splitting a speech file into
- * different frequency bands
- *
- * Note! Do NOT let the arrays in_vector and out_vector correspond to the same address.
- *
- * Input:
- * - in_vector : (Q0)
- * - filter_coefficients : (Q15)
- * - vector_length : Length of input and output vector
- * - filter_state : Current state of the filter (Q(-1))
- *
- * Output:
- * - out_vector : Output speech signal (Q(-1))
- * - filter_state : Updated state of the filter (Q(-1))
- *
- */
-void WebRtcVad_Allpass(WebRtc_Word16* in_vector,
- WebRtc_Word16* outw16,
- WebRtc_Word16 filter_coefficients,
- int vector_length,
- WebRtc_Word16* filter_state);
-
-/****************************************************************************
- * WebRtcVad_SplitFilter(...)
- *
- * This function is used when before splitting a speech file into
- * different frequency bands
- *
- * Input:
- * - in_vector : Input signal to be split into two frequency bands.
- * - upper_state : Current state of the upper filter
- * - lower_state : Current state of the lower filter
- * - in_vector_length : Length of input vector
- *
- * Output:
- * - out_vector_hp : Upper half of the spectrum
- * - out_vector_lp : Lower half of the spectrum
- * - upper_state : Updated state of the upper filter
- * - lower_state : Updated state of the lower filter
- *
- */
-void WebRtcVad_SplitFilter(WebRtc_Word16* in_vector,
- WebRtc_Word16* out_vector_hp,
- WebRtc_Word16* out_vector_lp,
- WebRtc_Word16* upper_state,
- WebRtc_Word16* lower_state,
- int in_vector_length);
-
-/****************************************************************************
- * WebRtcVad_get_features(...)
- *
- * This function is used to get the logarithm of the power of each of the
- * 6 frequency bands used by the VAD:
- * 80 Hz - 250 Hz
- * 250 Hz - 500 Hz
- * 500 Hz - 1000 Hz
- * 1000 Hz - 2000 Hz
- * 2000 Hz - 3000 Hz
- * 3000 Hz - 4000 Hz
- *
- * Input:
- * - inst : Pointer to VAD instance
- * - in_vector : Input speech signal
- * - frame_size : Frame size, in number of samples
- *
- * Output:
- * - out_vector : 10*log10(power in each freq. band), Q4
- *
- * Return: total power in the signal (NOTE! This value is not exact since it
- * is only used in a comparison.
- */
-WebRtc_Word16 WebRtcVad_get_features(VadInstT* inst,
- WebRtc_Word16* in_vector,
- int frame_size,
- WebRtc_Word16* out_vector);
-
-/****************************************************************************
- * WebRtcVad_LogOfEnergy(...)
- *
- * This function is used to get the logarithm of the power of one frequency band.
- *
- * Input:
- * - vector : Input speech samples for one frequency band
- * - offset : Offset value for the current frequency band
- * - vector_length : Length of input vector
- *
- * Output:
- * - enerlogval : 10*log10(energy);
- * - power : Update total power in speech frame. NOTE! This value
- * is not exact since it is only used in a comparison.
- *
- */
-void WebRtcVad_LogOfEnergy(WebRtc_Word16* vector,
- WebRtc_Word16* enerlogval,
- WebRtc_Word16* power,
- WebRtc_Word16 offset,
- int vector_length);
-
-#endif // WEBRTC_VAD_FILTERBANK_H_
diff --git a/src/common_audio/vad/main/source/vad_gmm.c b/src/common_audio/vad/main/source/vad_gmm.c
deleted file mode 100644
index 23d12fb335..0000000000
--- a/src/common_audio/vad/main/source/vad_gmm.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/*
- * This file includes the implementation of the internal VAD call
- * WebRtcVad_GaussianProbability. For function description, see vad_gmm.h.
- */
-
-#include "vad_gmm.h"
-#include "signal_processing_library.h"
-#include "vad_const.h"
-
-WebRtc_Word32 WebRtcVad_GaussianProbability(WebRtc_Word16 in_sample,
- WebRtc_Word16 mean,
- WebRtc_Word16 std,
- WebRtc_Word16 *delta)
-{
- WebRtc_Word16 tmp16, tmpDiv, tmpDiv2, expVal, tmp16_1, tmp16_2;
- WebRtc_Word32 tmp32, y32;
-
- // Calculate tmpDiv=1/std, in Q10
- tmp32 = (WebRtc_Word32)WEBRTC_SPL_RSHIFT_W16(std,1) + (WebRtc_Word32)131072; // 1 in Q17
- tmpDiv = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32, std); // Q17/Q7 = Q10
-
- // Calculate tmpDiv2=1/std^2, in Q14
- tmp16 = WEBRTC_SPL_RSHIFT_W16(tmpDiv, 2); // From Q10 to Q8
- tmpDiv2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16, tmp16, 2); // (Q8 * Q8)>>2 = Q14
-
- tmp16 = WEBRTC_SPL_LSHIFT_W16(in_sample, 3); // Q7
- tmp16 = tmp16 - mean; // Q7 - Q7 = Q7
-
- // To be used later, when updating noise/speech model
- // delta = (x-m)/std^2, in Q11
- *delta = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmpDiv2, tmp16, 10); //(Q14*Q7)>>10 = Q11
-
- // Calculate tmp32=(x-m)^2/(2*std^2), in Q10
- tmp32 = (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT(*delta, tmp16, 9); // One shift for /2
-
- // Calculate expVal ~= exp(-(x-m)^2/(2*std^2)) ~= exp2(-log2(exp(1))*tmp32)
- if (tmp32 < kCompVar)
- {
- // Calculate tmp16 = log2(exp(1))*tmp32 , in Q10
- tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((WebRtc_Word16)tmp32,
- kLog10Const, 12);
- tmp16 = -tmp16;
- tmp16_2 = (WebRtc_Word16)(0x0400 | (tmp16 & 0x03FF));
- tmp16_1 = (WebRtc_Word16)(tmp16 ^ 0xFFFF);
- tmp16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W16(tmp16_1, 10);
- tmp16 += 1;
- // Calculate expVal=log2(-tmp32), in Q10
- expVal = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32((WebRtc_Word32)tmp16_2, tmp16);
-
- } else
- {
- expVal = 0;
- }
-
- // Calculate y32=(1/std)*exp(-(x-m)^2/(2*std^2)), in Q20
- y32 = WEBRTC_SPL_MUL_16_16(tmpDiv, expVal); // Q10 * Q10 = Q20
-
- return y32; // Q20
-}
diff --git a/src/common_audio/vad/main/source/vad_gmm.h b/src/common_audio/vad/main/source/vad_gmm.h
deleted file mode 100644
index e0747fb7e5..0000000000
--- a/src/common_audio/vad/main/source/vad_gmm.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/*
- * This header file includes the description of the internal VAD call
- * WebRtcVad_GaussianProbability.
- */
-
-#ifndef WEBRTC_VAD_GMM_H_
-#define WEBRTC_VAD_GMM_H_
-
-#include "typedefs.h"
-
-/****************************************************************************
- * WebRtcVad_GaussianProbability(...)
- *
- * This function calculates the probability for the value 'in_sample', given that in_sample
- * comes from a normal distribution with mean 'mean' and standard deviation 'std'.
- *
- * Input:
- * - in_sample : Input sample in Q4
- * - mean : mean value in the statistical model, Q7
- * - std : standard deviation, Q7
- *
- * Output:
- *
- * - delta : Value used when updating the model, Q11
- *
- * Return:
- * - out : out = 1/std * exp(-(x-m)^2/(2*std^2));
- * Probability for x.
- *
- */
-WebRtc_Word32 WebRtcVad_GaussianProbability(WebRtc_Word16 in_sample,
- WebRtc_Word16 mean,
- WebRtc_Word16 std,
- WebRtc_Word16 *delta);
-
-#endif // WEBRTC_VAD_GMM_H_
diff --git a/src/common_audio/vad/main/source/vad_sp.c b/src/common_audio/vad/main/source/vad_sp.c
deleted file mode 100644
index f347ab5904..0000000000
--- a/src/common_audio/vad/main/source/vad_sp.c
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/*
- * This file includes the implementation of the VAD internal calls for Downsampling and
- * FindMinimum.
- * For function call descriptions; See vad_sp.h.
- */
-
-#include "vad_sp.h"
-#include "vad_defines.h"
-#include "vad_const.h"
-#include "signal_processing_library.h"
-
-// Downsampling filter based on the splitting filter and the allpass functions
-// in vad_filterbank.c
-void WebRtcVad_Downsampling(WebRtc_Word16* signal_in,
- WebRtc_Word16* signal_out,
- WebRtc_Word32* filter_state,
- int inlen)
-{
- WebRtc_Word16 tmp16_1, tmp16_2;
- WebRtc_Word32 tmp32_1, tmp32_2;
- int n, halflen;
-
- // Downsampling by 2 and get two branches
- halflen = WEBRTC_SPL_RSHIFT_W16(inlen, 1);
-
- tmp32_1 = filter_state[0];
- tmp32_2 = filter_state[1];
-
- // Filter coefficients in Q13, filter state in Q0
- for (n = 0; n < halflen; n++)
- {
- // All-pass filtering upper branch
- tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32_1, 1)
- + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[0]),
- *signal_in, 14);
- *signal_out = tmp16_1;
- tmp32_1 = (WebRtc_Word32)(*signal_in++)
- - (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[0]), tmp16_1, 12);
-
- // All-pass filtering lower branch
- tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32_2, 1)
- + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[1]),
- *signal_in, 14);
- *signal_out++ += tmp16_2;
- tmp32_2 = (WebRtc_Word32)(*signal_in++)
- - (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[1]), tmp16_2, 12);
- }
- filter_state[0] = tmp32_1;
- filter_state[1] = tmp32_2;
-}
-
-WebRtc_Word16 WebRtcVad_FindMinimum(VadInstT* inst,
- WebRtc_Word16 x,
- int n)
-{
- int i, j, k, II = -1, offset;
- WebRtc_Word16 meanV, alpha;
- WebRtc_Word32 tmp32, tmp32_1;
- WebRtc_Word16 *valptr, *idxptr, *p1, *p2, *p3;
-
- // Offset to beginning of the 16 minimum values in memory
- offset = WEBRTC_SPL_LSHIFT_W16(n, 4);
-
- // Pointer to memory for the 16 minimum values and the age of each value
- idxptr = &inst->index_vector[offset];
- valptr = &inst->low_value_vector[offset];
-
- // Each value in low_value_vector is getting 1 loop older.
- // Update age of each value in indexVal, and remove old values.
- for (i = 0; i < 16; i++)
- {
- p3 = idxptr + i;
- if (*p3 != 100)
- {
- *p3 += 1;
- } else
- {
- p1 = valptr + i + 1;
- p2 = p3 + 1;
- for (j = i; j < 16; j++)
- {
- *(valptr + j) = *p1++;
- *(idxptr + j) = *p2++;
- }
- *(idxptr + 15) = 101;
- *(valptr + 15) = 10000;
- }
- }
-
- // Check if x smaller than any of the values in low_value_vector.
- // If so, find position.
- if (x < *(valptr + 7))
- {
- if (x < *(valptr + 3))
- {
- if (x < *(valptr + 1))
- {
- if (x < *valptr)
- {
- II = 0;
- } else
- {
- II = 1;
- }
- } else if (x < *(valptr + 2))
- {
- II = 2;
- } else
- {
- II = 3;
- }
- } else if (x < *(valptr + 5))
- {
- if (x < *(valptr + 4))
- {
- II = 4;
- } else
- {
- II = 5;
- }
- } else if (x < *(valptr + 6))
- {
- II = 6;
- } else
- {
- II = 7;
- }
- } else if (x < *(valptr + 15))
- {
- if (x < *(valptr + 11))
- {
- if (x < *(valptr + 9))
- {
- if (x < *(valptr + 8))
- {
- II = 8;
- } else
- {
- II = 9;
- }
- } else if (x < *(valptr + 10))
- {
- II = 10;
- } else
- {
- II = 11;
- }
- } else if (x < *(valptr + 13))
- {
- if (x < *(valptr + 12))
- {
- II = 12;
- } else
- {
- II = 13;
- }
- } else if (x < *(valptr + 14))
- {
- II = 14;
- } else
- {
- II = 15;
- }
- }
-
- // Put new min value on right position and shift bigger values up
- if (II > -1)
- {
- for (i = 15; i > II; i--)
- {
- k = i - 1;
- *(valptr + i) = *(valptr + k);
- *(idxptr + i) = *(idxptr + k);
- }
- *(valptr + II) = x;
- *(idxptr + II) = 1;
- }
-
- meanV = 0;
- if ((inst->frame_counter) > 4)
- {
- j = 5;
- } else
- {
- j = inst->frame_counter;
- }
-
- if (j > 2)
- {
- meanV = *(valptr + 2);
- } else if (j > 0)
- {
- meanV = *valptr;
- } else
- {
- meanV = 1600;
- }
-
- if (inst->frame_counter > 0)
- {
- if (meanV < inst->mean_value[n])
- {
- alpha = (WebRtc_Word16)ALPHA1; // 0.2 in Q15
- } else
- {
- alpha = (WebRtc_Word16)ALPHA2; // 0.99 in Q15
- }
- } else
- {
- alpha = 0;
- }
-
- tmp32 = WEBRTC_SPL_MUL_16_16((alpha+1), inst->mean_value[n]);
- tmp32_1 = WEBRTC_SPL_MUL_16_16(WEBRTC_SPL_WORD16_MAX - alpha, meanV);
- tmp32 += tmp32_1;
- tmp32 += 16384;
- inst->mean_value[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 15);
-
- return inst->mean_value[n];
-}
diff --git a/src/common_audio/vad/main/source/vad_sp.h b/src/common_audio/vad/main/source/vad_sp.h
deleted file mode 100644
index ae15c11ad6..0000000000
--- a/src/common_audio/vad/main/source/vad_sp.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/*
- * This header file includes the VAD internal calls for Downsampling and FindMinimum.
- * Specific function calls are given below.
- */
-
-#ifndef WEBRTC_VAD_SP_H_
-#define WEBRTC_VAD_SP_H_
-
-#include "vad_core.h"
-
-/****************************************************************************
- * WebRtcVad_Downsampling(...)
- *
- * Downsamples the signal a factor 2, eg. 32->16 or 16->8
- *
- * Input:
- * - signal_in : Input signal
- * - in_length : Length of input signal in samples
- *
- * Input & Output:
- * - filter_state : Filter state for first all-pass filters
- *
- * Output:
- * - signal_out : Downsampled signal (of length len/2)
- */
-void WebRtcVad_Downsampling(WebRtc_Word16* signal_in,
- WebRtc_Word16* signal_out,
- WebRtc_Word32* filter_state,
- int in_length);
-
-/****************************************************************************
- * WebRtcVad_FindMinimum(...)
- *
- * Find the five lowest values of x in 100 frames long window. Return a mean
- * value of these five values.
- *
- * Input:
- * - feature_value : Feature value
- * - channel : Channel number
- *
- * Input & Output:
- * - inst : State information
- *
- * Output:
- * return value : Weighted minimum value for a moving window.
- */
-WebRtc_Word16 WebRtcVad_FindMinimum(VadInstT* inst, WebRtc_Word16 feature_value, int channel);
-
-#endif // WEBRTC_VAD_SP_H_
diff --git a/src/common_audio/vad/main/source/webrtc_vad.c b/src/common_audio/vad/main/source/webrtc_vad.c
deleted file mode 100644
index dcfbda1128..0000000000
--- a/src/common_audio/vad/main/source/webrtc_vad.c
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/*
- * This file includes the VAD API calls. For a specific function call description,
- * see webrtc_vad.h
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "webrtc_vad.h"
-#include "vad_core.h"
-
// Sentinel written into init_flag by the init path; API calls compare
// against it to reject instances that were created but never initialized.
static const int kInitCheck = 42;
-
-WebRtc_Word16 WebRtcVad_get_version(char *version, size_t size_bytes)
-{
- const char my_version[] = "VAD 1.2.0";
-
- if (version == NULL)
- {
- return -1;
- }
-
- if (size_bytes < sizeof(my_version))
- {
- return -1;
- }
-
- memcpy(version, my_version, sizeof(my_version));
- return 0;
-}
-
-WebRtc_Word16 WebRtcVad_AssignSize(int *size_in_bytes)
-{
- *size_in_bytes = sizeof(VadInstT) * 2 / sizeof(WebRtc_Word16);
- return 0;
-}
-
-WebRtc_Word16 WebRtcVad_Assign(VadInst **vad_inst, void *vad_inst_addr)
-{
-
- if (vad_inst == NULL)
- {
- return -1;
- }
-
- if (vad_inst_addr != NULL)
- {
- *vad_inst = (VadInst*)vad_inst_addr;
- return 0;
- } else
- {
- return -1;
- }
-}
-
-WebRtc_Word16 WebRtcVad_Create(VadInst **vad_inst)
-{
-
- VadInstT *vad_ptr = NULL;
-
- if (vad_inst == NULL)
- {
- return -1;
- }
-
- *vad_inst = NULL;
-
- vad_ptr = (VadInstT *)malloc(sizeof(VadInstT));
- *vad_inst = (VadInst *)vad_ptr;
-
- if (vad_ptr == NULL)
- {
- return -1;
- }
-
- vad_ptr->init_flag = 0;
-
- return 0;
-}
-
-WebRtc_Word16 WebRtcVad_Free(VadInst *vad_inst)
-{
-
- if (vad_inst == NULL)
- {
- return -1;
- }
-
- free(vad_inst);
- return 0;
-}
-
-WebRtc_Word16 WebRtcVad_Init(VadInst *vad_inst)
-{
- short mode = 0; // Default high quality
-
- if (vad_inst == NULL)
- {
- return -1;
- }
-
- return WebRtcVad_InitCore((VadInstT*)vad_inst, mode);
-}
-
-WebRtc_Word16 WebRtcVad_set_mode(VadInst *vad_inst, WebRtc_Word16 mode)
-{
- VadInstT* vad_ptr;
-
- if (vad_inst == NULL)
- {
- return -1;
- }
-
- vad_ptr = (VadInstT*)vad_inst;
- if (vad_ptr->init_flag != kInitCheck)
- {
- return -1;
- }
-
- return WebRtcVad_set_mode_core((VadInstT*)vad_inst, mode);
-}
-
-WebRtc_Word16 WebRtcVad_Process(VadInst *vad_inst,
- WebRtc_Word16 fs,
- WebRtc_Word16 *speech_frame,
- WebRtc_Word16 frame_length)
-{
- WebRtc_Word16 vad;
- VadInstT* vad_ptr;
-
- if (vad_inst == NULL)
- {
- return -1;
- }
-
- vad_ptr = (VadInstT*)vad_inst;
- if (vad_ptr->init_flag != kInitCheck)
- {
- return -1;
- }
-
- if (speech_frame == NULL)
- {
- return -1;
- }
-
- if (fs == 32000)
- {
- if ((frame_length != 320) && (frame_length != 640) && (frame_length != 960))
- {
- return -1;
- }
- vad = WebRtcVad_CalcVad32khz((VadInstT*)vad_inst, speech_frame, frame_length);
-
- } else if (fs == 16000)
- {
- if ((frame_length != 160) && (frame_length != 320) && (frame_length != 480))
- {
- return -1;
- }
- vad = WebRtcVad_CalcVad16khz((VadInstT*)vad_inst, speech_frame, frame_length);
-
- } else if (fs == 8000)
- {
- if ((frame_length != 80) && (frame_length != 160) && (frame_length != 240))
- {
- return -1;
- }
- vad = WebRtcVad_CalcVad8khz((VadInstT*)vad_inst, speech_frame, frame_length);
-
- } else
- {
- return -1; // Not a supported sampling frequency
- }
-
- if (vad > 0)
- {
- return 1;
- } else if (vad == 0)
- {
- return 0;
- } else
- {
- return -1;
- }
-}