diff options
Diffstat (limited to 'src/common_audio/vad/main/source')
-rw-r--r-- | src/common_audio/vad/main/source/Android.mk | 64 | ||||
-rw-r--r-- | src/common_audio/vad/main/source/vad.gyp | 51 | ||||
-rw-r--r-- | src/common_audio/vad/main/source/vad_const.c | 80 | ||||
-rw-r--r-- | src/common_audio/vad/main/source/vad_const.h | 59 | ||||
-rw-r--r-- | src/common_audio/vad/main/source/vad_core.c | 685 | ||||
-rw-r--r-- | src/common_audio/vad/main/source/vad_core.h | 132 | ||||
-rw-r--r-- | src/common_audio/vad/main/source/vad_defines.h | 95 | ||||
-rw-r--r-- | src/common_audio/vad/main/source/vad_filterbank.c | 267 | ||||
-rw-r--r-- | src/common_audio/vad/main/source/vad_filterbank.h | 143 | ||||
-rw-r--r-- | src/common_audio/vad/main/source/vad_gmm.c | 70 | ||||
-rw-r--r-- | src/common_audio/vad/main/source/vad_gmm.h | 47 | ||||
-rw-r--r-- | src/common_audio/vad/main/source/vad_sp.c | 231 | ||||
-rw-r--r-- | src/common_audio/vad/main/source/vad_sp.h | 60 | ||||
-rw-r--r-- | src/common_audio/vad/main/source/webrtc_vad.c | 197 |
14 files changed, 0 insertions, 2181 deletions
diff --git a/src/common_audio/vad/main/source/Android.mk b/src/common_audio/vad/main/source/Android.mk deleted file mode 100644 index f52df935d1..0000000000 --- a/src/common_audio/vad/main/source/Android.mk +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. -# -# Use of this source code is governed by a BSD-style license -# that can be found in the LICENSE file in the root of the source -# tree. An additional intellectual property rights grant can be found -# in the file PATENTS. All contributing project authors may -# be found in the AUTHORS file in the root of the source tree. - -LOCAL_PATH := $(call my-dir) - -include $(CLEAR_VARS) - -LOCAL_ARM_MODE := arm -LOCAL_MODULE_CLASS := STATIC_LIBRARIES -LOCAL_MODULE := libwebrtc_vad -LOCAL_MODULE_TAGS := optional -LOCAL_GENERATED_SOURCES := -LOCAL_SRC_FILES := webrtc_vad.c \ - vad_const.c \ - vad_core.c \ - vad_filterbank.c \ - vad_gmm.c \ - vad_sp.c - -# Flags passed to both C and C++ files. -MY_CFLAGS := -MY_CFLAGS_C := -MY_DEFS := '-DNO_TCMALLOC' \ - '-DNO_HEAPCHECKER' \ - '-DWEBRTC_TARGET_PC' \ - '-DWEBRTC_LINUX' -ifeq ($(TARGET_ARCH),arm) -MY_DEFS += \ - '-DWEBRTC_ANDROID' \ - '-DANDROID' -endif -LOCAL_CFLAGS := $(MY_CFLAGS_C) $(MY_CFLAGS) $(MY_DEFS) - -# Include paths placed before CFLAGS/CPPFLAGS -LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../.. \ - $(LOCAL_PATH)/../interface \ - $(LOCAL_PATH)/../../../signal_processing_library/main/interface - -# Flags passed to only C++ (and not C) files. 
-LOCAL_CPPFLAGS := - -LOCAL_LDFLAGS := - -LOCAL_STATIC_LIBRARIES := - -LOCAL_SHARED_LIBRARIES := libdl \ - libstlport -LOCAL_ADDITIONAL_DEPENDENCIES := - -ifeq ($(TARGET_OS)-$(TARGET_SIMULATOR),linux-true) -LOCAL_LDLIBS += -ldl -lpthread -endif - -ifneq ($(TARGET_SIMULATOR),true) -LOCAL_SHARED_LIBRARIES += libdl -endif - -include external/stlport/libstlport.mk -include $(BUILD_STATIC_LIBRARY) diff --git a/src/common_audio/vad/main/source/vad.gyp b/src/common_audio/vad/main/source/vad.gyp deleted file mode 100644 index 754b684d5b..0000000000 --- a/src/common_audio/vad/main/source/vad.gyp +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. -# -# Use of this source code is governed by a BSD-style license -# that can be found in the LICENSE file in the root of the source -# tree. An additional intellectual property rights grant can be found -# in the file PATENTS. All contributing project authors may -# be found in the AUTHORS file in the root of the source tree. 
- -{ - 'includes': [ - '../../../../common_settings.gypi', # Common settings - ], - 'targets': [ - { - 'target_name': 'vad', - 'type': '<(library)', - 'dependencies': [ - '../../../signal_processing_library/main/source/spl.gyp:spl', - ], - 'include_dirs': [ - '../interface', - ], - 'direct_dependent_settings': { - 'include_dirs': [ - '../interface', - ], - }, - 'sources': [ - '../interface/webrtc_vad.h', - 'webrtc_vad.c', - 'vad_const.c', - 'vad_const.h', - 'vad_defines.h', - 'vad_core.c', - 'vad_core.h', - 'vad_filterbank.c', - 'vad_filterbank.h', - 'vad_gmm.c', - 'vad_gmm.h', - 'vad_sp.c', - 'vad_sp.h', - ], - }, - ], -} - -# Local Variables: -# tab-width:2 -# indent-tabs-mode:nil -# End: -# vim: set expandtab tabstop=2 shiftwidth=2: diff --git a/src/common_audio/vad/main/source/vad_const.c b/src/common_audio/vad/main/source/vad_const.c deleted file mode 100644 index 47b6a4b8ca..0000000000 --- a/src/common_audio/vad/main/source/vad_const.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -/* - * This file includes the constant values used internally in VAD. 
- */ - -#include "vad_const.h" - -// Spectrum Weighting -const WebRtc_Word16 kSpectrumWeight[6] = {6, 8, 10, 12, 14, 16}; - -const WebRtc_Word16 kCompVar = 22005; - -// Constant 160*log10(2) in Q9 -const WebRtc_Word16 kLogConst = 24660; - -// Constant log2(exp(1)) in Q12 -const WebRtc_Word16 kLog10Const = 5909; - -// Q15 -const WebRtc_Word16 kNoiseUpdateConst = 655; -const WebRtc_Word16 kSpeechUpdateConst = 6554; - -// Q8 -const WebRtc_Word16 kBackEta = 154; - -// Coefficients used by WebRtcVad_HpOutput, Q14 -const WebRtc_Word16 kHpZeroCoefs[3] = {6631, -13262, 6631}; -const WebRtc_Word16 kHpPoleCoefs[3] = {16384, -7756, 5620}; - -// Allpass filter coefficients, upper and lower, in Q15 -// Upper: 0.64, Lower: 0.17 -const WebRtc_Word16 kAllPassCoefsQ15[2] = {20972, 5571}; -const WebRtc_Word16 kAllPassCoefsQ13[2] = {5243, 1392}; // Q13 - -// Minimum difference between the two models, Q5 -const WebRtc_Word16 kMinimumDifference[6] = {544, 544, 576, 576, 576, 576}; - -// Upper limit of mean value for speech model, Q7 -const WebRtc_Word16 kMaximumSpeech[6] = {11392, 11392, 11520, 11520, 11520, 11520}; - -// Minimum value for mean value -const WebRtc_Word16 kMinimumMean[2] = {640, 768}; - -// Upper limit of mean value for noise model, Q7 -const WebRtc_Word16 kMaximumNoise[6] = {9216, 9088, 8960, 8832, 8704, 8576}; - -// Adjustment for division with two in WebRtcVad_SplitFilter -const WebRtc_Word16 kOffsetVector[6] = {368, 368, 272, 176, 176, 176}; - -// Start values for the Gaussian models, Q7 -// Weights for the two Gaussians for the six channels (noise) -const WebRtc_Word16 kNoiseDataWeights[12] = {34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103}; - -// Weights for the two Gaussians for the six channels (speech) -const WebRtc_Word16 kSpeechDataWeights[12] = {48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81}; - -// Means for the two Gaussians for the six channels (noise) -const WebRtc_Word16 kNoiseDataMeans[12] = {6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863, - 7820, 
7266, 5020, 4362}; - -// Means for the two Gaussians for the six channels (speech) -const WebRtc_Word16 kSpeechDataMeans[12] = {8306, 10085, 10078, 11823, 11843, 6309, 9473, - 9571, 10879, 7581, 8180, 7483}; - -// Stds for the two Gaussians for the six channels (noise) -const WebRtc_Word16 kNoiseDataStds[12] = {378, 1064, 493, 582, 688, 593, 474, 697, 475, 688, - 421, 455}; - -// Stds for the two Gaussians for the six channels (speech) -const WebRtc_Word16 kSpeechDataStds[12] = {555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540, - 1079, 850}; diff --git a/src/common_audio/vad/main/source/vad_const.h b/src/common_audio/vad/main/source/vad_const.h deleted file mode 100644 index 89804379be..0000000000 --- a/src/common_audio/vad/main/source/vad_const.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/* - * This header file includes the declarations of the internally used constants. - */ - -#ifndef WEBRTC_VAD_CONST_H_ -#define WEBRTC_VAD_CONST_H_ - -#include "typedefs.h" - -// TODO(ajm): give these internal-linkage by moving to the appropriate file -// where possible, and otherwise tag with WebRtcVad_. 
- -// Spectrum Weighting -extern const WebRtc_Word16 kSpectrumWeight[]; -extern const WebRtc_Word16 kCompVar; -// Logarithm constant -extern const WebRtc_Word16 kLogConst; -extern const WebRtc_Word16 kLog10Const; -// Q15 -extern const WebRtc_Word16 kNoiseUpdateConst; -extern const WebRtc_Word16 kSpeechUpdateConst; -// Q8 -extern const WebRtc_Word16 kBackEta; -// Coefficients used by WebRtcVad_HpOutput, Q14 -extern const WebRtc_Word16 kHpZeroCoefs[]; -extern const WebRtc_Word16 kHpPoleCoefs[]; -// Allpass filter coefficients, upper and lower, in Q15 resp. Q13 -extern const WebRtc_Word16 kAllPassCoefsQ15[]; -extern const WebRtc_Word16 kAllPassCoefsQ13[]; -// Minimum difference between the two models, Q5 -extern const WebRtc_Word16 kMinimumDifference[]; -// Maximum value when updating the speech model, Q7 -extern const WebRtc_Word16 kMaximumSpeech[]; -// Minimum value for mean value -extern const WebRtc_Word16 kMinimumMean[]; -// Upper limit of mean value for noise model, Q7 -extern const WebRtc_Word16 kMaximumNoise[]; -// Adjustment for division with two in WebRtcVad_SplitFilter -extern const WebRtc_Word16 kOffsetVector[]; -// Start values for the Gaussian models, Q7 -extern const WebRtc_Word16 kNoiseDataWeights[]; -extern const WebRtc_Word16 kSpeechDataWeights[]; -extern const WebRtc_Word16 kNoiseDataMeans[]; -extern const WebRtc_Word16 kSpeechDataMeans[]; -extern const WebRtc_Word16 kNoiseDataStds[]; -extern const WebRtc_Word16 kSpeechDataStds[]; - -#endif // WEBRTC_VAD_CONST_H_ diff --git a/src/common_audio/vad/main/source/vad_core.c b/src/common_audio/vad/main/source/vad_core.c deleted file mode 100644 index e8829993d5..0000000000 --- a/src/common_audio/vad/main/source/vad_core.c +++ /dev/null @@ -1,685 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/* - * This file includes the implementation of the core functionality in VAD. - * For function description, see vad_core.h. - */ - -#include "vad_core.h" -#include "vad_const.h" -#include "vad_defines.h" -#include "vad_filterbank.h" -#include "vad_gmm.h" -#include "vad_sp.h" -#include "signal_processing_library.h" - -static const int kInitCheck = 42; - -// Initialize VAD -int WebRtcVad_InitCore(VadInstT *inst, short mode) -{ - int i; - - // Initialization of struct - inst->vad = 1; - inst->frame_counter = 0; - inst->over_hang = 0; - inst->num_of_speech = 0; - - // Initialization of downsampling filter state - inst->downsampling_filter_states[0] = 0; - inst->downsampling_filter_states[1] = 0; - inst->downsampling_filter_states[2] = 0; - inst->downsampling_filter_states[3] = 0; - - // Read initial PDF parameters - for (i = 0; i < NUM_TABLE_VALUES; i++) - { - inst->noise_means[i] = kNoiseDataMeans[i]; - inst->speech_means[i] = kSpeechDataMeans[i]; - inst->noise_stds[i] = kNoiseDataStds[i]; - inst->speech_stds[i] = kSpeechDataStds[i]; - } - - // Index and Minimum value vectors are initialized - for (i = 0; i < 16 * NUM_CHANNELS; i++) - { - inst->low_value_vector[i] = 10000; - inst->index_vector[i] = 0; - } - - for (i = 0; i < 5; i++) - { - inst->upper_state[i] = 0; - inst->lower_state[i] = 0; - } - - for (i = 0; i < 4; i++) - { - inst->hp_filter_state[i] = 0; - } - - // Init mean value memory, for FindMin function - inst->mean_value[0] = 1600; - inst->mean_value[1] = 1600; - inst->mean_value[2] = 1600; - inst->mean_value[3] = 1600; - inst->mean_value[4] = 1600; - inst->mean_value[5] = 1600; - - if (mode == 0) - { - // Quality mode - inst->over_hang_max_1[0] = OHMAX1_10MS_Q; // Overhang short speech burst - inst->over_hang_max_1[1] = OHMAX1_20MS_Q; // Overhang short speech 
burst - inst->over_hang_max_1[2] = OHMAX1_30MS_Q; // Overhang short speech burst - inst->over_hang_max_2[0] = OHMAX2_10MS_Q; // Overhang long speech burst - inst->over_hang_max_2[1] = OHMAX2_20MS_Q; // Overhang long speech burst - inst->over_hang_max_2[2] = OHMAX2_30MS_Q; // Overhang long speech burst - - inst->individual[0] = INDIVIDUAL_10MS_Q; - inst->individual[1] = INDIVIDUAL_20MS_Q; - inst->individual[2] = INDIVIDUAL_30MS_Q; - - inst->total[0] = TOTAL_10MS_Q; - inst->total[1] = TOTAL_20MS_Q; - inst->total[2] = TOTAL_30MS_Q; - } else if (mode == 1) - { - // Low bitrate mode - inst->over_hang_max_1[0] = OHMAX1_10MS_LBR; // Overhang short speech burst - inst->over_hang_max_1[1] = OHMAX1_20MS_LBR; // Overhang short speech burst - inst->over_hang_max_1[2] = OHMAX1_30MS_LBR; // Overhang short speech burst - inst->over_hang_max_2[0] = OHMAX2_10MS_LBR; // Overhang long speech burst - inst->over_hang_max_2[1] = OHMAX2_20MS_LBR; // Overhang long speech burst - inst->over_hang_max_2[2] = OHMAX2_30MS_LBR; // Overhang long speech burst - - inst->individual[0] = INDIVIDUAL_10MS_LBR; - inst->individual[1] = INDIVIDUAL_20MS_LBR; - inst->individual[2] = INDIVIDUAL_30MS_LBR; - - inst->total[0] = TOTAL_10MS_LBR; - inst->total[1] = TOTAL_20MS_LBR; - inst->total[2] = TOTAL_30MS_LBR; - } else if (mode == 2) - { - // Aggressive mode - inst->over_hang_max_1[0] = OHMAX1_10MS_AGG; // Overhang short speech burst - inst->over_hang_max_1[1] = OHMAX1_20MS_AGG; // Overhang short speech burst - inst->over_hang_max_1[2] = OHMAX1_30MS_AGG; // Overhang short speech burst - inst->over_hang_max_2[0] = OHMAX2_10MS_AGG; // Overhang long speech burst - inst->over_hang_max_2[1] = OHMAX2_20MS_AGG; // Overhang long speech burst - inst->over_hang_max_2[2] = OHMAX2_30MS_AGG; // Overhang long speech burst - - inst->individual[0] = INDIVIDUAL_10MS_AGG; - inst->individual[1] = INDIVIDUAL_20MS_AGG; - inst->individual[2] = INDIVIDUAL_30MS_AGG; - - inst->total[0] = TOTAL_10MS_AGG; - inst->total[1] = 
TOTAL_20MS_AGG; - inst->total[2] = TOTAL_30MS_AGG; - } else - { - // Very aggressive mode - inst->over_hang_max_1[0] = OHMAX1_10MS_VAG; // Overhang short speech burst - inst->over_hang_max_1[1] = OHMAX1_20MS_VAG; // Overhang short speech burst - inst->over_hang_max_1[2] = OHMAX1_30MS_VAG; // Overhang short speech burst - inst->over_hang_max_2[0] = OHMAX2_10MS_VAG; // Overhang long speech burst - inst->over_hang_max_2[1] = OHMAX2_20MS_VAG; // Overhang long speech burst - inst->over_hang_max_2[2] = OHMAX2_30MS_VAG; // Overhang long speech burst - - inst->individual[0] = INDIVIDUAL_10MS_VAG; - inst->individual[1] = INDIVIDUAL_20MS_VAG; - inst->individual[2] = INDIVIDUAL_30MS_VAG; - - inst->total[0] = TOTAL_10MS_VAG; - inst->total[1] = TOTAL_20MS_VAG; - inst->total[2] = TOTAL_30MS_VAG; - } - - inst->init_flag = kInitCheck; - - return 0; -} - -// Set aggressiveness mode -int WebRtcVad_set_mode_core(VadInstT *inst, short mode) -{ - - if (mode == 0) - { - // Quality mode - inst->over_hang_max_1[0] = OHMAX1_10MS_Q; // Overhang short speech burst - inst->over_hang_max_1[1] = OHMAX1_20MS_Q; // Overhang short speech burst - inst->over_hang_max_1[2] = OHMAX1_30MS_Q; // Overhang short speech burst - inst->over_hang_max_2[0] = OHMAX2_10MS_Q; // Overhang long speech burst - inst->over_hang_max_2[1] = OHMAX2_20MS_Q; // Overhang long speech burst - inst->over_hang_max_2[2] = OHMAX2_30MS_Q; // Overhang long speech burst - - inst->individual[0] = INDIVIDUAL_10MS_Q; - inst->individual[1] = INDIVIDUAL_20MS_Q; - inst->individual[2] = INDIVIDUAL_30MS_Q; - - inst->total[0] = TOTAL_10MS_Q; - inst->total[1] = TOTAL_20MS_Q; - inst->total[2] = TOTAL_30MS_Q; - } else if (mode == 1) - { - // Low bitrate mode - inst->over_hang_max_1[0] = OHMAX1_10MS_LBR; // Overhang short speech burst - inst->over_hang_max_1[1] = OHMAX1_20MS_LBR; // Overhang short speech burst - inst->over_hang_max_1[2] = OHMAX1_30MS_LBR; // Overhang short speech burst - inst->over_hang_max_2[0] = OHMAX2_10MS_LBR; // Overhang 
long speech burst - inst->over_hang_max_2[1] = OHMAX2_20MS_LBR; // Overhang long speech burst - inst->over_hang_max_2[2] = OHMAX2_30MS_LBR; // Overhang long speech burst - - inst->individual[0] = INDIVIDUAL_10MS_LBR; - inst->individual[1] = INDIVIDUAL_20MS_LBR; - inst->individual[2] = INDIVIDUAL_30MS_LBR; - - inst->total[0] = TOTAL_10MS_LBR; - inst->total[1] = TOTAL_20MS_LBR; - inst->total[2] = TOTAL_30MS_LBR; - } else if (mode == 2) - { - // Aggressive mode - inst->over_hang_max_1[0] = OHMAX1_10MS_AGG; // Overhang short speech burst - inst->over_hang_max_1[1] = OHMAX1_20MS_AGG; // Overhang short speech burst - inst->over_hang_max_1[2] = OHMAX1_30MS_AGG; // Overhang short speech burst - inst->over_hang_max_2[0] = OHMAX2_10MS_AGG; // Overhang long speech burst - inst->over_hang_max_2[1] = OHMAX2_20MS_AGG; // Overhang long speech burst - inst->over_hang_max_2[2] = OHMAX2_30MS_AGG; // Overhang long speech burst - - inst->individual[0] = INDIVIDUAL_10MS_AGG; - inst->individual[1] = INDIVIDUAL_20MS_AGG; - inst->individual[2] = INDIVIDUAL_30MS_AGG; - - inst->total[0] = TOTAL_10MS_AGG; - inst->total[1] = TOTAL_20MS_AGG; - inst->total[2] = TOTAL_30MS_AGG; - } else if (mode == 3) - { - // Very aggressive mode - inst->over_hang_max_1[0] = OHMAX1_10MS_VAG; // Overhang short speech burst - inst->over_hang_max_1[1] = OHMAX1_20MS_VAG; // Overhang short speech burst - inst->over_hang_max_1[2] = OHMAX1_30MS_VAG; // Overhang short speech burst - inst->over_hang_max_2[0] = OHMAX2_10MS_VAG; // Overhang long speech burst - inst->over_hang_max_2[1] = OHMAX2_20MS_VAG; // Overhang long speech burst - inst->over_hang_max_2[2] = OHMAX2_30MS_VAG; // Overhang long speech burst - - inst->individual[0] = INDIVIDUAL_10MS_VAG; - inst->individual[1] = INDIVIDUAL_20MS_VAG; - inst->individual[2] = INDIVIDUAL_30MS_VAG; - - inst->total[0] = TOTAL_10MS_VAG; - inst->total[1] = TOTAL_20MS_VAG; - inst->total[2] = TOTAL_30MS_VAG; - } else - { - return -1; - } - - return 0; -} - -// Calculate VAD decision 
by first extracting feature values and then calculate -// probability for both speech and background noise. - -WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT *inst, WebRtc_Word16 *speech_frame, - int frame_length) -{ - WebRtc_Word16 len, vad; - WebRtc_Word16 speechWB[480]; // Downsampled speech frame: 960 samples (30ms in SWB) - WebRtc_Word16 speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) - - - // Downsample signal 32->16->8 before doing VAD - WebRtcVad_Downsampling(speech_frame, speechWB, &(inst->downsampling_filter_states[2]), - frame_length); - len = WEBRTC_SPL_RSHIFT_W16(frame_length, 1); - - WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states, len); - len = WEBRTC_SPL_RSHIFT_W16(len, 1); - - // Do VAD on an 8 kHz signal - vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); - - return vad; -} - -WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT *inst, WebRtc_Word16 *speech_frame, - int frame_length) -{ - WebRtc_Word16 len, vad; - WebRtc_Word16 speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) - - // Wideband: Downsample signal before doing VAD - WebRtcVad_Downsampling(speech_frame, speechNB, inst->downsampling_filter_states, - frame_length); - - len = WEBRTC_SPL_RSHIFT_W16(frame_length, 1); - vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); - - return vad; -} - -WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT *inst, WebRtc_Word16 *speech_frame, - int frame_length) -{ - WebRtc_Word16 feature_vector[NUM_CHANNELS], total_power; - - // Get power in the bands - total_power = WebRtcVad_get_features(inst, speech_frame, frame_length, feature_vector); - - // Make a VAD - inst->vad = WebRtcVad_GmmProbability(inst, feature_vector, total_power, frame_length); - - return inst->vad; -} - -// Calculate probability for both speech and background noise, and perform a -// hypothesis-test. 
-WebRtc_Word16 WebRtcVad_GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector, - WebRtc_Word16 total_power, int frame_length) -{ - int n, k; - WebRtc_Word16 backval; - WebRtc_Word16 h0, h1; - WebRtc_Word16 ratvec, xval; - WebRtc_Word16 vadflag; - WebRtc_Word16 shifts0, shifts1; - WebRtc_Word16 tmp16, tmp16_1, tmp16_2; - WebRtc_Word16 diff, nr, pos; - WebRtc_Word16 nmk, nmk2, nmk3, smk, smk2, nsk, ssk; - WebRtc_Word16 delt, ndelt; - WebRtc_Word16 maxspe, maxmu; - WebRtc_Word16 deltaN[NUM_TABLE_VALUES], deltaS[NUM_TABLE_VALUES]; - WebRtc_Word16 ngprvec[NUM_TABLE_VALUES], sgprvec[NUM_TABLE_VALUES]; - WebRtc_Word32 h0test, h1test; - WebRtc_Word32 tmp32_1, tmp32_2; - WebRtc_Word32 dotVal; - WebRtc_Word32 nmid, smid; - WebRtc_Word32 probn[NUM_MODELS], probs[NUM_MODELS]; - WebRtc_Word16 *nmean1ptr, *nmean2ptr, *smean1ptr, *smean2ptr, *nstd1ptr, *nstd2ptr, - *sstd1ptr, *sstd2ptr; - WebRtc_Word16 overhead1, overhead2, individualTest, totalTest; - - // Set the thresholds to different values based on frame length - if (frame_length == 80) - { - // 80 input samples - overhead1 = inst->over_hang_max_1[0]; - overhead2 = inst->over_hang_max_2[0]; - individualTest = inst->individual[0]; - totalTest = inst->total[0]; - } else if (frame_length == 160) - { - // 160 input samples - overhead1 = inst->over_hang_max_1[1]; - overhead2 = inst->over_hang_max_2[1]; - individualTest = inst->individual[1]; - totalTest = inst->total[1]; - } else - { - // 240 input samples - overhead1 = inst->over_hang_max_1[2]; - overhead2 = inst->over_hang_max_2[2]; - individualTest = inst->individual[2]; - totalTest = inst->total[2]; - } - - if (total_power > MIN_ENERGY) - { // If signal present at all - - // Set pointers to the gaussian parameters - nmean1ptr = &inst->noise_means[0]; - nmean2ptr = &inst->noise_means[NUM_CHANNELS]; - smean1ptr = &inst->speech_means[0]; - smean2ptr = &inst->speech_means[NUM_CHANNELS]; - nstd1ptr = &inst->noise_stds[0]; - nstd2ptr = &inst->noise_stds[NUM_CHANNELS]; - 
sstd1ptr = &inst->speech_stds[0]; - sstd2ptr = &inst->speech_stds[NUM_CHANNELS]; - - vadflag = 0; - dotVal = 0; - for (n = 0; n < NUM_CHANNELS; n++) - { // For all channels - - pos = WEBRTC_SPL_LSHIFT_W16(n, 1); - xval = feature_vector[n]; - - // Probability for Noise, Q7 * Q20 = Q27 - tmp32_1 = WebRtcVad_GaussianProbability(xval, *nmean1ptr++, *nstd1ptr++, - &deltaN[pos]); - probn[0] = (WebRtc_Word32)(kNoiseDataWeights[n] * tmp32_1); - tmp32_1 = WebRtcVad_GaussianProbability(xval, *nmean2ptr++, *nstd2ptr++, - &deltaN[pos + 1]); - probn[1] = (WebRtc_Word32)(kNoiseDataWeights[n + NUM_CHANNELS] * tmp32_1); - h0test = probn[0] + probn[1]; // Q27 - h0 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(h0test, 12); // Q15 - - // Probability for Speech - tmp32_1 = WebRtcVad_GaussianProbability(xval, *smean1ptr++, *sstd1ptr++, - &deltaS[pos]); - probs[0] = (WebRtc_Word32)(kSpeechDataWeights[n] * tmp32_1); - tmp32_1 = WebRtcVad_GaussianProbability(xval, *smean2ptr++, *sstd2ptr++, - &deltaS[pos + 1]); - probs[1] = (WebRtc_Word32)(kSpeechDataWeights[n + NUM_CHANNELS] * tmp32_1); - h1test = probs[0] + probs[1]; // Q27 - h1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(h1test, 12); // Q15 - - // Get likelihood ratio. 
Approximate log2(H1/H0) with shifts0 - shifts1 - shifts0 = WebRtcSpl_NormW32(h0test); - shifts1 = WebRtcSpl_NormW32(h1test); - - if ((h0test > 0) && (h1test > 0)) - { - ratvec = shifts0 - shifts1; - } else if (h1test > 0) - { - ratvec = 31 - shifts1; - } else if (h0test > 0) - { - ratvec = shifts0 - 31; - } else - { - ratvec = 0; - } - - // VAD decision with spectrum weighting - dotVal += WEBRTC_SPL_MUL_16_16(ratvec, kSpectrumWeight[n]); - - // Individual channel test - if ((ratvec << 2) > individualTest) - { - vadflag = 1; - } - - // Probabilities used when updating model - if (h0 > 0) - { - tmp32_1 = probn[0] & 0xFFFFF000; // Q27 - tmp32_2 = WEBRTC_SPL_LSHIFT_W32(tmp32_1, 2); // Q29 - ngprvec[pos] = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_2, h0); - ngprvec[pos + 1] = 16384 - ngprvec[pos]; - } else - { - ngprvec[pos] = 16384; - ngprvec[pos + 1] = 0; - } - - // Probabilities used when updating model - if (h1 > 0) - { - tmp32_1 = probs[0] & 0xFFFFF000; - tmp32_2 = WEBRTC_SPL_LSHIFT_W32(tmp32_1, 2); - sgprvec[pos] = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_2, h1); - sgprvec[pos + 1] = 16384 - sgprvec[pos]; - } else - { - sgprvec[pos] = 0; - sgprvec[pos + 1] = 0; - } - } - - // Overall test - if (dotVal >= totalTest) - { - vadflag |= 1; - } - - // Set pointers to the means and standard deviations. 
- nmean1ptr = &inst->noise_means[0]; - smean1ptr = &inst->speech_means[0]; - nstd1ptr = &inst->noise_stds[0]; - sstd1ptr = &inst->speech_stds[0]; - - maxspe = 12800; - - // Update the model's parameters - for (n = 0; n < NUM_CHANNELS; n++) - { - - pos = WEBRTC_SPL_LSHIFT_W16(n, 1); - - // Get min value in past which is used for long term correction - backval = WebRtcVad_FindMinimum(inst, feature_vector[n], n); // Q4 - - // Compute the "global" mean, that is the sum of the two means weighted - nmid = WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n], *nmean1ptr); // Q7 * Q7 - nmid += WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n+NUM_CHANNELS], - *(nmean1ptr+NUM_CHANNELS)); - tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(nmid, 6); // Q8 - - for (k = 0; k < NUM_MODELS; k++) - { - - nr = pos + k; - - nmean2ptr = nmean1ptr + k * NUM_CHANNELS; - smean2ptr = smean1ptr + k * NUM_CHANNELS; - nstd2ptr = nstd1ptr + k * NUM_CHANNELS; - sstd2ptr = sstd1ptr + k * NUM_CHANNELS; - nmk = *nmean2ptr; - smk = *smean2ptr; - nsk = *nstd2ptr; - ssk = *sstd2ptr; - - // Update noise mean vector if the frame consists of noise only - nmk2 = nmk; - if (!vadflag) - { - // deltaN = (x-mu)/sigma^2 - // ngprvec[k] = probn[k]/(probn[0] + probn[1]) - - delt = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(ngprvec[nr], - deltaN[nr], 11); // Q14*Q11 - nmk2 = nmk + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delt, - kNoiseUpdateConst, - 22); // Q7+(Q14*Q15>>22) - } - - // Long term correction of the noise mean - ndelt = WEBRTC_SPL_LSHIFT_W16(backval, 4); - ndelt -= tmp16_1; // Q8 - Q8 - nmk3 = nmk2 + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(ndelt, - kBackEta, - 9); // Q7+(Q8*Q8)>>9 - - // Control that the noise mean does not drift to much - tmp16 = WEBRTC_SPL_LSHIFT_W16(k+5, 7); - if (nmk3 < tmp16) - nmk3 = tmp16; - tmp16 = WEBRTC_SPL_LSHIFT_W16(72+k-n, 7); - if (nmk3 > tmp16) - nmk3 = tmp16; - *nmean2ptr = nmk3; - - if (vadflag) - { - // Update speech mean vector: - // deltaS = (x-mu)/sigma^2 - // sgprvec[k] = 
probn[k]/(probn[0] + probn[1]) - - delt = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(sgprvec[nr], - deltaS[nr], - 11); // (Q14*Q11)>>11=Q14 - tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delt, - kSpeechUpdateConst, - 21) + 1; - smk2 = smk + (tmp16 >> 1); // Q7 + (Q14 * Q15 >> 22) - - // Control that the speech mean does not drift to much - maxmu = maxspe + 640; - if (smk2 < kMinimumMean[k]) - smk2 = kMinimumMean[k]; - if (smk2 > maxmu) - smk2 = maxmu; - - *smean2ptr = smk2; - - // (Q7>>3) = Q4 - tmp16 = WEBRTC_SPL_RSHIFT_W16((smk + 4), 3); - - tmp16 = feature_vector[n] - tmp16; // Q4 - tmp32_1 = WEBRTC_SPL_MUL_16_16_RSFT(deltaS[nr], tmp16, 3); - tmp32_2 = tmp32_1 - (WebRtc_Word32)4096; // Q12 - tmp16 = WEBRTC_SPL_RSHIFT_W16((sgprvec[nr]), 2); - tmp32_1 = (WebRtc_Word32)(tmp16 * tmp32_2);// (Q15>>3)*(Q14>>2)=Q12*Q12=Q24 - - tmp32_2 = WEBRTC_SPL_RSHIFT_W32(tmp32_1, 4); // Q20 - - // 0.1 * Q20 / Q7 = Q13 - if (tmp32_2 > 0) - tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_2, ssk * 10); - else - { - tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(-tmp32_2, ssk * 10); - tmp16 = -tmp16; - } - // divide by 4 giving an update factor of 0.025 - tmp16 += 128; // Rounding - ssk += WEBRTC_SPL_RSHIFT_W16(tmp16, 8); - // Division with 8 plus Q7 - if (ssk < MIN_STD) - ssk = MIN_STD; - *sstd2ptr = ssk; - } else - { - // Update GMM variance vectors - // deltaN * (feature_vector[n] - nmk) - 1, Q11 * Q4 - tmp16 = feature_vector[n] - WEBRTC_SPL_RSHIFT_W16(nmk, 3); - - // (Q15>>3) * (Q14>>2) = Q12 * Q12 = Q24 - tmp32_1 = WEBRTC_SPL_MUL_16_16_RSFT(deltaN[nr], tmp16, 3) - 4096; - tmp16 = WEBRTC_SPL_RSHIFT_W16((ngprvec[nr]+2), 2); - tmp32_2 = (WebRtc_Word32)(tmp16 * tmp32_1); - tmp32_1 = WEBRTC_SPL_RSHIFT_W32(tmp32_2, 14); - // Q20 * approx 0.001 (2^-10=0.0009766) - - // Q20 / Q7 = Q13 - tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_1, nsk); - if (tmp32_1 > 0) - tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_1, nsk); - else - { - tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(-tmp32_1, 
nsk); - tmp16 = -tmp16; - } - tmp16 += 32; // Rounding - nsk += WEBRTC_SPL_RSHIFT_W16(tmp16, 6); - - if (nsk < MIN_STD) - nsk = MIN_STD; - - *nstd2ptr = nsk; - } - } - - // Separate models if they are too close - nmid in Q14 - nmid = WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n], *nmean1ptr); - nmid += WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n+NUM_CHANNELS], *nmean2ptr); - - // smid in Q14 - smid = WEBRTC_SPL_MUL_16_16(kSpeechDataWeights[n], *smean1ptr); - smid += WEBRTC_SPL_MUL_16_16(kSpeechDataWeights[n+NUM_CHANNELS], *smean2ptr); - - // diff = "global" speech mean - "global" noise mean - diff = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(smid, 9); - tmp16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(nmid, 9); - diff -= tmp16; - - if (diff < kMinimumDifference[n]) - { - - tmp16 = kMinimumDifference[n] - diff; // Q5 - - // tmp16_1 = ~0.8 * (kMinimumDifference - diff) in Q7 - // tmp16_2 = ~0.2 * (kMinimumDifference - diff) in Q7 - tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(13, tmp16, 2); - tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(3, tmp16, 2); - - // First Gauss, speech model - tmp16 = tmp16_1 + *smean1ptr; - *smean1ptr = tmp16; - smid = WEBRTC_SPL_MUL_16_16(tmp16, kSpeechDataWeights[n]); - - // Second Gauss, speech model - tmp16 = tmp16_1 + *smean2ptr; - *smean2ptr = tmp16; - smid += WEBRTC_SPL_MUL_16_16(tmp16, kSpeechDataWeights[n+NUM_CHANNELS]); - - // First Gauss, noise model - tmp16 = *nmean1ptr - tmp16_2; - *nmean1ptr = tmp16; - - nmid = WEBRTC_SPL_MUL_16_16(tmp16, kNoiseDataWeights[n]); - - // Second Gauss, noise model - tmp16 = *nmean2ptr - tmp16_2; - *nmean2ptr = tmp16; - nmid += WEBRTC_SPL_MUL_16_16(tmp16, kNoiseDataWeights[n+NUM_CHANNELS]); - } - - // Control that the speech & noise means do not drift to much - maxspe = kMaximumSpeech[n]; - tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(smid, 7); - if (tmp16_2 > maxspe) - { // Upper limit of speech model - tmp16_2 -= maxspe; - - *smean1ptr -= tmp16_2; - *smean2ptr -= tmp16_2; - } - - tmp16_2 = 
(WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(nmid, 7); - if (tmp16_2 > kMaximumNoise[n]) - { - tmp16_2 -= kMaximumNoise[n]; - - *nmean1ptr -= tmp16_2; - *nmean2ptr -= tmp16_2; - } - - *nmean1ptr++; - *smean1ptr++; - *nstd1ptr++; - *sstd1ptr++; - } - inst->frame_counter++; - } else - { - vadflag = 0; - } - - // Hangover smoothing - if (!vadflag) - { - if (inst->over_hang > 0) - { - vadflag = 2 + inst->over_hang; - inst->over_hang = inst->over_hang - 1; - } - inst->num_of_speech = 0; - } else - { - inst->num_of_speech = inst->num_of_speech + 1; - if (inst->num_of_speech > NSP_MAX) - { - inst->num_of_speech = NSP_MAX; - inst->over_hang = overhead2; - } else - inst->over_hang = overhead1; - } - return vadflag; -} diff --git a/src/common_audio/vad/main/source/vad_core.h b/src/common_audio/vad/main/source/vad_core.h deleted file mode 100644 index 544caf5ab3..0000000000 --- a/src/common_audio/vad/main/source/vad_core.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/* - * This header file includes the descriptions of the core VAD calls. 
- */ - -#ifndef WEBRTC_VAD_CORE_H_ -#define WEBRTC_VAD_CORE_H_ - -#include "typedefs.h" -#include "vad_defines.h" - -typedef struct VadInstT_ -{ - - WebRtc_Word16 vad; - WebRtc_Word32 downsampling_filter_states[4]; - WebRtc_Word16 noise_means[NUM_TABLE_VALUES]; - WebRtc_Word16 speech_means[NUM_TABLE_VALUES]; - WebRtc_Word16 noise_stds[NUM_TABLE_VALUES]; - WebRtc_Word16 speech_stds[NUM_TABLE_VALUES]; - WebRtc_Word32 frame_counter; - WebRtc_Word16 over_hang; // Over Hang - WebRtc_Word16 num_of_speech; - WebRtc_Word16 index_vector[16 * NUM_CHANNELS]; - WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS]; - WebRtc_Word16 mean_value[NUM_CHANNELS]; - WebRtc_Word16 upper_state[5]; - WebRtc_Word16 lower_state[5]; - WebRtc_Word16 hp_filter_state[4]; - WebRtc_Word16 over_hang_max_1[3]; - WebRtc_Word16 over_hang_max_2[3]; - WebRtc_Word16 individual[3]; - WebRtc_Word16 total[3]; - - short init_flag; - -} VadInstT; - -/**************************************************************************** - * WebRtcVad_InitCore(...) - * - * This function initializes a VAD instance - * - * Input: - * - inst : Instance that should be initialized - * - mode : Aggressiveness degree - * 0 (High quality) - 3 (Highly aggressive) - * - * Output: - * - inst : Initialized instance - * - * Return value : 0 - Ok - * -1 - Error - */ -int WebRtcVad_InitCore(VadInstT* inst, short mode); - -/**************************************************************************** - * WebRtcVad_set_mode_core(...) - * - * This function changes the VAD settings - * - * Input: - * - inst : VAD instance - * - mode : Aggressiveness degree - * 0 (High quality) - 3 (Highly aggressive) - * - * Output: - * - inst : Changed instance - * - * Return value : 0 - Ok - * -1 - Error - */ - -int WebRtcVad_set_mode_core(VadInstT* inst, short mode); - -/**************************************************************************** - * WebRtcVad_CalcVad32khz(...) - * WebRtcVad_CalcVad16khz(...) - * WebRtcVad_CalcVad8khz(...) 
- * - * Calculate probability for active speech and make VAD decision. - * - * Input: - * - inst : Instance that should be initialized - * - speech_frame : Input speech frame - * - frame_length : Number of input samples - * - * Output: - * - inst : Updated filter states etc. - * - * Return value : VAD decision - * 0 - No active speech - * 1-6 - Active speech - */ -WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT* inst, WebRtc_Word16* speech_frame, - int frame_length); -WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT* inst, WebRtc_Word16* speech_frame, - int frame_length); -WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT* inst, WebRtc_Word16* speech_frame, - int frame_length); - -/**************************************************************************** - * WebRtcVad_GmmProbability(...) - * - * This function calculates the probabilities for background noise and - * speech using Gaussian Mixture Models. A hypothesis-test is performed to decide - * which type of signal is most probable. - * - * Input: - * - inst : Pointer to VAD instance - * - feature_vector : Feature vector = log10(energy in frequency band) - * - total_power : Total power in frame. - * - frame_length : Number of input samples - * - * Output: - * VAD decision : 0 - noise, 1 - speech - * - */ -WebRtc_Word16 WebRtcVad_GmmProbability(VadInstT* inst, WebRtc_Word16* feature_vector, - WebRtc_Word16 total_power, int frame_length); - -#endif // WEBRTC_VAD_CORE_H_ diff --git a/src/common_audio/vad/main/source/vad_defines.h b/src/common_audio/vad/main/source/vad_defines.h deleted file mode 100644 index b33af2ef7d..0000000000 --- a/src/common_audio/vad/main/source/vad_defines.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/* - * This header file includes the macros used in VAD. - */ - -#ifndef WEBRTC_VAD_DEFINES_H_ -#define WEBRTC_VAD_DEFINES_H_ - -#define NUM_CHANNELS 6 // Six frequency bands -#define NUM_MODELS 2 // Number of Gaussian models -#define NUM_TABLE_VALUES NUM_CHANNELS * NUM_MODELS - -#define MIN_ENERGY 10 -#define ALPHA1 6553 // 0.2 in Q15 -#define ALPHA2 32439 // 0.99 in Q15 -#define NSP_MAX 6 // Maximum number of VAD=1 frames in a row counted -#define MIN_STD 384 // Minimum standard deviation -// Mode 0, Quality thresholds - Different thresholds for the different frame lengths -#define INDIVIDUAL_10MS_Q 24 -#define INDIVIDUAL_20MS_Q 21 // (log10(2)*66)<<2 ~=16 -#define INDIVIDUAL_30MS_Q 24 - -#define TOTAL_10MS_Q 57 -#define TOTAL_20MS_Q 48 -#define TOTAL_30MS_Q 57 - -#define OHMAX1_10MS_Q 8 // Max Overhang 1 -#define OHMAX2_10MS_Q 14 // Max Overhang 2 -#define OHMAX1_20MS_Q 4 // Max Overhang 1 -#define OHMAX2_20MS_Q 7 // Max Overhang 2 -#define OHMAX1_30MS_Q 3 -#define OHMAX2_30MS_Q 5 - -// Mode 1, Low bitrate thresholds - Different thresholds for the different frame lengths -#define INDIVIDUAL_10MS_LBR 37 -#define INDIVIDUAL_20MS_LBR 32 -#define INDIVIDUAL_30MS_LBR 37 - -#define TOTAL_10MS_LBR 100 -#define TOTAL_20MS_LBR 80 -#define TOTAL_30MS_LBR 100 - -#define OHMAX1_10MS_LBR 8 // Max Overhang 1 -#define OHMAX2_10MS_LBR 14 // Max Overhang 2 -#define OHMAX1_20MS_LBR 4 -#define OHMAX2_20MS_LBR 7 - -#define OHMAX1_30MS_LBR 3 -#define OHMAX2_30MS_LBR 5 - -// Mode 2, Very aggressive thresholds - Different thresholds for the different frame lengths -#define INDIVIDUAL_10MS_AGG 82 -#define INDIVIDUAL_20MS_AGG 78 -#define INDIVIDUAL_30MS_AGG 82 - -#define TOTAL_10MS_AGG 285 //580 -#define TOTAL_20MS_AGG 260 -#define TOTAL_30MS_AGG 285 - -#define OHMAX1_10MS_AGG 6 // Max Overhang 1 -#define OHMAX2_10MS_AGG 9 // Max Overhang 2 -#define OHMAX1_20MS_AGG 3 
-#define OHMAX2_20MS_AGG 5 - -#define OHMAX1_30MS_AGG 2 -#define OHMAX2_30MS_AGG 3 - -// Mode 3, Super aggressive thresholds - Different thresholds for the different frame lengths -#define INDIVIDUAL_10MS_VAG 94 -#define INDIVIDUAL_20MS_VAG 94 -#define INDIVIDUAL_30MS_VAG 94 - -#define TOTAL_10MS_VAG 1100 //1700 -#define TOTAL_20MS_VAG 1050 -#define TOTAL_30MS_VAG 1100 - -#define OHMAX1_10MS_VAG 6 // Max Overhang 1 -#define OHMAX2_10MS_VAG 9 // Max Overhang 2 -#define OHMAX1_20MS_VAG 3 -#define OHMAX2_20MS_VAG 5 - -#define OHMAX1_30MS_VAG 2 -#define OHMAX2_30MS_VAG 3 - -#endif // WEBRTC_VAD_DEFINES_H_ diff --git a/src/common_audio/vad/main/source/vad_filterbank.c b/src/common_audio/vad/main/source/vad_filterbank.c deleted file mode 100644 index 11392c917a..0000000000 --- a/src/common_audio/vad/main/source/vad_filterbank.c +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/* - * This file includes the implementation of the internal filterbank associated functions. - * For function description, see vad_filterbank.h. 
- */ - -#include "vad_filterbank.h" -#include "vad_defines.h" -#include "vad_const.h" -#include "signal_processing_library.h" - -void WebRtcVad_HpOutput(WebRtc_Word16 *in_vector, - WebRtc_Word16 in_vector_length, - WebRtc_Word16 *out_vector, - WebRtc_Word16 *filter_state) -{ - WebRtc_Word16 i, *pi, *outPtr; - WebRtc_Word32 tmpW32; - - pi = &in_vector[0]; - outPtr = &out_vector[0]; - - // The sum of the absolute values of the impulse response: - // The zero/pole-filter has a max amplification of a single sample of: 1.4546 - // Impulse response: 0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194 - // The all-zero section has a max amplification of a single sample of: 1.6189 - // Impulse response: 0.4047 -0.8094 0.4047 0 0 0 - // The all-pole section has a max amplification of a single sample of: 1.9931 - // Impulse response: 1.0000 0.4734 -0.1189 -0.2187 -0.0627 0.04532 - - for (i = 0; i < in_vector_length; i++) - { - // all-zero section (filter coefficients in Q14) - tmpW32 = (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[0], (*pi)); - tmpW32 += (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[1], filter_state[0]); - tmpW32 += (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[2], filter_state[1]); // Q14 - filter_state[1] = filter_state[0]; - filter_state[0] = *pi++; - - // all-pole section - tmpW32 -= (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpPoleCoefs[1], filter_state[2]); // Q14 - tmpW32 -= (WebRtc_Word32)WEBRTC_SPL_MUL_16_16(kHpPoleCoefs[2], filter_state[3]); - filter_state[3] = filter_state[2]; - filter_state[2] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32 (tmpW32, 14); - *outPtr++ = filter_state[2]; - } -} - -void WebRtcVad_Allpass(WebRtc_Word16 *in_vector, - WebRtc_Word16 *out_vector, - WebRtc_Word16 filter_coefficients, - int vector_length, - WebRtc_Word16 *filter_state) -{ - // The filter can only cause overflow (in the w16 output variable) - // if more than 4 consecutive input numbers are of maximum value and - // have the same sign as the impulse response's first 
taps. - // First 6 taps of the impulse response: 0.6399 0.5905 -0.3779 - // 0.2418 -0.1547 0.0990 - - int n; - WebRtc_Word16 tmp16; - WebRtc_Word32 tmp32, in32, state32; - - state32 = WEBRTC_SPL_LSHIFT_W32(((WebRtc_Word32)(*filter_state)), 16); // Q31 - - for (n = 0; n < vector_length; n++) - { - - tmp32 = state32 + WEBRTC_SPL_MUL_16_16(filter_coefficients, (*in_vector)); - tmp16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 16); - *out_vector++ = tmp16; - in32 = WEBRTC_SPL_LSHIFT_W32(((WebRtc_Word32)(*in_vector)), 14); - state32 = in32 - WEBRTC_SPL_MUL_16_16(filter_coefficients, tmp16); - state32 = WEBRTC_SPL_LSHIFT_W32(state32, 1); - in_vector += 2; - } - - *filter_state = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(state32, 16); -} - -void WebRtcVad_SplitFilter(WebRtc_Word16 *in_vector, - WebRtc_Word16 *out_vector_hp, - WebRtc_Word16 *out_vector_lp, - WebRtc_Word16 *upper_state, - WebRtc_Word16 *lower_state, - int in_vector_length) -{ - WebRtc_Word16 tmpOut; - int k, halflen; - - // Downsampling by 2 and get two branches - halflen = WEBRTC_SPL_RSHIFT_W16(in_vector_length, 1); - - // All-pass filtering upper branch - WebRtcVad_Allpass(&in_vector[0], out_vector_hp, kAllPassCoefsQ15[0], halflen, upper_state); - - // All-pass filtering lower branch - WebRtcVad_Allpass(&in_vector[1], out_vector_lp, kAllPassCoefsQ15[1], halflen, lower_state); - - // Make LP and HP signals - for (k = 0; k < halflen; k++) - { - tmpOut = *out_vector_hp; - *out_vector_hp++ -= *out_vector_lp; - *out_vector_lp++ += tmpOut; - } -} - -WebRtc_Word16 WebRtcVad_get_features(VadInstT *inst, - WebRtc_Word16 *in_vector, - int frame_size, - WebRtc_Word16 *out_vector) -{ - int curlen, filtno; - WebRtc_Word16 vecHP1[120], vecLP1[120]; - WebRtc_Word16 vecHP2[60], vecLP2[60]; - WebRtc_Word16 *ptin; - WebRtc_Word16 *hptout, *lptout; - WebRtc_Word16 power = 0; - - // Split at 2000 Hz and downsample - filtno = 0; - ptin = in_vector; - hptout = vecHP1; - lptout = vecLP1; - curlen = frame_size; - 
WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno], - &inst->lower_state[filtno], curlen); - - // Split at 3000 Hz and downsample - filtno = 1; - ptin = vecHP1; - hptout = vecHP2; - lptout = vecLP2; - curlen = WEBRTC_SPL_RSHIFT_W16(frame_size, 1); - - WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno], - &inst->lower_state[filtno], curlen); - - // Energy in 3000 Hz - 4000 Hz - curlen = WEBRTC_SPL_RSHIFT_W16(curlen, 1); - WebRtcVad_LogOfEnergy(vecHP2, &out_vector[5], &power, kOffsetVector[5], curlen); - - // Energy in 2000 Hz - 3000 Hz - WebRtcVad_LogOfEnergy(vecLP2, &out_vector[4], &power, kOffsetVector[4], curlen); - - // Split at 1000 Hz and downsample - filtno = 2; - ptin = vecLP1; - hptout = vecHP2; - lptout = vecLP2; - curlen = WEBRTC_SPL_RSHIFT_W16(frame_size, 1); - WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno], - &inst->lower_state[filtno], curlen); - - // Energy in 1000 Hz - 2000 Hz - curlen = WEBRTC_SPL_RSHIFT_W16(curlen, 1); - WebRtcVad_LogOfEnergy(vecHP2, &out_vector[3], &power, kOffsetVector[3], curlen); - - // Split at 500 Hz - filtno = 3; - ptin = vecLP2; - hptout = vecHP1; - lptout = vecLP1; - - WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno], - &inst->lower_state[filtno], curlen); - - // Energy in 500 Hz - 1000 Hz - curlen = WEBRTC_SPL_RSHIFT_W16(curlen, 1); - WebRtcVad_LogOfEnergy(vecHP1, &out_vector[2], &power, kOffsetVector[2], curlen); - // Split at 250 Hz - filtno = 4; - ptin = vecLP1; - hptout = vecHP2; - lptout = vecLP2; - - WebRtcVad_SplitFilter(ptin, hptout, lptout, &inst->upper_state[filtno], - &inst->lower_state[filtno], curlen); - - // Energy in 250 Hz - 500 Hz - curlen = WEBRTC_SPL_RSHIFT_W16(curlen, 1); - WebRtcVad_LogOfEnergy(vecHP2, &out_vector[1], &power, kOffsetVector[1], curlen); - - // Remove DC and LFs - WebRtcVad_HpOutput(vecLP2, curlen, vecHP1, inst->hp_filter_state); - - // Power in 80 Hz - 250 Hz - WebRtcVad_LogOfEnergy(vecHP1, 
&out_vector[0], &power, kOffsetVector[0], curlen); - - return power; -} - -void WebRtcVad_LogOfEnergy(WebRtc_Word16 *vector, - WebRtc_Word16 *enerlogval, - WebRtc_Word16 *power, - WebRtc_Word16 offset, - int vector_length) -{ - WebRtc_Word16 enerSum = 0; - WebRtc_Word16 zeros, frac, log2; - WebRtc_Word32 energy; - - int shfts = 0, shfts2; - - energy = WebRtcSpl_Energy(vector, vector_length, &shfts); - - if (energy > 0) - { - - shfts2 = 16 - WebRtcSpl_NormW32(energy); - shfts += shfts2; - // "shfts" is the total number of right shifts that has been done to enerSum. - enerSum = (WebRtc_Word16)WEBRTC_SPL_SHIFT_W32(energy, -shfts2); - - // Find: - // 160*log10(enerSum*2^shfts) = 160*log10(2)*log2(enerSum*2^shfts) = - // 160*log10(2)*(log2(enerSum) + log2(2^shfts)) = - // 160*log10(2)*(log2(enerSum) + shfts) - - zeros = WebRtcSpl_NormU32(enerSum); - frac = (WebRtc_Word16)(((WebRtc_UWord32)((WebRtc_Word32)(enerSum) << zeros) - & 0x7FFFFFFF) >> 21); - log2 = (WebRtc_Word16)(((31 - zeros) << 10) + frac); - - *enerlogval = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(kLogConst, log2, 19) - + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(shfts, kLogConst, 9); - - if (*enerlogval < 0) - { - *enerlogval = 0; - } - } else - { - *enerlogval = 0; - shfts = -15; - enerSum = 0; - } - - *enerlogval += offset; - - // Total power in frame - if (*power <= MIN_ENERGY) - { - if (shfts > 0) - { - *power += MIN_ENERGY + 1; - } else if (WEBRTC_SPL_SHIFT_W16(enerSum, shfts) > MIN_ENERGY) - { - *power += MIN_ENERGY + 1; - } else - { - *power += WEBRTC_SPL_SHIFT_W16(enerSum, shfts); - } - } -} diff --git a/src/common_audio/vad/main/source/vad_filterbank.h b/src/common_audio/vad/main/source/vad_filterbank.h deleted file mode 100644 index a5507ead65..0000000000 --- a/src/common_audio/vad/main/source/vad_filterbank.h +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/* - * This header file includes the descriptions of the internal filterbank - * associated functions. - */ - -#ifndef WEBRTC_VAD_FILTERBANK_H_ -#define WEBRTC_VAD_FILTERBANK_H_ - -#include "vad_core.h" - -/**************************************************************************** - * WebRtcVad_HpOutput(...) - * - * This function removes DC from the lowest frequency band - * - * Input: - * - in_vector : Samples in the frequency interval 0 - 250 Hz - * - in_vector_length : Length of input and output vector - * - filter_state : Current state of the filter - * - * Output: - * - out_vector : Samples in the frequency interval 80 - 250 Hz - * - filter_state : Updated state of the filter - * - */ -void WebRtcVad_HpOutput(WebRtc_Word16* in_vector, - WebRtc_Word16 in_vector_length, - WebRtc_Word16* out_vector, - WebRtc_Word16* filter_state); - -/**************************************************************************** - * WebRtcVad_Allpass(...) - * - * This function is used when splitting a speech file into - * different frequency bands - * - * Note! Do NOT let the arrays in_vector and out_vector correspond to the same address. 
- * - * Input: - * - in_vector : (Q0) - * - filter_coefficients : (Q15) - * - vector_length : Length of input and output vector - * - filter_state : Current state of the filter (Q(-1)) - * - * Output: - * - out_vector : Output speech signal (Q(-1)) - * - filter_state : Updated state of the filter (Q(-1)) - * - */ -void WebRtcVad_Allpass(WebRtc_Word16* in_vector, - WebRtc_Word16* outw16, - WebRtc_Word16 filter_coefficients, - int vector_length, - WebRtc_Word16* filter_state); - -/**************************************************************************** - * WebRtcVad_SplitFilter(...) - * - * This function is used when before splitting a speech file into - * different frequency bands - * - * Input: - * - in_vector : Input signal to be split into two frequency bands. - * - upper_state : Current state of the upper filter - * - lower_state : Current state of the lower filter - * - in_vector_length : Length of input vector - * - * Output: - * - out_vector_hp : Upper half of the spectrum - * - out_vector_lp : Lower half of the spectrum - * - upper_state : Updated state of the upper filter - * - lower_state : Updated state of the lower filter - * - */ -void WebRtcVad_SplitFilter(WebRtc_Word16* in_vector, - WebRtc_Word16* out_vector_hp, - WebRtc_Word16* out_vector_lp, - WebRtc_Word16* upper_state, - WebRtc_Word16* lower_state, - int in_vector_length); - -/**************************************************************************** - * WebRtcVad_get_features(...) - * - * This function is used to get the logarithm of the power of each of the - * 6 frequency bands used by the VAD: - * 80 Hz - 250 Hz - * 250 Hz - 500 Hz - * 500 Hz - 1000 Hz - * 1000 Hz - 2000 Hz - * 2000 Hz - 3000 Hz - * 3000 Hz - 4000 Hz - * - * Input: - * - inst : Pointer to VAD instance - * - in_vector : Input speech signal - * - frame_size : Frame size, in number of samples - * - * Output: - * - out_vector : 10*log10(power in each freq. band), Q4 - * - * Return: total power in the signal (NOTE! 
This value is not exact since it - * is only used in a comparison. - */ -WebRtc_Word16 WebRtcVad_get_features(VadInstT* inst, - WebRtc_Word16* in_vector, - int frame_size, - WebRtc_Word16* out_vector); - -/**************************************************************************** - * WebRtcVad_LogOfEnergy(...) - * - * This function is used to get the logarithm of the power of one frequency band. - * - * Input: - * - vector : Input speech samples for one frequency band - * - offset : Offset value for the current frequency band - * - vector_length : Length of input vector - * - * Output: - * - enerlogval : 10*log10(energy); - * - power : Update total power in speech frame. NOTE! This value - * is not exact since it is only used in a comparison. - * - */ -void WebRtcVad_LogOfEnergy(WebRtc_Word16* vector, - WebRtc_Word16* enerlogval, - WebRtc_Word16* power, - WebRtc_Word16 offset, - int vector_length); - -#endif // WEBRTC_VAD_FILTERBANK_H_ diff --git a/src/common_audio/vad/main/source/vad_gmm.c b/src/common_audio/vad/main/source/vad_gmm.c deleted file mode 100644 index 23d12fb335..0000000000 --- a/src/common_audio/vad/main/source/vad_gmm.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/* - * This file includes the implementation of the internal VAD call - * WebRtcVad_GaussianProbability. For function description, see vad_gmm.h. 
- */ - -#include "vad_gmm.h" -#include "signal_processing_library.h" -#include "vad_const.h" - -WebRtc_Word32 WebRtcVad_GaussianProbability(WebRtc_Word16 in_sample, - WebRtc_Word16 mean, - WebRtc_Word16 std, - WebRtc_Word16 *delta) -{ - WebRtc_Word16 tmp16, tmpDiv, tmpDiv2, expVal, tmp16_1, tmp16_2; - WebRtc_Word32 tmp32, y32; - - // Calculate tmpDiv=1/std, in Q10 - tmp32 = (WebRtc_Word32)WEBRTC_SPL_RSHIFT_W16(std,1) + (WebRtc_Word32)131072; // 1 in Q17 - tmpDiv = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32, std); // Q17/Q7 = Q10 - - // Calculate tmpDiv2=1/std^2, in Q14 - tmp16 = WEBRTC_SPL_RSHIFT_W16(tmpDiv, 2); // From Q10 to Q8 - tmpDiv2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16, tmp16, 2); // (Q8 * Q8)>>2 = Q14 - - tmp16 = WEBRTC_SPL_LSHIFT_W16(in_sample, 3); // Q7 - tmp16 = tmp16 - mean; // Q7 - Q7 = Q7 - - // To be used later, when updating noise/speech model - // delta = (x-m)/std^2, in Q11 - *delta = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmpDiv2, tmp16, 10); //(Q14*Q7)>>10 = Q11 - - // Calculate tmp32=(x-m)^2/(2*std^2), in Q10 - tmp32 = (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT(*delta, tmp16, 9); // One shift for /2 - - // Calculate expVal ~= exp(-(x-m)^2/(2*std^2)) ~= exp2(-log2(exp(1))*tmp32) - if (tmp32 < kCompVar) - { - // Calculate tmp16 = log2(exp(1))*tmp32 , in Q10 - tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((WebRtc_Word16)tmp32, - kLog10Const, 12); - tmp16 = -tmp16; - tmp16_2 = (WebRtc_Word16)(0x0400 | (tmp16 & 0x03FF)); - tmp16_1 = (WebRtc_Word16)(tmp16 ^ 0xFFFF); - tmp16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W16(tmp16_1, 10); - tmp16 += 1; - // Calculate expVal ~= exp2(-log2(exp(1))*tmp32), in Q10 - expVal = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32((WebRtc_Word32)tmp16_2, tmp16); - - } else - { - expVal = 0; - } - - // Calculate y32=(1/std)*exp(-(x-m)^2/(2*std^2)), in Q20 - y32 = WEBRTC_SPL_MUL_16_16(tmpDiv, expVal); // Q10 * Q10 = Q20 - - return y32; // Q20 -} diff --git a/src/common_audio/vad/main/source/vad_gmm.h 
b/src/common_audio/vad/main/source/vad_gmm.h deleted file mode 100644 index e0747fb7e5..0000000000 --- a/src/common_audio/vad/main/source/vad_gmm.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/* - * This header file includes the description of the internal VAD call - * WebRtcVad_GaussianProbability. - */ - -#ifndef WEBRTC_VAD_GMM_H_ -#define WEBRTC_VAD_GMM_H_ - -#include "typedefs.h" - -/**************************************************************************** - * WebRtcVad_GaussianProbability(...) - * - * This function calculates the probability for the value 'in_sample', given that in_sample - * comes from a normal distribution with mean 'mean' and standard deviation 'std'. - * - * Input: - * - in_sample : Input sample in Q4 - * - mean : mean value in the statistical model, Q7 - * - std : standard deviation, Q7 - * - * Output: - * - * - delta : Value used when updating the model, Q11 - * - * Return: - * - out : out = 1/std * exp(-(x-m)^2/(2*std^2)); - * Probability for x. - * - */ -WebRtc_Word32 WebRtcVad_GaussianProbability(WebRtc_Word16 in_sample, - WebRtc_Word16 mean, - WebRtc_Word16 std, - WebRtc_Word16 *delta); - -#endif // WEBRTC_VAD_GMM_H_ diff --git a/src/common_audio/vad/main/source/vad_sp.c b/src/common_audio/vad/main/source/vad_sp.c deleted file mode 100644 index f347ab5904..0000000000 --- a/src/common_audio/vad/main/source/vad_sp.c +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/* - * This file includes the implementation of the VAD internal calls for Downsampling and - * FindMinimum. - * For function call descriptions; See vad_sp.h. - */ - -#include "vad_sp.h" -#include "vad_defines.h" -#include "vad_const.h" -#include "signal_processing_library.h" - -// Downsampling filter based on the splitting filter and the allpass functions -// in vad_filterbank.c -void WebRtcVad_Downsampling(WebRtc_Word16* signal_in, - WebRtc_Word16* signal_out, - WebRtc_Word32* filter_state, - int inlen) -{ - WebRtc_Word16 tmp16_1, tmp16_2; - WebRtc_Word32 tmp32_1, tmp32_2; - int n, halflen; - - // Downsampling by 2 and get two branches - halflen = WEBRTC_SPL_RSHIFT_W16(inlen, 1); - - tmp32_1 = filter_state[0]; - tmp32_2 = filter_state[1]; - - // Filter coefficients in Q13, filter state in Q0 - for (n = 0; n < halflen; n++) - { - // All-pass filtering upper branch - tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32_1, 1) - + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[0]), - *signal_in, 14); - *signal_out = tmp16_1; - tmp32_1 = (WebRtc_Word32)(*signal_in++) - - (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[0]), tmp16_1, 12); - - // All-pass filtering lower branch - tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32_2, 1) - + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[1]), - *signal_in, 14); - *signal_out++ += tmp16_2; - tmp32_2 = (WebRtc_Word32)(*signal_in++) - - (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[1]), tmp16_2, 12); - } - filter_state[0] = tmp32_1; - filter_state[1] = tmp32_2; -} - -WebRtc_Word16 WebRtcVad_FindMinimum(VadInstT* inst, - WebRtc_Word16 x, - 
int n) -{ - int i, j, k, II = -1, offset; - WebRtc_Word16 meanV, alpha; - WebRtc_Word32 tmp32, tmp32_1; - WebRtc_Word16 *valptr, *idxptr, *p1, *p2, *p3; - - // Offset to beginning of the 16 minimum values in memory - offset = WEBRTC_SPL_LSHIFT_W16(n, 4); - - // Pointer to memory for the 16 minimum values and the age of each value - idxptr = &inst->index_vector[offset]; - valptr = &inst->low_value_vector[offset]; - - // Each value in low_value_vector is getting 1 loop older. - // Update age of each value in indexVal, and remove old values. - for (i = 0; i < 16; i++) - { - p3 = idxptr + i; - if (*p3 != 100) - { - *p3 += 1; - } else - { - p1 = valptr + i + 1; - p2 = p3 + 1; - for (j = i; j < 16; j++) - { - *(valptr + j) = *p1++; - *(idxptr + j) = *p2++; - } - *(idxptr + 15) = 101; - *(valptr + 15) = 10000; - } - } - - // Check if x smaller than any of the values in low_value_vector. - // If so, find position. - if (x < *(valptr + 7)) - { - if (x < *(valptr + 3)) - { - if (x < *(valptr + 1)) - { - if (x < *valptr) - { - II = 0; - } else - { - II = 1; - } - } else if (x < *(valptr + 2)) - { - II = 2; - } else - { - II = 3; - } - } else if (x < *(valptr + 5)) - { - if (x < *(valptr + 4)) - { - II = 4; - } else - { - II = 5; - } - } else if (x < *(valptr + 6)) - { - II = 6; - } else - { - II = 7; - } - } else if (x < *(valptr + 15)) - { - if (x < *(valptr + 11)) - { - if (x < *(valptr + 9)) - { - if (x < *(valptr + 8)) - { - II = 8; - } else - { - II = 9; - } - } else if (x < *(valptr + 10)) - { - II = 10; - } else - { - II = 11; - } - } else if (x < *(valptr + 13)) - { - if (x < *(valptr + 12)) - { - II = 12; - } else - { - II = 13; - } - } else if (x < *(valptr + 14)) - { - II = 14; - } else - { - II = 15; - } - } - - // Put new min value on right position and shift bigger values up - if (II > -1) - { - for (i = 15; i > II; i--) - { - k = i - 1; - *(valptr + i) = *(valptr + k); - *(idxptr + i) = *(idxptr + k); - } - *(valptr + II) = x; - *(idxptr + II) = 1; - } - - 
meanV = 0; - if ((inst->frame_counter) > 4) - { - j = 5; - } else - { - j = inst->frame_counter; - } - - if (j > 2) - { - meanV = *(valptr + 2); - } else if (j > 0) - { - meanV = *valptr; - } else - { - meanV = 1600; - } - - if (inst->frame_counter > 0) - { - if (meanV < inst->mean_value[n]) - { - alpha = (WebRtc_Word16)ALPHA1; // 0.2 in Q15 - } else - { - alpha = (WebRtc_Word16)ALPHA2; // 0.99 in Q15 - } - } else - { - alpha = 0; - } - - tmp32 = WEBRTC_SPL_MUL_16_16((alpha+1), inst->mean_value[n]); - tmp32_1 = WEBRTC_SPL_MUL_16_16(WEBRTC_SPL_WORD16_MAX - alpha, meanV); - tmp32 += tmp32_1; - tmp32 += 16384; - inst->mean_value[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 15); - - return inst->mean_value[n]; -} diff --git a/src/common_audio/vad/main/source/vad_sp.h b/src/common_audio/vad/main/source/vad_sp.h deleted file mode 100644 index ae15c11ad6..0000000000 --- a/src/common_audio/vad/main/source/vad_sp.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/* - * This header file includes the VAD internal calls for Downsampling and FindMinimum. - * Specific function calls are given below. - */ - -#ifndef WEBRTC_VAD_SP_H_ -#define WEBRTC_VAD_SP_H_ - -#include "vad_core.h" - -/**************************************************************************** - * WebRtcVad_Downsampling(...) - * - * Downsamples the signal a factor 2, eg. 
32->16 or 16->8 - * - * Input: - * - signal_in : Input signal - * - in_length : Length of input signal in samples - * - * Input & Output: - * - filter_state : Filter state for first all-pass filters - * - * Output: - * - signal_out : Downsampled signal (of length len/2) - */ -void WebRtcVad_Downsampling(WebRtc_Word16* signal_in, - WebRtc_Word16* signal_out, - WebRtc_Word32* filter_state, - int in_length); - -/**************************************************************************** - * WebRtcVad_FindMinimum(...) - * - * Find the five lowest values of x in 100 frames long window. Return a mean - * value of these five values. - * - * Input: - * - feature_value : Feature value - * - channel : Channel number - * - * Input & Output: - * - inst : State information - * - * Output: - * return value : Weighted minimum value for a moving window. - */ -WebRtc_Word16 WebRtcVad_FindMinimum(VadInstT* inst, WebRtc_Word16 feature_value, int channel); - -#endif // WEBRTC_VAD_SP_H_ diff --git a/src/common_audio/vad/main/source/webrtc_vad.c b/src/common_audio/vad/main/source/webrtc_vad.c deleted file mode 100644 index dcfbda1128..0000000000 --- a/src/common_audio/vad/main/source/webrtc_vad.c +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/* - * This file includes the VAD API calls. 
For a specific function call description, - * see webrtc_vad.h - */ - -#include <stdlib.h> -#include <string.h> - -#include "webrtc_vad.h" -#include "vad_core.h" - -static const int kInitCheck = 42; - -WebRtc_Word16 WebRtcVad_get_version(char *version, size_t size_bytes) -{ - const char my_version[] = "VAD 1.2.0"; - - if (version == NULL) - { - return -1; - } - - if (size_bytes < sizeof(my_version)) - { - return -1; - } - - memcpy(version, my_version, sizeof(my_version)); - return 0; -} - -WebRtc_Word16 WebRtcVad_AssignSize(int *size_in_bytes) -{ - *size_in_bytes = sizeof(VadInstT) * 2 / sizeof(WebRtc_Word16); - return 0; -} - -WebRtc_Word16 WebRtcVad_Assign(VadInst **vad_inst, void *vad_inst_addr) -{ - - if (vad_inst == NULL) - { - return -1; - } - - if (vad_inst_addr != NULL) - { - *vad_inst = (VadInst*)vad_inst_addr; - return 0; - } else - { - return -1; - } -} - -WebRtc_Word16 WebRtcVad_Create(VadInst **vad_inst) -{ - - VadInstT *vad_ptr = NULL; - - if (vad_inst == NULL) - { - return -1; - } - - *vad_inst = NULL; - - vad_ptr = (VadInstT *)malloc(sizeof(VadInstT)); - *vad_inst = (VadInst *)vad_ptr; - - if (vad_ptr == NULL) - { - return -1; - } - - vad_ptr->init_flag = 0; - - return 0; -} - -WebRtc_Word16 WebRtcVad_Free(VadInst *vad_inst) -{ - - if (vad_inst == NULL) - { - return -1; - } - - free(vad_inst); - return 0; -} - -WebRtc_Word16 WebRtcVad_Init(VadInst *vad_inst) -{ - short mode = 0; // Default high quality - - if (vad_inst == NULL) - { - return -1; - } - - return WebRtcVad_InitCore((VadInstT*)vad_inst, mode); -} - -WebRtc_Word16 WebRtcVad_set_mode(VadInst *vad_inst, WebRtc_Word16 mode) -{ - VadInstT* vad_ptr; - - if (vad_inst == NULL) - { - return -1; - } - - vad_ptr = (VadInstT*)vad_inst; - if (vad_ptr->init_flag != kInitCheck) - { - return -1; - } - - return WebRtcVad_set_mode_core((VadInstT*)vad_inst, mode); -} - -WebRtc_Word16 WebRtcVad_Process(VadInst *vad_inst, - WebRtc_Word16 fs, - WebRtc_Word16 *speech_frame, - WebRtc_Word16 frame_length) -{ - 
WebRtc_Word16 vad; - VadInstT* vad_ptr; - - if (vad_inst == NULL) - { - return -1; - } - - vad_ptr = (VadInstT*)vad_inst; - if (vad_ptr->init_flag != kInitCheck) - { - return -1; - } - - if (speech_frame == NULL) - { - return -1; - } - - if (fs == 32000) - { - if ((frame_length != 320) && (frame_length != 640) && (frame_length != 960)) - { - return -1; - } - vad = WebRtcVad_CalcVad32khz((VadInstT*)vad_inst, speech_frame, frame_length); - - } else if (fs == 16000) - { - if ((frame_length != 160) && (frame_length != 320) && (frame_length != 480)) - { - return -1; - } - vad = WebRtcVad_CalcVad16khz((VadInstT*)vad_inst, speech_frame, frame_length); - - } else if (fs == 8000) - { - if ((frame_length != 80) && (frame_length != 160) && (frame_length != 240)) - { - return -1; - } - vad = WebRtcVad_CalcVad8khz((VadInstT*)vad_inst, speech_frame, frame_length); - - } else - { - return -1; // Not a supported sampling frequency - } - - if (vad > 0) - { - return 1; - } else if (vad == 0) - { - return 0; - } else - { - return -1; - } -} |