diff options
Diffstat (limited to 'src/common_audio/vad/vad_filterbank.h')
-rw-r--r-- | src/common_audio/vad/vad_filterbank.h | 144 |
1 files changed, 144 insertions, 0 deletions
diff --git a/src/common_audio/vad/vad_filterbank.h b/src/common_audio/vad/vad_filterbank.h new file mode 100644 index 0000000000..1285c47dda --- /dev/null +++ b/src/common_audio/vad/vad_filterbank.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This header file includes the description of the internal VAD call + * WebRtcVad_GaussianProbability. + */ + +#ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ +#define WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ + +#include "typedefs.h" +#include "vad_core.h" + +// TODO(bjornv): Move local functions to vad_filterbank.c and make static. +/**************************************************************************** + * WebRtcVad_HpOutput(...) + * + * This function removes DC from the lowest frequency band + * + * Input: + * - in_vector : Samples in the frequency interval 0 - 250 Hz + * - in_vector_length : Length of input and output vector + * - filter_state : Current state of the filter + * + * Output: + * - out_vector : Samples in the frequency interval 80 - 250 Hz + * - filter_state : Updated state of the filter + * + */ +void WebRtcVad_HpOutput(int16_t* in_vector, + int in_vector_length, + int16_t* filter_state, + int16_t* out_vector); + +/**************************************************************************** + * WebRtcVad_Allpass(...) + * + * This function is used when before splitting a speech file into + * different frequency bands + * + * Note! Do NOT let the arrays in_vector and out_vector correspond to the same address. + * + * Input: + * - in_vector : (Q0) + * - filter_coefficients : (Q15) + * - vector_length : Length of input and output vector + * - filter_state : Current state of the filter (Q(-1)) + * + * Output: + * - out_vector : Output speech signal (Q(-1)) + * - filter_state : Updated state of the filter (Q(-1)) + * + */ +void WebRtcVad_Allpass(int16_t* in_vector, + int16_t filter_coefficients, + int vector_length, + int16_t* filter_state, + int16_t* outw16); + +/**************************************************************************** + * WebRtcVad_SplitFilter(...) + * + * This function is used when before splitting a speech file into + * different frequency bands + * + * Input: + * - in_vector : Input signal to be split into two frequency bands. + * - upper_state : Current state of the upper filter + * - lower_state : Current state of the lower filter + * - in_vector_length : Length of input vector + * + * Output: + * - out_vector_hp : Upper half of the spectrum + * - out_vector_lp : Lower half of the spectrum + * - upper_state : Updated state of the upper filter + * - lower_state : Updated state of the lower filter + * + */ +void WebRtcVad_SplitFilter(int16_t* in_vector, + int in_vector_length, + int16_t* upper_state, + int16_t* lower_state, + int16_t* out_vector_hp, + int16_t* out_vector_lp); + +/**************************************************************************** + * WebRtcVad_get_features(...) + * + * This function is used to get the logarithm of the power of each of the + * 6 frequency bands used by the VAD: + * 80 Hz - 250 Hz + * 250 Hz - 500 Hz + * 500 Hz - 1000 Hz + * 1000 Hz - 2000 Hz + * 2000 Hz - 3000 Hz + * 3000 Hz - 4000 Hz + * + * Input: + * - inst : Pointer to VAD instance + * - in_vector : Input speech signal + * - frame_size : Frame size, in number of samples + * + * Output: + * - out_vector : 10*log10(power in each freq. band), Q4 + * + * Return: total power in the signal (NOTE! This value is not exact since it + * is only used in a comparison. + */ +int16_t WebRtcVad_get_features(VadInstT* inst, + int16_t* in_vector, + int frame_size, + int16_t* out_vector); + +/**************************************************************************** + * WebRtcVad_LogOfEnergy(...) + * + * This function is used to get the logarithm of the power of one frequency band. + * + * Input: + * - vector : Input speech samples for one frequency band + * - offset : Offset value for the current frequency band + * - vector_length : Length of input vector + * + * Output: + * - log_energy : 10*log10(energy); + * - power : Update total power in speech frame. NOTE! This value + * is not exact since it is only used in a comparison. + * + */ +void WebRtcVad_LogOfEnergy(int16_t* vector, + int vector_length, + int16_t offset, + int16_t* power, + int16_t* log_energy); + +#endif // WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ |