aboutsummaryrefslogtreecommitdiff
path: root/src/common_audio/vad/vad_filterbank.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/common_audio/vad/vad_filterbank.h')
-rw-r--r--src/common_audio/vad/vad_filterbank.h144
1 files changed, 144 insertions, 0 deletions
diff --git a/src/common_audio/vad/vad_filterbank.h b/src/common_audio/vad/vad_filterbank.h
new file mode 100644
index 0000000000..1285c47dda
--- /dev/null
+++ b/src/common_audio/vad/vad_filterbank.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This header file declares the internal VAD filterbank functions:
+ * WebRtcVad_HpOutput, WebRtcVad_Allpass, WebRtcVad_SplitFilter,
+ * WebRtcVad_get_features and WebRtcVad_LogOfEnergy.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
+#define WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
+
+#include "typedefs.h"
+#include "vad_core.h"
+
+// TODO(bjornv): Move local functions to vad_filterbank.c and make static.
+/****************************************************************************
+ * WebRtcVad_HpOutput(...)
+ *
+ * This function removes DC from the lowest frequency band.
+ *
+ * Input:
+ * - in_vector : Samples in the frequency interval 0 - 250 Hz
+ * - in_vector_length : Length of input and output vector, in samples
+ * - filter_state : Current state of the filter (in/out, see Output)
+ *
+ * Output:
+ * - out_vector : Samples in the frequency interval 80 - 250 Hz;
+ * same length as in_vector
+ * - filter_state : Updated state of the filter
+ *
+ * NOTE(review): the number of elements expected in filter_state is not
+ * stated in this header; confirm against the implementation.
+ *
+ */
+void WebRtcVad_HpOutput(int16_t* in_vector,
+ int in_vector_length,
+ int16_t* filter_state,
+ int16_t* out_vector);
+
+/****************************************************************************
+ * WebRtcVad_Allpass(...)
+ *
+ * This function is used when splitting a speech signal into different
+ * frequency bands.
+ *
+ * Note! in_vector and the output vector must NOT point to the same address;
+ * in-place operation is not supported.
+ *
+ * Input:
+ * - in_vector : Input signal (Q0)
+ * - filter_coefficients : Filter coefficient, passed by value (Q15)
+ * - vector_length : Length of input and output vector
+ * - filter_state : Current state of the filter (Q(-1)) (in/out)
+ *
+ * Output:
+ * - out_vector : Output speech signal (Q(-1)). This parameter is
+ * named |outw16| in the prototype below.
+ * - filter_state : Updated state of the filter (Q(-1))
+ *
+ */
+void WebRtcVad_Allpass(int16_t* in_vector,
+ int16_t filter_coefficients,
+ int vector_length,
+ int16_t* filter_state,
+ int16_t* outw16);
+
+/****************************************************************************
+ * WebRtcVad_SplitFilter(...)
+ *
+ * This function splits the input signal into two frequency bands: the upper
+ * half of the spectrum (out_vector_hp) and the lower half of the spectrum
+ * (out_vector_lp).
+ *
+ * Input:
+ * - in_vector : Input signal to be split into two frequency bands.
+ * - in_vector_length : Length of input vector
+ * - upper_state : Current state of the upper filter (in/out)
+ * - lower_state : Current state of the lower filter (in/out)
+ *
+ * Output:
+ * - out_vector_hp : Upper half of the spectrum
+ * - out_vector_lp : Lower half of the spectrum
+ * - upper_state : Updated state of the upper filter
+ * - lower_state : Updated state of the lower filter
+ *
+ * NOTE(review): the length of the two output vectors relative to
+ * in_vector_length is not documented here; confirm against the
+ * implementation before sizing the buffers.
+ *
+ */
+void WebRtcVad_SplitFilter(int16_t* in_vector,
+ int in_vector_length,
+ int16_t* upper_state,
+ int16_t* lower_state,
+ int16_t* out_vector_hp,
+ int16_t* out_vector_lp);
+
+/****************************************************************************
+ * WebRtcVad_get_features(...)
+ *
+ * This function is used to get the logarithm of the power of each of the
+ * 6 frequency bands used by the VAD:
+ * 80 Hz - 250 Hz
+ * 250 Hz - 500 Hz
+ * 500 Hz - 1000 Hz
+ * 1000 Hz - 2000 Hz
+ * 2000 Hz - 3000 Hz
+ * 3000 Hz - 4000 Hz
+ *
+ * Input:
+ * - inst : Pointer to VAD instance
+ * - in_vector : Input speech signal
+ * - frame_size : Frame size, in number of samples
+ *
+ * Output:
+ * - out_vector : 10*log10(power in each freq. band), Q4.
+ * Presumably one value per band listed above (6 values);
+ * confirm against the implementation.
+ *
+ * Return: total power in the signal (NOTE! This value is not exact since it
+ * is only used in a comparison.)
+ */
+int16_t WebRtcVad_get_features(VadInstT* inst,
+ int16_t* in_vector,
+ int frame_size,
+ int16_t* out_vector);
+
+/****************************************************************************
+ * WebRtcVad_LogOfEnergy(...)
+ *
+ * This function is used to get the logarithm of the power of one frequency
+ * band.
+ *
+ * Input:
+ * - vector : Input speech samples for one frequency band
+ * - vector_length : Length of input vector
+ * - offset : Offset value for the current frequency band
+ *
+ * Output:
+ * - power : Updated total power in speech frame. NOTE! This
+ * value is not exact since it is only used in a
+ * comparison.
+ * - log_energy : 10*log10(energy)
+ *
+ */
+void WebRtcVad_LogOfEnergy(int16_t* vector,
+ int vector_length,
+ int16_t offset,
+ int16_t* power,
+ int16_t* log_energy);
+
+#endif // WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_