aboutsummaryrefslogtreecommitdiff
path: root/src/common_audio/vad/vad_filterbank.c
blob: 63eef5b2bbb6f492f630472ebc86b00e3b4e11b7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

/*
 * This file includes the implementation of the internal filterbank associated functions.
 * For function description, see vad_filterbank.h.
 */

#include "vad_filterbank.h"

#include "signal_processing_library.h"
#include "typedefs.h"
#include "vad_defines.h"

// Constant 160*log10(2) in Q9 (160 * 0.30103 * 512 ~= 24660).
// WebRtcVad_LogOfEnergy multiplies a base-2 logarithm by this value to obtain
// 160*log10() of the energy.
static const int16_t kLogConst = 24660;

// Coefficients used by WebRtcVad_HpOutput, Q14.
// kHpZeroCoefs holds the three taps of the all-zero section and kHpPoleCoefs
// the taps of the all-pole section. kHpPoleCoefs[0] is 1.0 in Q14 and is not
// read by the filter loop; only entries 1 and 2 are applied.
static const int16_t kHpZeroCoefs[3] = { 6631, -13262, 6631 };
static const int16_t kHpPoleCoefs[3] = { 16384, -7756, 5620 };

// Allpass filter coefficients, upper and lower, in Q15
// Upper: 0.64, Lower: 0.17
// WebRtcVad_SplitFilter passes entry 0 to the branch that starts at the first
// input sample and entry 1 to the branch that starts at the second sample.
static const int16_t kAllPassCoefsQ15[2] = { 20972, 5571 };

// Adjustment for division with two in WebRtcVad_SplitFilter
// One offset per feature channel; WebRtcVad_get_features hands entry k to
// WebRtcVad_LogOfEnergy, which adds it to the log energy stored in
// out_vector[k] (k = 0 is the 80 Hz - 250 Hz band, k = 5 is 3000 Hz - 4000 Hz).
static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 };

// High-pass filters |in_vector_length| samples of |in_vector| into
// |out_vector| using a second-order zero/pole filter with the Q14 coefficients
// kHpZeroCoefs and kHpPoleCoefs.
//
// |filter_state| holds four values that are updated for every sample:
//   [0], [1] - the two most recent input samples,
//   [2], [3] - the two most recent output samples.
//
// Notes on amplification (sum of the absolute values of the impulse response):
// The zero/pole-filter has a max amplification of a single sample of: 1.4546
// Impulse response: 0.4047 -0.6179 -0.0266  0.1993  0.1035  -0.0194
// The all-zero section has a max amplification of a single sample of: 1.6189
// Impulse response: 0.4047 -0.8094  0.4047  0       0        0
// The all-pole section has a max amplification of a single sample of: 1.9931
// Impulse response: 1.0000  0.4734 -0.1189 -0.2187 -0.0627   0.04532
void WebRtcVad_HpOutput(int16_t* in_vector,
                        int in_vector_length,
                        int16_t* filter_state,
                        int16_t* out_vector) {
  int n;

  for (n = 0; n < in_vector_length; n++) {
    int32_t acc;

    // Feed-forward (all-zero) part; the accumulator is in Q14.
    acc = (int32_t) WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[0], in_vector[n]);
    acc += (int32_t) WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[1], filter_state[0]);
    acc += (int32_t) WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[2], filter_state[1]);

    // Shift the input history by one sample.
    filter_state[1] = filter_state[0];
    filter_state[0] = in_vector[n];

    // Feedback (all-pole) part, still in Q14.
    acc -= (int32_t) WEBRTC_SPL_MUL_16_16(kHpPoleCoefs[1], filter_state[2]);
    acc -= (int32_t) WEBRTC_SPL_MUL_16_16(kHpPoleCoefs[2], filter_state[3]);

    // Shift the output history and drop the 14 fractional bits to get the new
    // output sample.
    filter_state[3] = filter_state[2];
    filter_state[2] = (int16_t) WEBRTC_SPL_RSHIFT_W32(acc, 14);
    out_vector[n] = filter_state[2];
  }
}

// First-order all-pass filter that also downsamples by two.
//
// Reads every second sample of |in_vector| (indices 0, 2, 4, ...) and writes
// |vector_length| consecutive samples to |out_vector|.
//
// - in_vector           : Input; at least 2 * |vector_length| - 1 samples are
//                         read.
// - filter_coefficients : All-pass coefficient in Q15.
// - vector_length       : Number of output samples to produce.
// - filter_state        : In/out. Upper 16 bits of the internal 32-bit state.
// - out_vector          : Output, |vector_length| samples.
//
// The filter can only cause overflow (in the w16 output variable)
// if more than 4 consecutive input numbers are of maximum value and
// have the same sign as the first taps of the impulse response.
// First 6 taps of the impulse response: 0.6399 0.5905 -0.3779
// 0.2418 -0.1547 0.0990
void WebRtcVad_Allpass(int16_t* in_vector,
                       int16_t filter_coefficients,
                       int vector_length,
                       int16_t* filter_state,
                       int16_t* out_vector) {
  int i;
  // The state is kept with 15 fractional bits relative to the samples (Q15).
  // Scale with a multiplication instead of "<< 16": left-shifting a negative
  // signed value is undefined behaviour in C.
  int32_t state32 = (int32_t) (*filter_state) * (1 << 16);

  for (i = 0; i < vector_length; i++) {
    // Index with a stride of two rather than advancing the input pointer, so
    // that no pointer beyond the end of the caller's buffer is ever formed.
    const int32_t sample = in_vector[2 * i];
    // Q15 coefficient times the sample, added to the Q15 state.
    const int32_t sum32 = state32 + filter_coefficients * sample;
    const int16_t out16 = (int16_t) (sum32 >> 16);

    out_vector[i] = out16;

    // New state: sample * 2^14 minus coefficient * output, then doubled to
    // get back to Q15. Multiplications replace the former left shifts.
    state32 = sample * (1 << 14) - filter_coefficients * out16;
    state32 *= 2;
  }

  *filter_state = (int16_t) (state32 >> 16);
}

// Splits |in_vector| into two downsampled signals.
//
// The samples at even indices go through an all-pass filter with coefficient
// kAllPassCoefsQ15[0] and the samples at odd indices through one with
// coefficient kAllPassCoefsQ15[1]. The difference of the two branch outputs is
// stored in |out_vector_hp| and their sum in |out_vector_lp|.
//
// - in_vector        : Input, |in_vector_length| samples.
// - in_vector_length : Number of input samples; each output receives half as
//                      many.
// - upper_state      : In/out filter state of the even-sample branch.
// - lower_state      : In/out filter state of the odd-sample branch.
// - out_vector_hp    : Output, difference signal.
// - out_vector_lp    : Output, sum signal.
void WebRtcVad_SplitFilter(int16_t* in_vector,
                           int in_vector_length,
                           int16_t* upper_state,
                           int16_t* lower_state,
                           int16_t* out_vector_hp,
                           int16_t* out_vector_lp) {
  int k;
  const int num_out = WEBRTC_SPL_RSHIFT_W16(in_vector_length, 1);

  // Upper branch: even-indexed input samples.
  WebRtcVad_Allpass(in_vector, kAllPassCoefsQ15[0], num_out, upper_state,
                    out_vector_hp);

  // Lower branch: odd-indexed input samples.
  WebRtcVad_Allpass(in_vector + 1, kAllPassCoefsQ15[1], num_out, lower_state,
                    out_vector_lp);

  // Combine the branches in place: hp = upper - lower, lp = lower + upper.
  for (k = 0; k < num_out; k++) {
    const int16_t upper = out_vector_hp[k];

    out_vector_hp[k] -= out_vector_lp[k];
    out_vector_lp[k] += upper;
  }
}

// Computes six log-energy features, one per frequency band, for one frame.
//
// The frame is split repeatedly with WebRtcVad_SplitFilter and the log energy
// of each resulting band is written to |out_vector|:
//   out_vector[0] :   80 Hz -  250 Hz
//   out_vector[1] :  250 Hz -  500 Hz
//   out_vector[2] :  500 Hz - 1000 Hz
//   out_vector[3] : 1000 Hz - 2000 Hz
//   out_vector[4] : 2000 Hz - 3000 Hz
//   out_vector[5] : 3000 Hz - 4000 Hz
//
// - inst       : VAD instance; its split-filter and high-pass filter states
//                are read and updated.
// - in_vector  : Input frame.
// - frame_size : Number of samples in |in_vector|. Expected to be 80, 160 or
//                240 (10, 20 or 30 ms at 8 kHz); the local buffers are sized
//                for at most 240 samples.
// - out_vector : Output, six feature values.
//
// Returns the power value accumulated by the WebRtcVad_LogOfEnergy calls.
int16_t WebRtcVad_get_features(VadInstT* inst,
                               int16_t* in_vector,
                               int frame_size,
                               int16_t* out_vector) {
  // Scratch buffers: at most 120 samples remain after the first split and at
  // most 60 after the second. They are reused by the later, shorter splits.
  int16_t hp_120[120], lp_120[120];
  int16_t hp_60[60], lp_60[60];
  int16_t power = 0;
  const int half_length = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
  int length;

  // Split at 2000 Hz and downsample
  WebRtcVad_SplitFilter(in_vector, frame_size, &inst->upper_state[0],
                        &inst->lower_state[0], hp_120, lp_120);

  // Split the upper half at 3000 Hz and downsample
  WebRtcVad_SplitFilter(hp_120, half_length, &inst->upper_state[1],
                        &inst->lower_state[1], hp_60, lp_60);
  length = WEBRTC_SPL_RSHIFT_W16(half_length, 1);

  // Energy in 3000 Hz - 4000 Hz
  WebRtcVad_LogOfEnergy(hp_60, length, kOffsetVector[5], &power,
                        &out_vector[5]);

  // Energy in 2000 Hz - 3000 Hz
  WebRtcVad_LogOfEnergy(lp_60, length, kOffsetVector[4], &power,
                        &out_vector[4]);

  // Split the lower half at 1000 Hz and downsample
  WebRtcVad_SplitFilter(lp_120, half_length, &inst->upper_state[2],
                        &inst->lower_state[2], hp_60, lp_60);
  length = WEBRTC_SPL_RSHIFT_W16(half_length, 1);

  // Energy in 1000 Hz - 2000 Hz
  WebRtcVad_LogOfEnergy(hp_60, length, kOffsetVector[3], &power,
                        &out_vector[3]);

  // Split at 500 Hz; the 120-sample buffers are free again at this point
  WebRtcVad_SplitFilter(lp_60, length, &inst->upper_state[3],
                        &inst->lower_state[3], hp_120, lp_120);
  length = WEBRTC_SPL_RSHIFT_W16(length, 1);

  // Energy in 500 Hz - 1000 Hz
  WebRtcVad_LogOfEnergy(hp_120, length, kOffsetVector[2], &power,
                        &out_vector[2]);

  // Split at 250 Hz
  WebRtcVad_SplitFilter(lp_120, length, &inst->upper_state[4],
                        &inst->lower_state[4], hp_60, lp_60);
  length = WEBRTC_SPL_RSHIFT_W16(length, 1);

  // Energy in 250 Hz - 500 Hz
  WebRtcVad_LogOfEnergy(hp_60, length, kOffsetVector[1], &power,
                        &out_vector[1]);

  // Remove DC and LFs from the lowest band
  WebRtcVad_HpOutput(lp_60, length, inst->hp_filter_state, hp_120);

  // Power in 80 Hz - 250 Hz
  WebRtcVad_LogOfEnergy(hp_120, length, kOffsetVector[0], &power,
                        &out_vector[0]);

  return power;
}

// Computes 160*log10(energy) of |vector| plus |offset|, and updates the
// running frame power.
//
// - vector        : Input samples.
// - vector_length : Number of samples in |vector|.
// - offset        : Value added to the log energy before it is returned.
// - power         : In/out. Running power of the frame. It is only updated
//                   while it is <= MIN_ENERGY; once a band pushes it above
//                   MIN_ENERGY, later calls leave it untouched.
// - log_energy    : Output. 160*log10(energy) clamped at zero, plus |offset|.
//                   Equal to |offset| when the energy is zero.
void WebRtcVad_LogOfEnergy(int16_t* vector,
                           int vector_length,
                           int16_t offset,
                           int16_t* power,
                           int16_t* log_energy) {
  int shfts = 0, shfts2 = 0;
  int16_t energy_s16 = 0;
  // |log2_q10| was previously named |log2|, which shadows the C99 library
  // function of the same name.
  int16_t zeros = 0, frac = 0, log2_q10 = 0;
  int32_t energy = WebRtcSpl_Energy(vector, vector_length, &shfts);

  if (energy > 0) {
    // Bring the energy down (or up) to a 16-bit value.
    shfts2 = 16 - WebRtcSpl_NormW32(energy);
    shfts += shfts2;
    // "shfts" is the total number of right shifts that has been done to
    // energy_s16.
    energy_s16 = (int16_t) WEBRTC_SPL_SHIFT_W32(energy, -shfts2);

    // Find:
    // 160*log10(energy_s16*2^shfts) = 160*log10(2)*log2(energy_s16*2^shfts) =
    // 160*log10(2)*(log2(energy_s16) + log2(2^shfts)) =
    // 160*log10(2)*(log2(energy_s16) + shfts)

    zeros = WebRtcSpl_NormU32(energy_s16);
    // The shift moves the leading one of |energy_s16| up towards bit 31, which
    // is not representable in int32_t. Do it in unsigned arithmetic so the
    // result is well defined; the leading one is then masked away and the top
    // 10 remaining bits form the fractional part of the logarithm.
    frac = (int16_t) ((((uint32_t) energy_s16 << zeros) & 0x7FFFFFFF) >> 21);
    // log2(energy_s16) in Q10: integer part plus the 10-bit fraction.
    log2_q10 = (int16_t) (((31 - zeros) << 10) + frac);

    // kLogConst is Q9 and |log2_q10| is Q10, hence the shift by 19; |shfts| is
    // an integer, hence the shift by 9.
    *log_energy = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(kLogConst, log2_q10, 19)
        + (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(shfts, kLogConst, 9);

    if (*log_energy < 0) {
      *log_energy = 0;
    }
  } else {
    // Zero energy: report only the offset and contribute nothing to the power.
    *log_energy = 0;
    shfts = -15;
    energy_s16 = 0;
  }

  *log_energy += offset;

  // Total power in frame
  if (*power <= MIN_ENERGY) {
    if (shfts > 0) {
      // The energy needed right shifts to fit in 16 bits, so it is treated as
      // exceeding MIN_ENERGY without being reconstructed.
      *power += MIN_ENERGY + 1;
    } else if (WEBRTC_SPL_SHIFT_W16(energy_s16, shfts) > MIN_ENERGY) {
      *power += MIN_ENERGY + 1;
    } else {
      *power += WEBRTC_SPL_SHIFT_W16(energy_s16, shfts);
    }
  }
}