aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--modules/audio_processing/agc2/rnn_vad/BUILD.gn8
-rw-r--r--modules/audio_processing/agc2/rnn_vad/DEPS3
-rw-r--r--modules/audio_processing/agc2/rnn_vad/common.h2
-rw-r--r--modules/audio_processing/agc2/rnn_vad/rnn.cc227
-rw-r--r--modules/audio_processing/agc2/rnn_vad/rnn.h116
-rw-r--r--modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc180
-rw-r--r--modules/audio_processing/agc2/rnn_vad/test_utils.cc15
-rw-r--r--modules/audio_processing/agc2/rnn_vad/test_utils.h6
-rw-r--r--resources/audio_processing/agc2/rnn_vad/sil_features.dat.sha11
-rw-r--r--resources/audio_processing/agc2/rnn_vad/vad_prob.dat.sha11
-rwxr-xr-xtools_webrtc/libs/generate_licenses.py1
11 files changed, 560 insertions, 0 deletions
diff --git a/modules/audio_processing/agc2/rnn_vad/BUILD.gn b/modules/audio_processing/agc2/rnn_vad/BUILD.gn
index e05dcab604..395e5224ee 100644
--- a/modules/audio_processing/agc2/rnn_vad/BUILD.gn
+++ b/modules/audio_processing/agc2/rnn_vad/BUILD.gn
@@ -25,12 +25,15 @@ source_set("lib") {
"pitch_search_internal.cc",
"pitch_search_internal.h",
"ring_buffer.h",
+ "rnn.cc",
+ "rnn.h",
"sequence_buffer.h",
"symmetric_matrix_buffer.h",
]
deps = [
"../../../../api:array_view",
"../../../../rtc_base:checks",
+ "//third_party/rnnoise:rnn_vad",
]
}
@@ -53,6 +56,8 @@ if (rtc_include_tests) {
unittest_resources = [
"../../../../resources/audio_processing/agc2/rnn_vad/pitch_buf_24k.dat",
"../../../../resources/audio_processing/agc2/rnn_vad/pitch_lp_res.dat",
+ "../../../../resources/audio_processing/agc2/rnn_vad/sil_features.dat",
+ "../../../../resources/audio_processing/agc2/rnn_vad/vad_prob.dat",
]
if (is_ios) {
@@ -72,6 +77,7 @@ if (rtc_include_tests) {
"pitch_search_internal_unittest.cc",
"pitch_search_unittest.cc",
"ring_buffer_unittest.cc",
+ "rnn_unittest.cc",
"sequence_buffer_unittest.cc",
"symmetric_matrix_buffer_unittest.cc",
]
@@ -79,7 +85,9 @@ if (rtc_include_tests) {
":lib",
":lib_test",
"../../../../api:array_view",
+ "../../../../rtc_base:checks",
"../../../../test:test_support",
+ "//third_party/rnnoise:rnn_vad",
]
data = unittest_resources
if (is_ios) {
diff --git a/modules/audio_processing/agc2/rnn_vad/DEPS b/modules/audio_processing/agc2/rnn_vad/DEPS
new file mode 100644
index 0000000000..773c2d7edd
--- /dev/null
+++ b/modules/audio_processing/agc2/rnn_vad/DEPS
@@ -0,0 +1,3 @@
+# rnn.cc and rnn_unittest.cc include headers under third_party/rnnoise; this
+# rule adds that directory to the includes allowed from this directory.
+include_rules = [
+ "+third_party/rnnoise",
+]
diff --git a/modules/audio_processing/agc2/rnn_vad/common.h b/modules/audio_processing/agc2/rnn_vad/common.h
index 252bf8472c..3af0719c16 100644
--- a/modules/audio_processing/agc2/rnn_vad/common.h
+++ b/modules/audio_processing/agc2/rnn_vad/common.h
@@ -43,6 +43,8 @@ constexpr size_t kMaxPitch12kHz = kMaxPitch24kHz / 2;
constexpr size_t kMinPitch48kHz = kMinPitch24kHz * 2;
constexpr size_t kMaxPitch48kHz = kMaxPitch24kHz * 2;
+// Number of values in the feature vector consumed by the RNN VAD.
+constexpr size_t kFeatureVectorSize = 42;
+
} // namespace rnn_vad
} // namespace webrtc
diff --git a/modules/audio_processing/agc2/rnn_vad/rnn.cc b/modules/audio_processing/agc2/rnn_vad/rnn.cc
new file mode 100644
index 0000000000..f88fb75e71
--- /dev/null
+++ b/modules/audio_processing/agc2/rnn_vad/rnn.cc
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/rnn.h"
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+
+#include "rtc_base/checks.h"
+#include "third_party/rnnoise/src/rnn_activations.h"
+#include "third_party/rnnoise/src/rnn_vad_weights.h"
+
+namespace webrtc {
+namespace rnn_vad {
+
+// Names imported from the rnnoise weights and activations headers. The
+// static_asserts below verify at compile time that the sizes declared by
+// rnnoise fit the statically allocated buffers used by the layers.
+using rnnoise::kWeightsScale;
+
+// Input (fully-connected) layer.
+using rnnoise::kInputLayerInputSize;
+static_assert(kFeatureVectorSize == kInputLayerInputSize,
+              "The feature vector size must match the input layer size.");
+using rnnoise::kInputDenseWeights;
+using rnnoise::kInputDenseBias;
+using rnnoise::kInputLayerOutputSize;
+static_assert(kInputLayerOutputSize <= kFullyConnectedLayersMaxUnits,
+              "Increase kFullyConnectedLayersMaxUnits.");
+
+// Hidden (recurrent) layer.
+using rnnoise::kHiddenGruRecurrentWeights;
+using rnnoise::kHiddenGruWeights;
+using rnnoise::kHiddenGruBias;
+using rnnoise::kHiddenLayerOutputSize;
+static_assert(kHiddenLayerOutputSize <= kRecurrentLayersMaxUnits,
+              "Increase kRecurrentLayersMaxUnits.");
+
+// Output (fully-connected) layer.
+using rnnoise::kOutputDenseWeights;
+using rnnoise::kOutputDenseBias;
+using rnnoise::kOutputLayerOutputSize;
+static_assert(kOutputLayerOutputSize <= kFullyConnectedLayersMaxUnits,
+              "Increase kFullyConnectedLayersMaxUnits.");
+
+// Activation functions.
+using rnnoise::RectifiedLinearUnit;
+using rnnoise::SigmoidApproximated;
+using rnnoise::TansigApproximated;
+
+// Creates a fully-connected layer with |input_size| inputs and |output_size|
+// units. Only views on |bias| and |weights| are stored, so the viewed arrays
+// must outlive the layer. |activation_function| is applied to every unit and
+// must not be null.
+FullyConnectedLayer::FullyConnectedLayer(
+    const size_t input_size,
+    const size_t output_size,
+    const rtc::ArrayView<const int8_t> bias,
+    const rtc::ArrayView<const int8_t> weights,
+    float (*const activation_function)(float))
+    : input_size_(input_size),
+      output_size_(output_size),
+      bias_(bias),
+      weights_(weights),
+      activation_function_(activation_function) {
+  // ComputeOutput() calls through this pointer unconditionally.
+  RTC_DCHECK(activation_function_);
+  RTC_DCHECK_LE(output_size_, kFullyConnectedLayersMaxUnits)
+      << "Static over-allocation of fully-connected layers output vectors is "
+         "not sufficient.";
+  RTC_DCHECK_EQ(output_size_, bias_.size())
+      << "Mismatching output size and bias terms array size.";
+  RTC_DCHECK_EQ(input_size_ * output_size_, weights_.size())
+      << "Mismatching input-output size and weight coefficients array size.";
+}
+
+FullyConnectedLayer::~FullyConnectedLayer() = default;
+
+// Returns a view on the first |output_size_| items of the over-allocated
+// output buffer.
+rtc::ArrayView<const float> FullyConnectedLayer::GetOutput() const {
+  return {output_.data(), output_size_};
+}
+
+// Computes the layer output for |input|, which must hold |input_size_| values.
+// For each unit: bias plus the weighted sum of the inputs, scaled by
+// kWeightsScale and passed through the activation function.
+void FullyConnectedLayer::ComputeOutput(rtc::ArrayView<const float> input) {
+  // |input| is indexed up to |input_size_| below.
+  RTC_DCHECK_EQ(input.size(), input_size_);
+  // TODO(bugs.chromium.org/9076): Optimize using SSE/AVX fused multiply-add
+  // operations.
+  for (size_t o = 0; o < output_size_; ++o) {
+    output_[o] = bias_[o];
+    // TODO(bugs.chromium.org/9076): Benchmark how different layouts for
+    // |weights_| change the performance across different platforms.
+    // |weights_| is read as an |input_size_| x |output_size_| matrix.
+    for (size_t i = 0; i < input_size_; ++i) {
+      output_[o] += input[i] * weights_[i * output_size_ + o];
+    }
+    output_[o] = (*activation_function_)(kWeightsScale * output_[o]);
+  }
+}
+
+// Creates a GRU layer with |input_size| inputs and |output_size| units and
+// resets its state. Only views on the parameter arrays are stored, so the
+// viewed arrays must outlive the layer. The arrays hold the parameters of the
+// three gates (update, reset, output) interleaved with stride
+// 3 * |output_size|:
+// - |bias|: 3 * output_size values;
+// - |weights|: input_size rows of 3 * output_size values;
+// - |recurrent_weights|: output_size rows of 3 * output_size values.
+// |activation_function| is applied to the candidate output and must not be
+// null.
+GatedRecurrentLayer::GatedRecurrentLayer(
+    const size_t input_size,
+    const size_t output_size,
+    const rtc::ArrayView<const int8_t> bias,
+    const rtc::ArrayView<const int8_t> weights,
+    const rtc::ArrayView<const int8_t> recurrent_weights,
+    float (*const activation_function)(float))
+    : input_size_(input_size),
+      output_size_(output_size),
+      bias_(bias),
+      weights_(weights),
+      recurrent_weights_(recurrent_weights),
+      activation_function_(activation_function) {
+  // ComputeOutput() calls through this pointer unconditionally.
+  RTC_DCHECK(activation_function_);
+  RTC_DCHECK_LE(output_size_, kRecurrentLayersMaxUnits)
+      << "Static over-allocation of recurrent layers state vectors is not "
+      << "sufficient.";
+  RTC_DCHECK_EQ(3 * output_size_, bias_.size())
+      << "Mismatching output size and bias terms array size.";
+  RTC_DCHECK_EQ(3 * input_size_ * output_size_, weights_.size())
+      << "Mismatching input-output size and weight coefficients array size.";
+  // The recurrent weights multiply the state, which has |output_size_| items
+  // (not |input_size_|), hence ComputeOutput() reads indexes up to
+  // 3 * output_size_ * output_size_ - 1. Checking against
+  // 3 * input_size_ * output_size_ would accept too short an array whenever
+  // input_size_ < output_size_. A lower bound is used instead of an equality
+  // so that callers passing 3 * input_size_ * output_size_ values (with
+  // input_size_ >= output_size_) keep working; the extra values are ignored.
+  RTC_DCHECK_GE(recurrent_weights_.size(), 3 * output_size_ * output_size_)
+      << "Mismatching input-output size and recurrent weight coefficients array"
+      << " size.";
+  Reset();
+}
+
+GatedRecurrentLayer::~GatedRecurrentLayer() = default;
+
+// The layer output is its state: returns a view on the first |output_size_|
+// items of the over-allocated state buffer.
+rtc::ArrayView<const float> GatedRecurrentLayer::GetOutput() const {
+  return {state_.data(), output_size_};
+}
+
+// Zeroes the whole state buffer.
+void GatedRecurrentLayer::Reset() {
+  std::fill(state_.begin(), state_.end(), 0.f);
+}
+
+// Computes the GRU output for |input|, which must hold |input_size_| values,
+// and stores it as the new state. The update gates, reset gates and candidate
+// output are computed in sequence from |input| and the current state; the new
+// state blends the current state and the candidate output through the update
+// gates.
+void GatedRecurrentLayer::ComputeOutput(rtc::ArrayView<const float> input) {
+  // |input| is indexed up to |input_size_| below.
+  RTC_DCHECK_EQ(input.size(), input_size_);
+  // TODO(bugs.chromium.org/9076): Optimize using SSE/AVX fused multiply-add
+  // operations.
+  // Stride and offset used to read parameter arrays.
+  const size_t stride = 3 * output_size_;
+  size_t offset = 0;
+
+  // Compute update gates.
+  std::array<float, kRecurrentLayersMaxUnits> update;
+  for (size_t o = 0; o < output_size_; ++o) {
+    update[o] = bias_[o];
+    // TODO(bugs.chromium.org/9076): Benchmark how different layouts for
+    // |weights_| and |recurrent_weights_| change the performance across
+    // different platforms.
+    for (size_t i = 0; i < input_size_; ++i) {  // Add input.
+      update[o] += input[i] * weights_[i * stride + o];
+    }
+    for (size_t s = 0; s < output_size_; ++s) {  // Add state.
+      update[o] += state_[s] * recurrent_weights_[s * stride + o];
+    }
+    update[o] = SigmoidApproximated(kWeightsScale * update[o]);
+  }
+
+  // Compute reset gates.
+  offset += output_size_;
+  std::array<float, kRecurrentLayersMaxUnits> reset;
+  for (size_t o = 0; o < output_size_; ++o) {
+    reset[o] = bias_[offset + o];
+    for (size_t i = 0; i < input_size_; ++i) {  // Add input.
+      reset[o] += input[i] * weights_[offset + i * stride + o];
+    }
+    for (size_t s = 0; s < output_size_; ++s) {  // Add state.
+      reset[o] += state_[s] * recurrent_weights_[offset + s * stride + o];
+    }
+    reset[o] = SigmoidApproximated(kWeightsScale * reset[o]);
+  }
+
+  // Compute output.
+  offset += output_size_;
+  std::array<float, kRecurrentLayersMaxUnits> output;
+  for (size_t o = 0; o < output_size_; ++o) {
+    output[o] = bias_[offset + o];
+    for (size_t i = 0; i < input_size_; ++i) {  // Add input.
+      output[o] += input[i] * weights_[offset + i * stride + o];
+    }
+    for (size_t s = 0; s < output_size_;
+         ++s) {  // Add state through reset gates.
+      output[o] +=
+          state_[s] * recurrent_weights_[offset + s * stride + o] * reset[s];
+    }
+    output[o] = (*activation_function_)(kWeightsScale * output[o]);
+    // Update output through the update gates.
+    output[o] = update[o] * state_[o] + (1.f - update[o]) * output[o];
+  }
+
+  // Update the state. Not done in the previous loop since that would pollute
+  // the current state and lead to incorrect output values.
+  // Only the first |output_size_| items of |output| have been written; the
+  // remaining ones are uninitialized and must not be read.
+  std::copy_n(output.begin(), output_size_, state_.begin());
+}
+
+// Builds the three layers of the network from the rnnoise parameters.
+RnnBasedVad::RnnBasedVad()
+    // Input layer: fully-connected.
+    : input_layer_(kInputLayerInputSize,
+                   kInputLayerOutputSize,
+                   kInputDenseBias,
+                   kInputDenseWeights,
+                   TansigApproximated),
+      // Hidden layer: recurrent (GRU), fed by the input layer.
+      hidden_layer_(kInputLayerOutputSize,
+                    kHiddenLayerOutputSize,
+                    kHiddenGruBias,
+                    kHiddenGruWeights,
+                    kHiddenGruRecurrentWeights,
+                    RectifiedLinearUnit),
+      // Output layer: fully-connected, fed by the hidden layer.
+      output_layer_(kHiddenLayerOutputSize,
+                    kOutputLayerOutputSize,
+                    kOutputDenseBias,
+                    kOutputDenseWeights,
+                    SigmoidApproximated) {
+  // Each layer must produce as many values as the next one consumes.
+  RTC_DCHECK_EQ(input_layer_.output_size(), hidden_layer_.input_size())
+      << "The input and the hidden layers sizes do not match.";
+  RTC_DCHECK_EQ(hidden_layer_.output_size(), output_layer_.input_size())
+      << "The hidden and the output layers sizes do not match.";
+}
+
+RnnBasedVad::~RnnBasedVad() = default;
+
+void RnnBasedVad::Reset() {
+ // Only the hidden (recurrent) layer is reset; |vad_probability_| is left
+ // unchanged.
+ hidden_layer_.Reset();
+}
+
+// Runs |feature_vector| through the input, hidden and output layers and stores
+// the first item of the output layer as the VAD probability.
+void RnnBasedVad::ComputeVadProbability(
+    rtc::ArrayView<const float, kFeatureVectorSize> feature_vector) {
+  // Forward pass: each layer consumes the output of the previous one.
+  input_layer_.ComputeOutput(feature_vector);
+  hidden_layer_.ComputeOutput(input_layer_.GetOutput());
+  output_layer_.ComputeOutput(hidden_layer_.GetOutput());
+  vad_probability_ = output_layer_.GetOutput()[0];
+}
+
+} // namespace rnn_vad
+} // namespace webrtc
diff --git a/modules/audio_processing/agc2/rnn_vad/rnn.h b/modules/audio_processing/agc2/rnn_vad/rnn.h
new file mode 100644
index 0000000000..81ab87ed48
--- /dev/null
+++ b/modules/audio_processing/agc2/rnn_vad/rnn.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_H_
+
+#include <array>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/rnn_vad/common.h"
+
+namespace webrtc {
+namespace rnn_vad {
+
+// Maximum number of units for a fully-connected layer. This value is used to
+// over-allocate space for fully-connected layers output vectors (implemented as
+// std::array). The value should equal the number of units of the largest
+// fully-connected layer.
+constexpr size_t kFullyConnectedLayersMaxUnits = 24;
+
+// Maximum number of units for a recurrent layer. This value is used to
+// over-allocate space for recurrent layers state vectors (implemented as
+// std::array). The value should equal the number of units of the largest
+// recurrent layer.
+constexpr size_t kRecurrentLayersMaxUnits = 24;
+
+// Fully-connected layer.
+// The layer stores views on the bias and weights arrays passed to the
+// constructor, hence those arrays must outlive the layer.
+class FullyConnectedLayer {
+ public:
+  // |bias| must hold |output_size| values and |weights| must hold
+  // |input_size| * |output_size| values. |activation_function| is applied to
+  // each unit.
+  FullyConnectedLayer(const size_t input_size,
+                      const size_t output_size,
+                      const rtc::ArrayView<const int8_t> bias,
+                      const rtc::ArrayView<const int8_t> weights,
+                      float (*const activation_function)(float));
+  FullyConnectedLayer(const FullyConnectedLayer&) = delete;
+  FullyConnectedLayer& operator=(const FullyConnectedLayer&) = delete;
+  ~FullyConnectedLayer();
+  size_t input_size() const { return input_size_; }
+  size_t output_size() const { return output_size_; }
+  // Returns a view on the |output_size()| values written by the last call to
+  // ComputeOutput(); all zeros if ComputeOutput() has not been called yet.
+  rtc::ArrayView<const float> GetOutput() const;
+  // Computes the fully-connected layer output.
+  void ComputeOutput(rtc::ArrayView<const float> input);
+
+ private:
+  const size_t input_size_;
+  const size_t output_size_;
+  const rtc::ArrayView<const int8_t> bias_;
+  const rtc::ArrayView<const int8_t> weights_;
+  float (*const activation_function_)(float);
+  // The output vector of a fully-connected layer has length equal to
+  // |output_size_|. However, to avoid dynamic allocation, over-allocation is
+  // used. Zero-initialized so that GetOutput() never exposes indeterminate
+  // values.
+  std::array<float, kFullyConnectedLayersMaxUnits> output_{};
+};
+
+// Recurrent layer with gated recurrent units (GRUs).
+// The layer stores views on the parameter arrays passed to the constructor,
+// hence those arrays must outlive the layer.
+class GatedRecurrentLayer {
+ public:
+ // The constructor resets the state.
+ GatedRecurrentLayer(const size_t input_size,
+ const size_t output_size,
+ const rtc::ArrayView<const int8_t> bias,
+ const rtc::ArrayView<const int8_t> weights,
+ const rtc::ArrayView<const int8_t> recurrent_weights,
+ float (*const activation_function)(float));
+ GatedRecurrentLayer(const GatedRecurrentLayer&) = delete;
+ GatedRecurrentLayer& operator=(const GatedRecurrentLayer&) = delete;
+ ~GatedRecurrentLayer();
+ size_t input_size() const { return input_size_; }
+ size_t output_size() const { return output_size_; }
+ // Returns a view on the current state, which is also the layer output.
+ rtc::ArrayView<const float> GetOutput() const;
+ // Sets the state to zero.
+ void Reset();
+ // Computes the recurrent layer output and updates the state.
+ void ComputeOutput(rtc::ArrayView<const float> input);
+
+ private:
+ const size_t input_size_;
+ const size_t output_size_;
+ const rtc::ArrayView<const int8_t> bias_;
+ const rtc::ArrayView<const int8_t> weights_;
+ const rtc::ArrayView<const int8_t> recurrent_weights_;
+ float (*const activation_function_)(float);
+ // The state vector of a recurrent layer has length equal to |output_size_|.
+ // However, to avoid dynamic allocation, over-allocation is used.
+ std::array<float, kRecurrentLayersMaxUnits> state_;
+};
+
+// Recurrent network based VAD.
+// Chains a fully-connected input layer, a GRU hidden layer and a
+// fully-connected output layer.
+class RnnBasedVad {
+ public:
+  RnnBasedVad();
+  RnnBasedVad(const RnnBasedVad&) = delete;
+  RnnBasedVad& operator=(const RnnBasedVad&) = delete;
+  ~RnnBasedVad();
+  // Returns the probability of voice (range: [0.0, 1.0]) computed by the last
+  // call to ComputeVadProbability(); 0 if it has never been called.
+  float vad_probability() const { return vad_probability_; }
+  // Resets the state of the recurrent layer.
+  void Reset();
+  // Computes the probability of voice for |feature_vector| and stores it; the
+  // result is read via vad_probability().
+  void ComputeVadProbability(
+      rtc::ArrayView<const float, kFeatureVectorSize> feature_vector);
+
+ private:
+  FullyConnectedLayer input_layer_;
+  GatedRecurrentLayer hidden_layer_;
+  FullyConnectedLayer output_layer_;
+  // Initialized so that vad_probability() is well-defined before the first
+  // call to ComputeVadProbability().
+  float vad_probability_ = 0.f;
+};
+
+} // namespace rnn_vad
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_H_
diff --git a/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc b/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc
new file mode 100644
index 0000000000..d774c6d557
--- /dev/null
+++ b/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <algorithm>
+#include <array>
+#include <vector>
+
+#include "modules/audio_processing/agc2/rnn_vad/rnn.h"
+#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
+#include "rtc_base/checks.h"
+#include "test/gtest.h"
+#include "third_party/rnnoise/src/rnn_activations.h"
+#include "third_party/rnnoise/src/rnn_vad_weights.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace test {
+
+using rnnoise::RectifiedLinearUnit;
+using rnnoise::SigmoidApproximated;
+
+namespace {
+
+// Feeds |input_vector| to |fc| and checks that the first output value is
+// within 3e-6 of |expected_output|.
+void TestFullyConnectedLayer(FullyConnectedLayer* fc,
+                             rtc::ArrayView<const float> input_vector,
+                             const float expected_output) {
+  RTC_CHECK(fc);
+  fc->ComputeOutput(input_vector);
+  EXPECT_NEAR(expected_output, fc->GetOutput()[0], 3e-6f);
+}
+
+// Resets |gru|, feeds it |input_sequence| one input vector at a time and, after
+// every step, checks that the layer output is within 3e-6 of the corresponding
+// chunk of |expected_output_sequence|.
+void TestGatedRecurrentLayer(
+    GatedRecurrentLayer* gru,
+    rtc::ArrayView<const float> input_sequence,
+    rtc::ArrayView<const float> expected_output_sequence) {
+  RTC_CHECK(gru);
+  // The view refers to the layer state, so it reflects every update.
+  const auto gru_output_view = gru->GetOutput();
+  const size_t input_size = gru->input_size();
+  const size_t output_size = gru->output_size();
+  const size_t num_input_steps =
+      rtc::CheckedDivExact(input_sequence.size(), input_size);
+  const size_t num_output_steps =
+      rtc::CheckedDivExact(expected_output_sequence.size(), output_size);
+  ASSERT_EQ(num_input_steps, num_output_steps)
+      << "The test data length is invalid.";
+  // Feed the GRU layer and check the output at every step.
+  gru->Reset();
+  for (size_t step = 0; step < num_input_steps; ++step) {
+    SCOPED_TRACE(step);
+    gru->ComputeOutput(input_sequence.subview(step * input_size, input_size));
+    ExpectNearAbsolute(
+        expected_output_sequence.subview(step * output_size, output_size),
+        gru_output_view, 3e-6f);
+  }
+}
+
+} // namespace
+
+// Checks the output of a fully-connected layer with 24 inputs and 1 unit
+// against precomputed values. TestFullyConnectedLayer() compares with an
+// absolute tolerance of 3e-6, so the check is not strictly bit-exact.
+TEST(RnnVadTest, CheckFullyConnectedLayerOutput) {
+ const std::array<int8_t, 1> bias = {-50};
+ const std::array<int8_t, 24> weights = {
+ 127, 127, 127, 127, 127, 20, 127, -126, -126, -54, 14, 125,
+ -126, -126, 127, -125, -126, 127, -127, -127, -57, -30, 127, 80};
+ FullyConnectedLayer fc(24, 1, bias, weights, SigmoidApproximated);
+ // Test on different inputs.
+ {
+ const std::array<float, 24> input_vector = {
+ 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.215833917f, 0.290601075f, 0.238759011f, 0.244751841f,
+ 0.f, 0.0461241305f, 0.106401242f, 0.223070428f, 0.630603909f,
+ 0.690453172f, 0.f, 0.387645692f, 0.166913897f, 0.f,
+ 0.0327451192f, 0.f, 0.136149868f, 0.446351469f};
+ TestFullyConnectedLayer(&fc, {input_vector}, 0.436567038f);
+ }
+ {
+ const std::array<float, 24> input_vector = {
+ 0.592162728f, 0.529089332f, 1.18205106f,
+ 1.21736848f, 0.f, 0.470851123f,
+ 0.130675942f, 0.320903003f, 0.305496395f,
+ 0.0571633279f, 1.57001138f, 0.0182026215f,
+ 0.0977443159f, 0.347477973f, 0.493206412f,
+ 0.9688586f, 0.0320267938f, 0.244722098f,
+ 0.312745273f, 0.f, 0.00650715502f,
+ 0.312553257f, 1.62619662f, 0.782880902f};
+ TestFullyConnectedLayer(&fc, {input_vector}, 0.874741316f);
+ }
+ {
+ const std::array<float, 24> input_vector = {
+ 0.395022154f, 0.333681047f, 0.76302278f,
+ 0.965480626f, 0.f, 0.941198349f,
+ 0.0892967582f, 0.745046318f, 0.635769248f,
+ 0.238564298f, 0.970656633f, 0.014159563f,
+ 0.094203949f, 0.446816623f, 0.640755892f,
+ 1.20532358f, 0.0254284926f, 0.283327013f,
+ 0.726210058f, 0.0550272502f, 0.000344108557f,
+ 0.369803518f, 1.56680179f, 0.997883797f};
+ TestFullyConnectedLayer(&fc, {input_vector}, 0.672785878f);
+ }
+}
+
+// Checks the output of a GRU layer with 5 inputs and 4 units, step by step,
+// against a precomputed output sequence (4 steps).
+// NOTE(review): |recurrent_weights| holds 3 * 5 * 4 = 60 values, as required
+// by the size check in the GatedRecurrentLayer constructor, but
+// GatedRecurrentLayer::ComputeOutput() only reads 3 * 4 * 4 = 48 of them
+// (4 state rows with stride 12) - confirm the intended size.
+TEST(RnnVadTest, CheckGatedRecurrentLayer) {
+ const std::array<int8_t, 12> bias = {96, -99, -81, -114, 49, 119,
+ -118, 68, -76, 91, 121, 125};
+ const std::array<int8_t, 60> weights = {
+ 124, 9, 1, 116, -66, -21, -118, -110, 104, 75, -23, -51,
+ -72, -111, 47, 93, 77, -98, 41, -8, 40, -23, -43, -107,
+ 9, -73, 30, -32, -2, 64, -26, 91, -48, -24, -28, -104,
+ 74, -46, 116, 15, 32, 52, -126, -38, -121, 12, -16, 110,
+ -95, 66, -103, -35, -38, 3, -126, -61, 28, 98, -117, -43};
+ const std::array<int8_t, 60> recurrent_weights = {
+ -3, 87, 50, 51, -22, 27, -39, 62, 31, -83, -52, -48,
+ -6, 83, -19, 104, 105, 48, 23, 68, 23, 40, 7, -120,
+ 64, -62, 117, 85, -51, -43, 54, -105, 120, 56, -128, -107,
+ 39, 50, -17, -47, -117, 14, 108, 12, -7, -72, 103, -87,
+ -66, 82, 84, 100, -98, 102, -49, 44, 122, 106, -20, -69};
+ GatedRecurrentLayer gru(5, 4, bias, weights, recurrent_weights,
+ RectifiedLinearUnit);
+ // Test on different inputs.
+ {
+ const std::array<float, 20> input_sequence = {
+ 0.89395463f, 0.93224651f, 0.55788344f, 0.32341808f, 0.93355054f,
+ 0.13475326f, 0.97370994f, 0.14253306f, 0.93710381f, 0.76093364f,
+ 0.65780413f, 0.41657975f, 0.49403164f, 0.46843281f, 0.75138855f,
+ 0.24517593f, 0.47657707f, 0.57064998f, 0.435184f, 0.19319285f};
+ const std::array<float, 16> expected_output_sequence = {
+ 0.0239123f, 0.5773077f, 0.f, 0.f,
+ 0.01282811f, 0.64330572f, 0.f, 0.04863098f,
+ 0.00781069f, 0.75267816f, 0.f, 0.02579715f,
+ 0.00471378f, 0.59162533f, 0.11087593f, 0.01334511f};
+ TestGatedRecurrentLayer(&gru, input_sequence, expected_output_sequence);
+ }
+}
+
+// TODO(bugs.webrtc.org/9076): Remove when the issue is fixed.
+// Bit-exactness test checking that precomputed frame-wise features lead to the
+// expected VAD probabilities.
+TEST(RnnVadTest, RnnBitExactness) {
+  // Open the precomputed features and the expected VAD probabilities; both
+  // files must describe the same number of frames.
+  auto features_reader = CreateSilenceFlagsFeatureMatrixReader();
+  auto vad_probs_reader = CreateVadProbsReader();
+  ASSERT_EQ(features_reader.second, vad_probs_reader.second);
+  const size_t num_frames = features_reader.second;
+
+  // Compute VAD probability using the precomputed features.
+  RnnBasedVad vad;
+  for (size_t frame = 0; frame < num_frames; ++frame) {
+    SCOPED_TRACE(frame);
+    // Frame-wise buffers.
+    float expected_vad_probability;
+    float is_silence;
+    std::array<float, kFeatureVectorSize> features;
+    // Read frame data.
+    RTC_CHECK(vad_probs_reader.first->ReadValue(&expected_vad_probability));
+    // The features file also includes a silence flag for each frame, stored
+    // before the features.
+    RTC_CHECK(features_reader.first->ReadValue(&is_silence));
+    RTC_CHECK(
+        features_reader.first->ReadChunk({features.data(), features.size()}));
+    // The silence flag is either 0 or 1.
+    ASSERT_TRUE(is_silence == 0.f || is_silence == 1.f);
+    const bool silent_frame = is_silence == 1.f;
+    if (silent_frame) {
+      // Silent frames are skipped and must have zero expected probability.
+      ASSERT_EQ(expected_vad_probability, 0.f);
+      continue;
+    }
+    // Compute and check VAD probability.
+    vad.ComputeVadProbability({features.data(), features.size()});
+    EXPECT_NEAR(expected_vad_probability, vad.vad_probability(), 3e-6f);
+  }
+}
+
+} // namespace test
+} // namespace rnn_vad
+} // namespace webrtc
diff --git a/modules/audio_processing/agc2/rnn_vad/test_utils.cc b/modules/audio_processing/agc2/rnn_vad/test_utils.cc
index c6cf21e61b..ff91ef7d13 100644
--- a/modules/audio_processing/agc2/rnn_vad/test_utils.cc
+++ b/modules/audio_processing/agc2/rnn_vad/test_utils.cc
@@ -53,6 +53,21 @@ ReaderPairType CreateLpResidualAndPitchPeriodGainReader() {
rtc::CheckedDivExact(ptr->data_length(), 2 + num_lp_residual_coeffs)};
}
+// Creates a reader for the silence flags and feature matrix resource file and
+// returns it together with the number of frames stored in the file.
+ReaderPairType CreateSilenceFlagsFeatureMatrixReader() {
+  // Every frame holds a silence flag and a feature vector.
+  constexpr size_t kNumFeatures = 42;
+  constexpr size_t kFrameSize = kNumFeatures + 1;
+  auto ptr = rtc::MakeUnique<BinaryFileReader<float>>(
+      test::ResourcePath("audio_processing/agc2/rnn_vad/sil_features", "dat"),
+      kNumFeatures);
+  // Read the data length before |ptr| is moved from, so that correctness does
+  // not hinge on how the returned pair is constructed.
+  const size_t num_frames =
+      rtc::CheckedDivExact(ptr->data_length(), kFrameSize);
+  return {std::move(ptr), num_frames};
+}
+
+// Creates a reader for the VAD probabilities resource file and returns it
+// together with the number of values stored in the file.
+ReaderPairType CreateVadProbsReader() {
+  auto ptr = rtc::MakeUnique<BinaryFileReader<float>>(
+      test::ResourcePath("audio_processing/agc2/rnn_vad/vad_prob", "dat"));
+  // Read the data length before |ptr| is moved from, so that correctness does
+  // not hinge on how the returned pair is constructed.
+  const size_t num_values = ptr->data_length();
+  return {std::move(ptr), num_values};
+}
+
} // namespace test
} // namespace rnn_vad
} // namespace webrtc
diff --git a/modules/audio_processing/agc2/rnn_vad/test_utils.h b/modules/audio_processing/agc2/rnn_vad/test_utils.h
index 3f580ab48c..92d370675e 100644
--- a/modules/audio_processing/agc2/rnn_vad/test_utils.h
+++ b/modules/audio_processing/agc2/rnn_vad/test_utils.h
@@ -95,6 +95,12 @@ CreatePitchBuffer24kHzReader();
// and gain values.
std::pair<std::unique_ptr<BinaryFileReader<float>>, const size_t>
CreateLpResidualAndPitchPeriodGainReader();
+// Creates a reader for the silence flags and the feature matrix.
+std::pair<std::unique_ptr<BinaryFileReader<float>>, const size_t>
+CreateSilenceFlagsFeatureMatrixReader();
+// Creates a reader for the VAD probabilities.
+std::pair<std::unique_ptr<BinaryFileReader<float>>, const size_t>
+CreateVadProbsReader();
} // namespace test
} // namespace rnn_vad
diff --git a/resources/audio_processing/agc2/rnn_vad/sil_features.dat.sha1 b/resources/audio_processing/agc2/rnn_vad/sil_features.dat.sha1
new file mode 100644
index 0000000000..bc591e9d6c
--- /dev/null
+++ b/resources/audio_processing/agc2/rnn_vad/sil_features.dat.sha1
@@ -0,0 +1 @@
+e0a92782c2903be9da10385d924d34e8bf212d5e \ No newline at end of file
diff --git a/resources/audio_processing/agc2/rnn_vad/vad_prob.dat.sha1 b/resources/audio_processing/agc2/rnn_vad/vad_prob.dat.sha1
new file mode 100644
index 0000000000..1aa3bd0d83
--- /dev/null
+++ b/resources/audio_processing/agc2/rnn_vad/vad_prob.dat.sha1
@@ -0,0 +1 @@
+05735ede0b457318e307d12f5acfd11bbbbd0afd \ No newline at end of file
diff --git a/tools_webrtc/libs/generate_licenses.py b/tools_webrtc/libs/generate_licenses.py
index 9bbe7526a3..df7ad8210e 100755
--- a/tools_webrtc/libs/generate_licenses.py
+++ b/tools_webrtc/libs/generate_licenses.py
@@ -44,6 +44,7 @@ LIB_TO_LICENSES_DICT = {
'openmax_dl': ['third_party/openmax_dl/LICENSE'],
'opus': ['third_party/opus/src/COPYING'],
'protobuf': ['third_party/protobuf/LICENSE'],
+ 'rnnoise': ['third_party/rnnoise/COPYING'],
'usrsctp': ['third_party/usrsctp/LICENSE'],
'webrtc': ['LICENSE', 'LICENSE_THIRD_PARTY'],
'zlib': ['third_party/zlib/LICENSE'],