aboutsummaryrefslogtreecommitdiff
path: root/modules/audio_processing/aec3
diff options
context:
space:
mode:
author Sam Zackrisson <saza@webrtc.org> 2022-04-08 15:28:45 +0200
committer WebRTC LUCI CQ <webrtc-scoped@luci-project-accounts.iam.gserviceaccount.com> 2022-04-08 16:43:14 +0000
commit fa07b43074440003a13f5974826b2ca2d0410328 (patch)
tree 73f24fc5c1bee9111261a86aa949f5ed4175444b /modules/audio_processing/aec3
parent 13fe3674ff3cabbd4cd86968a9dbf1350ab0ad2d (diff)
download webrtc-fa07b43074440003a13f5974826b2ca2d0410328.tar.gz
AEC3: Add fallback to mono processing if no stereo is detected for some time
If playout audio is temporarily stereo, the AEC will currently enter stereo processing mode indefinitely. To save CPU and improve AEC performance, this CL adds support for falling back to mono after a period of no stereo. The feature is enabled by default in the AEC3 config. Bug: chromium:1295710 Change-Id: I690b5b22f8407f950bf41f3bcaa9ca0138452157 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/258421 Reviewed-by: Per Åhgren <peah@webrtc.org> Commit-Queue: Sam Zackrisson <saza@webrtc.org> Cr-Commit-Position: refs/heads/main@{#36502}
Diffstat (limited to 'modules/audio_processing/aec3')
-rw-r--r-- modules/audio_processing/aec3/config_selector.cc 8
-rw-r--r-- modules/audio_processing/aec3/echo_canceller3.cc 4
-rw-r--r-- modules/audio_processing/aec3/multi_channel_content_detector.cc 30
-rw-r--r-- modules/audio_processing/aec3/multi_channel_content_detector.h 10
-rw-r--r-- modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc 146
5 files changed, 160 insertions, 38 deletions
diff --git a/modules/audio_processing/aec3/config_selector.cc b/modules/audio_processing/aec3/config_selector.cc
index 9a37da6ccc..c55344da79 100644
--- a/modules/audio_processing/aec3/config_selector.cc
+++ b/modules/audio_processing/aec3/config_selector.cc
@@ -23,21 +23,23 @@ bool CompatibleConfigs(const EchoCanceller3Config& mono_config,
multichannel_config.delay.fixed_capture_delay_samples) {
return false;
}
-
if (mono_config.filter.export_linear_aec_output !=
multichannel_config.filter.export_linear_aec_output) {
return false;
}
-
if (mono_config.filter.high_pass_filter_echo_reference !=
multichannel_config.filter.high_pass_filter_echo_reference) {
return false;
}
-
if (mono_config.multi_channel.detect_stereo_content !=
multichannel_config.multi_channel.detect_stereo_content) {
return false;
}
+ if (mono_config.multi_channel.stereo_detection_timeout_threshold_seconds !=
+ multichannel_config.multi_channel
+ .stereo_detection_timeout_threshold_seconds) {
+ return false;
+ }
return true;
}
diff --git a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc
index ef58314c0b..a74c37e0c9 100644
--- a/modules/audio_processing/aec3/echo_canceller3.cc
+++ b/modules/audio_processing/aec3/echo_canceller3.cc
@@ -702,7 +702,9 @@ EchoCanceller3::EchoCanceller3(
config_selector_.active_config().multi_channel.detect_stereo_content,
num_render_input_channels_,
config_selector_.active_config()
- .multi_channel.stereo_detection_threshold),
+ .multi_channel.stereo_detection_threshold,
+ config_selector_.active_config()
+ .multi_channel.stereo_detection_timeout_threshold_seconds),
output_framer_(num_bands_, num_capture_channels_),
capture_blocker_(num_bands_, num_capture_channels_),
render_transfer_queue_(
diff --git a/modules/audio_processing/aec3/multi_channel_content_detector.cc b/modules/audio_processing/aec3/multi_channel_content_detector.cc
index 62b3ae0840..270316ebc1 100644
--- a/modules/audio_processing/aec3/multi_channel_content_detector.cc
+++ b/modules/audio_processing/aec3/multi_channel_content_detector.cc
@@ -17,6 +17,8 @@ namespace webrtc {
namespace {
+constexpr int kNumFramesPerSecond = 100;
+
// Compares the left and right channels in the render `frame` to determine
// whether the signal is a proper stereo signal. To allow for differences
// introduced by hardware drivers, a threshold `detection_threshold` is used for
@@ -43,21 +45,37 @@ bool IsProperStereo(const std::vector<std::vector<std::vector<float>>>& frame,
MultiChannelContentDetector::MultiChannelContentDetector(
bool detect_stereo_content,
int num_render_input_channels,
- float detection_threshold)
+ float detection_threshold,
+ int stereo_detection_timeout_threshold_seconds)
: detect_stereo_content_(detect_stereo_content),
detection_threshold_(detection_threshold),
+ detection_timeout_threshold_frames_(
+ stereo_detection_timeout_threshold_seconds > 0
+ ? absl::make_optional(stereo_detection_timeout_threshold_seconds *
+ kNumFramesPerSecond)
+ : absl::nullopt),
proper_multichannel_content_detected_(!detect_stereo_content &&
num_render_input_channels > 1) {}
bool MultiChannelContentDetector::UpdateDetection(
const std::vector<std::vector<std::vector<float>>>& frame) {
- bool previous_proper_multichannel_content_detected_ =
+ if (!detect_stereo_content_)
+ return false;
+
+ const bool previous_proper_multichannel_content_detected =
proper_multichannel_content_detected_;
- if (detect_stereo_content_ && !proper_multichannel_content_detected_) {
- proper_multichannel_content_detected_ =
- IsProperStereo(frame, detection_threshold_);
+
+ if (IsProperStereo(frame, detection_threshold_)) {
+ proper_multichannel_content_detected_ = true;
+ frames_since_stereo_detected_ = 0;
+ } else {
+ ++frames_since_stereo_detected_;
+ if (detection_timeout_threshold_frames_ &&
+ frames_since_stereo_detected_ >= *detection_timeout_threshold_frames_) {
+ proper_multichannel_content_detected_ = false;
+ }
}
- return previous_proper_multichannel_content_detected_ !=
+ return previous_proper_multichannel_content_detected !=
proper_multichannel_content_detected_;
}
diff --git a/modules/audio_processing/aec3/multi_channel_content_detector.h b/modules/audio_processing/aec3/multi_channel_content_detector.h
index 119cd1f2a4..e4f3f09ba1 100644
--- a/modules/audio_processing/aec3/multi_channel_content_detector.h
+++ b/modules/audio_processing/aec3/multi_channel_content_detector.h
@@ -15,6 +15,8 @@
#include <vector>
+#include "absl/types/optional.h"
+
namespace webrtc {
// Analyzes audio content to determine whether the contained audio is proper
@@ -23,9 +25,13 @@ namespace webrtc {
// detection.
class MultiChannelContentDetector {
public:
+ // If |stereo_detection_timeout_threshold_seconds| <= 0, no timeout is
+ // applied: Once multichannel is detected, the detector remains in that state
+ // for its lifetime.
MultiChannelContentDetector(bool detect_stereo_content,
int num_render_input_channels,
- float detection_threshold);
+ float detection_threshold,
+ int stereo_detection_timeout_threshold_seconds);
// Compares the left and right channels in the render `frame` to determine
// whether the signal is a proper multichannel signal. Returns a bool
@@ -40,7 +46,9 @@ class MultiChannelContentDetector {
private:
const bool detect_stereo_content_;
const float detection_threshold_;
+ const absl::optional<int> detection_timeout_threshold_frames_;
bool proper_multichannel_content_detected_;
+ int frames_since_stereo_detected_ = 0;
};
} // namespace webrtc
diff --git a/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc b/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc
index ae2a9336ef..cb495e859f 100644
--- a/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc
+++ b/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc
@@ -15,23 +15,29 @@
namespace webrtc {
TEST(MultiChannelContentDetector, HandlingOfMono) {
- MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
- /*num_render_input_channels=*/1,
- /*detection_threshold=*/0.0f);
+ MultiChannelContentDetector mc(
+ /*detect_stereo_content=*/true,
+ /*num_render_input_channels=*/1,
+ /*detection_threshold=*/0.0f,
+ /*stereo_detection_timeout_threshold_seconds=*/0);
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
}
TEST(MultiChannelContentDetector, HandlingOfMonoAndDetectionOff) {
- MultiChannelContentDetector mc(/*detect_stereo_content=*/false,
- /*num_render_input_channels=*/1,
- /*detection_threshold=*/0.0f);
+ MultiChannelContentDetector mc(
+ /*detect_stereo_content=*/false,
+ /*num_render_input_channels=*/1,
+ /*detection_threshold=*/0.0f,
+ /*stereo_detection_timeout_threshold_seconds=*/0);
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
}
TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
- MultiChannelContentDetector mc(/*detect_stereo_content=*/false,
- /*num_render_input_channels=*/2,
- /*detection_threshold=*/0.0f);
+ MultiChannelContentDetector mc(
+ /*detect_stereo_content=*/false,
+ /*num_render_input_channels=*/2,
+ /*detection_threshold=*/0.0f,
+ /*stereo_detection_timeout_threshold_seconds=*/0);
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
std::vector<std::vector<std::vector<float>>> frame(
@@ -46,16 +52,20 @@ TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
}
TEST(MultiChannelContentDetector, InitialDetectionOfStereo) {
- MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
- /*num_render_input_channels=*/2,
- /*detection_threshold=*/0.0f);
+ MultiChannelContentDetector mc(
+ /*detect_stereo_content=*/true,
+ /*num_render_input_channels=*/2,
+ /*detection_threshold=*/0.0f,
+ /*stereo_detection_timeout_threshold_seconds=*/0);
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
}
TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) {
- MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
- /*num_render_input_channels=*/2,
- /*detection_threshold=*/0.0f);
+ MultiChannelContentDetector mc(
+ /*detect_stereo_content=*/true,
+ /*num_render_input_channels=*/2,
+ /*detection_threshold=*/0.0f,
+ /*stereo_detection_timeout_threshold_seconds=*/0);
std::vector<std::vector<std::vector<float>>> frame(
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
@@ -67,9 +77,11 @@ TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) {
}
TEST(MultiChannelContentDetector, DetectionWhenStereo) {
- MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
- /*num_render_input_channels=*/2,
- /*detection_threshold=*/0.0f);
+ MultiChannelContentDetector mc(
+ /*detect_stereo_content=*/true,
+ /*num_render_input_channels=*/2,
+ /*detection_threshold=*/0.0f,
+ /*stereo_detection_timeout_threshold_seconds=*/0);
std::vector<std::vector<std::vector<float>>> frame(
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
@@ -81,9 +93,11 @@ TEST(MultiChannelContentDetector, DetectionWhenStereo) {
}
TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) {
- MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
- /*num_render_input_channels=*/2,
- /*detection_threshold=*/0.0f);
+ MultiChannelContentDetector mc(
+ /*detect_stereo_content=*/true,
+ /*num_render_input_channels=*/2,
+ /*detection_threshold=*/0.0f,
+ /*stereo_detection_timeout_threshold_seconds=*/0);
std::vector<std::vector<std::vector<float>>> frame(
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
@@ -105,9 +119,11 @@ TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) {
TEST(MultiChannelContentDetector, DetectionWithStereoBelowThreshold) {
constexpr float kThreshold = 1.0f;
- MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
- /*num_render_input_channels=*/2,
- /*detection_threshold=*/kThreshold);
+ MultiChannelContentDetector mc(
+ /*detect_stereo_content=*/true,
+ /*num_render_input_channels=*/2,
+ /*detection_threshold=*/kThreshold,
+ /*stereo_detection_timeout_threshold_seconds=*/0);
std::vector<std::vector<std::vector<float>>> frame(
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
@@ -121,9 +137,11 @@ TEST(MultiChannelContentDetector, DetectionWithStereoBelowThreshold) {
TEST(MultiChannelContentDetector, DetectionWithStereoAboveThreshold) {
constexpr float kThreshold = 1.0f;
- MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
- /*num_render_input_channels=*/2,
- /*detection_threshold=*/kThreshold);
+ MultiChannelContentDetector mc(
+ /*detect_stereo_content=*/true,
+ /*num_render_input_channels=*/2,
+ /*detection_threshold=*/kThreshold,
+ /*stereo_detection_timeout_threshold_seconds=*/0);
std::vector<std::vector<std::vector<float>>> frame(
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
@@ -135,4 +153,78 @@ TEST(MultiChannelContentDetector, DetectionWithStereoAboveThreshold) {
EXPECT_FALSE(mc.UpdateDetection(frame));
}
+class MultiChannelContentDetectorTimeoutBehavior
+ : public ::testing::Test,
+ public ::testing::WithParamInterface<std::tuple<bool, int>> {};
+
+INSTANTIATE_TEST_SUITE_P(MultiChannelContentDetector,
+ MultiChannelContentDetectorTimeoutBehavior,
+ ::testing::Combine(::testing::Values(false, true),
+ ::testing::Values(0, 1, 10)));
+
+TEST_P(MultiChannelContentDetectorTimeoutBehavior,
+ TimeOutBehaviorForNonTrueStereo) {
+ constexpr int kNumFramesPerSecond = 100;
+ const bool detect_stereo_content = std::get<0>(GetParam());
+ const int stereo_stereo_detection_timeout_threshold_seconds =
+ std::get<1>(GetParam());
+ const int stereo_detection_timeout_threshold_frames =
+ stereo_stereo_detection_timeout_threshold_seconds * kNumFramesPerSecond;
+
+ MultiChannelContentDetector mc(
+ detect_stereo_content,
+ /*num_render_input_channels=*/2,
+ /*detection_threshold=*/0.0f,
+ stereo_stereo_detection_timeout_threshold_seconds);
+ std::vector<std::vector<std::vector<float>>> true_stereo_frame = {
+ {std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};
+
+ std::vector<std::vector<std::vector<float>>> fake_stereo_frame = {
+ {std::vector<float>(160, 100.0f), std::vector<float>(160, 100.0f)}};
+
+ // Pass fake stereo frames and verify the content detection.
+ for (int k = 0; k < 10; ++k) {
+ EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+ if (detect_stereo_content) {
+ EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+ } else {
+ EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+ }
+ }
+
+ // Pass a true stereo frame and verify that it is properly detected.
+ if (detect_stereo_content) {
+ EXPECT_TRUE(mc.UpdateDetection(true_stereo_frame));
+ } else {
+ EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+ }
+ EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+
+ // Pass fake stereo frames until any timeouts are about to occur.
+ for (int k = 0; k < stereo_detection_timeout_threshold_frames - 1; ++k) {
+ EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+ EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+ }
+
+ // Pass a fake stereo frame and verify that any timeouts properly occur.
+ if (detect_stereo_content && stereo_detection_timeout_threshold_frames > 0) {
+ EXPECT_TRUE(mc.UpdateDetection(fake_stereo_frame));
+ EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+ } else {
+ EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+ EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+ }
+
+ // Pass fake stereo frames and verify the behavior after any timeout.
+ for (int k = 0; k < 10; ++k) {
+ EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+ if (detect_stereo_content &&
+ stereo_detection_timeout_threshold_frames > 0) {
+ EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+ } else {
+ EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+ }
+ }
+}
+
} // namespace webrtc