diff options
Diffstat (limited to 'modules/audio_processing/aec3')
5 files changed, 160 insertions, 38 deletions
diff --git a/modules/audio_processing/aec3/config_selector.cc b/modules/audio_processing/aec3/config_selector.cc index 9a37da6ccc..c55344da79 100644 --- a/modules/audio_processing/aec3/config_selector.cc +++ b/modules/audio_processing/aec3/config_selector.cc @@ -23,21 +23,23 @@ bool CompatibleConfigs(const EchoCanceller3Config& mono_config, multichannel_config.delay.fixed_capture_delay_samples) { return false; } - if (mono_config.filter.export_linear_aec_output != multichannel_config.filter.export_linear_aec_output) { return false; } - if (mono_config.filter.high_pass_filter_echo_reference != multichannel_config.filter.high_pass_filter_echo_reference) { return false; } - if (mono_config.multi_channel.detect_stereo_content != multichannel_config.multi_channel.detect_stereo_content) { return false; } + if (mono_config.multi_channel.stereo_detection_timeout_threshold_seconds != + multichannel_config.multi_channel + .stereo_detection_timeout_threshold_seconds) { + return false; + } return true; } diff --git a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc index ef58314c0b..a74c37e0c9 100644 --- a/modules/audio_processing/aec3/echo_canceller3.cc +++ b/modules/audio_processing/aec3/echo_canceller3.cc @@ -702,7 +702,9 @@ EchoCanceller3::EchoCanceller3( config_selector_.active_config().multi_channel.detect_stereo_content, num_render_input_channels_, config_selector_.active_config() - .multi_channel.stereo_detection_threshold), + .multi_channel.stereo_detection_threshold, + config_selector_.active_config() + .multi_channel.stereo_detection_timeout_threshold_seconds), output_framer_(num_bands_, num_capture_channels_), capture_blocker_(num_bands_, num_capture_channels_), render_transfer_queue_( diff --git a/modules/audio_processing/aec3/multi_channel_content_detector.cc b/modules/audio_processing/aec3/multi_channel_content_detector.cc index 62b3ae0840..270316ebc1 100644 --- a/modules/audio_processing/aec3/multi_channel_content_detector.cc +++ b/modules/audio_processing/aec3/multi_channel_content_detector.cc @@ -17,6 +17,8 @@ namespace webrtc { namespace { +constexpr int kNumFramesPerSecond = 100; + // Compares the left and right channels in the render `frame` to determine // whether the signal is a proper stereo signal. To allow for differences // introduced by hardware drivers, a threshold `detection_threshold` is used for @@ -43,21 +45,37 @@ bool IsProperStereo(const std::vector<std::vector<std::vector<float>>>& frame, MultiChannelContentDetector::MultiChannelContentDetector( bool detect_stereo_content, int num_render_input_channels, - float detection_threshold) + float detection_threshold, + int stereo_detection_timeout_threshold_seconds) : detect_stereo_content_(detect_stereo_content), detection_threshold_(detection_threshold), + detection_timeout_threshold_frames_( + stereo_detection_timeout_threshold_seconds > 0 + ? absl::make_optional(stereo_detection_timeout_threshold_seconds * + kNumFramesPerSecond) + : absl::nullopt), proper_multichannel_content_detected_(!detect_stereo_content && num_render_input_channels > 1) {} bool MultiChannelContentDetector::UpdateDetection( const std::vector<std::vector<std::vector<float>>>& frame) { - bool previous_proper_multichannel_content_detected_ = + if (!detect_stereo_content_) + return false; + + const bool previous_proper_multichannel_content_detected = proper_multichannel_content_detected_; - if (detect_stereo_content_ && !proper_multichannel_content_detected_) { - proper_multichannel_content_detected_ = - IsProperStereo(frame, detection_threshold_); + + if (IsProperStereo(frame, detection_threshold_)) { + proper_multichannel_content_detected_ = true; + frames_since_stereo_detected_ = 0; + } else { + ++frames_since_stereo_detected_; + if (detection_timeout_threshold_frames_ && + frames_since_stereo_detected_ >= *detection_timeout_threshold_frames_) { + proper_multichannel_content_detected_ = false; + } } - return previous_proper_multichannel_content_detected_ != + return previous_proper_multichannel_content_detected != proper_multichannel_content_detected_; } diff --git a/modules/audio_processing/aec3/multi_channel_content_detector.h b/modules/audio_processing/aec3/multi_channel_content_detector.h index 119cd1f2a4..e4f3f09ba1 100644 --- a/modules/audio_processing/aec3/multi_channel_content_detector.h +++ b/modules/audio_processing/aec3/multi_channel_content_detector.h @@ -15,6 +15,8 @@ #include <vector> +#include "absl/types/optional.h" + namespace webrtc { // Analyzes audio content to determine whether the contained audio is proper @@ -23,9 +25,13 @@ namespace webrtc { // detection. class MultiChannelContentDetector { public: + // If |stereo_detection_timeout_threshold_seconds| <= 0, no timeout is + // applied: Once multichannel is detected, the detector remains in that state + // for its lifetime. MultiChannelContentDetector(bool detect_stereo_content, int num_render_input_channels, - float detection_threshold); + float detection_threshold, + int stereo_detection_timeout_threshold_seconds); // Compares the left and right channels in the render `frame` to determine // whether the signal is a proper multichannel signal. Returns a bool @@ -40,7 +46,9 @@ class MultiChannelContentDetector { private: const bool detect_stereo_content_; const float detection_threshold_; + const absl::optional<int> detection_timeout_threshold_frames_; bool proper_multichannel_content_detected_; + int frames_since_stereo_detected_ = 0; }; } // namespace webrtc diff --git a/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc b/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc index ae2a9336ef..cb495e859f 100644 --- a/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc +++ b/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc @@ -15,23 +15,29 @@ namespace webrtc { TEST(MultiChannelContentDetector, HandlingOfMono) { - MultiChannelContentDetector mc(/*detect_stereo_content=*/true, - /*num_render_input_channels=*/1, - /*detection_threshold=*/0.0f); + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/1, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/0); EXPECT_FALSE(mc.IsMultiChannelContentDetected()); } TEST(MultiChannelContentDetector, HandlingOfMonoAndDetectionOff) { - MultiChannelContentDetector mc(/*detect_stereo_content=*/false, - /*num_render_input_channels=*/1, - /*detection_threshold=*/0.0f); + MultiChannelContentDetector mc( + /*detect_stereo_content=*/false, + /*num_render_input_channels=*/1, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/0); EXPECT_FALSE(mc.IsMultiChannelContentDetected()); } TEST(MultiChannelContentDetector, HandlingOfDetectionOff) { - MultiChannelContentDetector mc(/*detect_stereo_content=*/false, - /*num_render_input_channels=*/2, - /*detection_threshold=*/0.0f); + MultiChannelContentDetector mc( + /*detect_stereo_content=*/false, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/0); EXPECT_TRUE(mc.IsMultiChannelContentDetected()); std::vector<std::vector<std::vector<float>>> frame( @@ -46,16 +52,20 @@ TEST(MultiChannelContentDetector, HandlingOfDetectionOff) { } TEST(MultiChannelContentDetector, InitialDetectionOfStereo) { - MultiChannelContentDetector mc(/*detect_stereo_content=*/true, - /*num_render_input_channels=*/2, - /*detection_threshold=*/0.0f); + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/0); EXPECT_FALSE(mc.IsMultiChannelContentDetected()); } TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) { - MultiChannelContentDetector mc(/*detect_stereo_content=*/true, - /*num_render_input_channels=*/2, - /*detection_threshold=*/0.0f); + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/0); std::vector<std::vector<std::vector<float>>> frame( 1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f))); std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); @@ -67,9 +77,11 @@ TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) { } TEST(MultiChannelContentDetector, DetectionWhenStereo) { - MultiChannelContentDetector mc(/*detect_stereo_content=*/true, - /*num_render_input_channels=*/2, - /*detection_threshold=*/0.0f); + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/0); std::vector<std::vector<std::vector<float>>> frame( 1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f))); std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); @@ -81,9 +93,11 @@ TEST(MultiChannelContentDetector, DetectionWhenStereo) { } TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) { - MultiChannelContentDetector mc(/*detect_stereo_content=*/true, - /*num_render_input_channels=*/2, - /*detection_threshold=*/0.0f); + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/0); std::vector<std::vector<std::vector<float>>> frame( 1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f))); @@ -105,9 +119,11 @@ TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) { TEST(MultiChannelContentDetector, DetectionWithStereoBelowThreshold) { constexpr float kThreshold = 1.0f; - MultiChannelContentDetector mc(/*detect_stereo_content=*/true, - /*num_render_input_channels=*/2, - /*detection_threshold=*/kThreshold); + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/kThreshold, + /*stereo_detection_timeout_threshold_seconds=*/0); std::vector<std::vector<std::vector<float>>> frame( 1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f))); std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); @@ -121,9 +137,11 @@ TEST(MultiChannelContentDetector, DetectionWithStereoBelowThreshold) { TEST(MultiChannelContentDetector, DetectionWithStereoAboveThreshold) { constexpr float kThreshold = 1.0f; - MultiChannelContentDetector mc(/*detect_stereo_content=*/true, - /*num_render_input_channels=*/2, - /*detection_threshold=*/kThreshold); + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/kThreshold, + /*stereo_detection_timeout_threshold_seconds=*/0); std::vector<std::vector<std::vector<float>>> frame( 1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f))); std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); @@ -135,4 +153,78 @@ TEST(MultiChannelContentDetector, DetectionWithStereoAboveThreshold) { EXPECT_FALSE(mc.UpdateDetection(frame)); } +class MultiChannelContentDetectorTimeoutBehavior + : public ::testing::Test, + public ::testing::WithParamInterface<std::tuple<bool, int>> {}; + +INSTANTIATE_TEST_SUITE_P(MultiChannelContentDetector, + MultiChannelContentDetectorTimeoutBehavior, + ::testing::Combine(::testing::Values(false, true), + ::testing::Values(0, 1, 10))); + +TEST_P(MultiChannelContentDetectorTimeoutBehavior, + TimeOutBehaviorForNonTrueStereo) { + constexpr int kNumFramesPerSecond = 100; + const bool detect_stereo_content = std::get<0>(GetParam()); + const int stereo_stereo_detection_timeout_threshold_seconds = + std::get<1>(GetParam()); + const int stereo_detection_timeout_threshold_frames = + stereo_stereo_detection_timeout_threshold_seconds * kNumFramesPerSecond; + + MultiChannelContentDetector mc( + detect_stereo_content, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + stereo_stereo_detection_timeout_threshold_seconds); + std::vector<std::vector<std::vector<float>>> true_stereo_frame = { + {std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}}; + + std::vector<std::vector<std::vector<float>>> fake_stereo_frame = { + {std::vector<float>(160, 100.0f), std::vector<float>(160, 100.0f)}}; + + // Pass fake stereo frames and verify the content detection. + for (int k = 0; k < 10; ++k) { + EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame)); + if (detect_stereo_content) { + EXPECT_FALSE(mc.IsMultiChannelContentDetected()); + } else { + EXPECT_TRUE(mc.IsMultiChannelContentDetected()); + } + } + + // Pass a true stereo frame and verify that it is properly detected. + if (detect_stereo_content) { + EXPECT_TRUE(mc.UpdateDetection(true_stereo_frame)); + } else { + EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame)); + } + EXPECT_TRUE(mc.IsMultiChannelContentDetected()); + + // Pass fake stereo frames until any timeouts are about to occur. + for (int k = 0; k < stereo_detection_timeout_threshold_frames - 1; ++k) { + EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame)); + EXPECT_TRUE(mc.IsMultiChannelContentDetected()); + } + + // Pass a fake stereo frame and verify that any timeouts properly occur. + if (detect_stereo_content && stereo_detection_timeout_threshold_frames > 0) { + EXPECT_TRUE(mc.UpdateDetection(fake_stereo_frame)); + EXPECT_FALSE(mc.IsMultiChannelContentDetected()); + } else { + EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame)); + EXPECT_TRUE(mc.IsMultiChannelContentDetected()); + } + + // Pass fake stereo frames and verify the behavior after any timeout. + for (int k = 0; k < 10; ++k) { + EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame)); + if (detect_stereo_content && + stereo_detection_timeout_threshold_frames > 0) { + EXPECT_FALSE(mc.IsMultiChannelContentDetected()); + } else { + EXPECT_TRUE(mc.IsMultiChannelContentDetected()); + } + } +} + } // namespace webrtc |