AEC3: Add hysteresis period before entering stereo processing

Even if playout audio is only very briefly stereo, the AEC will enter stereo processing mode. To save CPU and improve AEC performance, this CL adds a hysteresis period before treating playout as stereo. The feature is enabled by default in the AEC3 config.

Bug: chromium:1295710
Change-Id: I29116ab2e7823e25a02aa3b66a1c619f1d966d9e
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/258479
Reviewed-by: Per Åhgren <peah@webrtc.org>
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#36503}
author: Sam Zackrisson <saza@webrtc.org> 2022-04-08 16:14:06 +0200
committer: WebRTC LUCI CQ <webrtc-scoped@luci-project-accounts.iam.gserviceaccount.com> 2022-04-08 17:01:08 +0000
commit: cf7f7f9fa0a1996f42074a6c398ee76f0ee9e79e (patch)
tree: 1c2b3c12ffffe65d2448a872a176d2b2f078afc1 /modules/audio_processing/aec3
parent: fa07b43074440003a13f5974826b2ca2d0410328 (diff)
download: webrtc-cf7f7f9fa0a1996f42074a6c398ee76f0ee9e79e.tar.gz
6 files changed, 285 insertions, 65 deletions
diff --git a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc
index a74c37e0c9..992e295dfb 100644
--- a/modules/audio_processing/aec3/echo_canceller3.cc
+++ b/modules/audio_processing/aec3/echo_canceller3.cc
@@ -704,7 +704,9 @@ EchoCanceller3::EchoCanceller3(
           config_selector_.active_config()
               .multi_channel.stereo_detection_threshold,
           config_selector_.active_config()
-              .multi_channel.stereo_detection_timeout_threshold_seconds),
+              .multi_channel.stereo_detection_timeout_threshold_seconds,
+          config_selector_.active_config()
+              .multi_channel.stereo_detection_hysteresis_seconds),
       output_framer_(num_bands_, num_capture_channels_),
       capture_blocker_(num_bands_, num_capture_channels_),
       render_transfer_queue_(
@@ -772,12 +774,12 @@ void EchoCanceller3::Initialize() {
   RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
 
   num_render_channels_to_aec_ =
-      multichannel_content_detector_.IsMultiChannelContentDetected()
+      multichannel_content_detector_.IsProperMultiChannelContentDetected()
           ? num_render_input_channels_
           : 1;
 
   config_selector_.Update(
-      multichannel_content_detector_.IsMultiChannelContentDetected());
+      multichannel_content_detector_.IsProperMultiChannelContentDetected());
 
   for (std::vector<std::vector<float>>& block_band : render_block_) {
     block_band.resize(num_render_channels_to_aec_);
diff --git a/modules/audio_processing/aec3/echo_canceller3.h b/modules/audio_processing/aec3/echo_canceller3.h
index ba5895f34a..831a7c738a 100644
--- a/modules/audio_processing/aec3/echo_canceller3.h
+++ b/modules/audio_processing/aec3/echo_canceller3.h
@@ -144,6 +144,8 @@ class EchoCanceller3 : public EchoControl {
   FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
                            DetectionOfProperStereoUsingThreshold);
   FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
+                           DetectionOfProperStereoUsingHysteresis);
+  FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
                            StereoContentDetectionForMonoSignals);
 
   class RenderWriter;
@@ -158,7 +160,7 @@ class EchoCanceller3 : public EchoControl {
 
   // Only for testing. Returns whether stereo processing is active.
   bool StereoRenderProcessingActiveForTesting() const {
-    return multichannel_content_detector_.IsMultiChannelContentDetected();
+    return multichannel_content_detector_.IsProperMultiChannelContentDetected();
   }
 
   // Only for testing.
diff --git a/modules/audio_processing/aec3/echo_canceller3_unittest.cc b/modules/audio_processing/aec3/echo_canceller3_unittest.cc
index 81cefb6f92..9a2df48dde 100644
--- a/modules/audio_processing/aec3/echo_canceller3_unittest.cc
+++ b/modules/audio_processing/aec3/echo_canceller3_unittest.cc
@@ -947,6 +947,7 @@ TEST(EchoCanceller3, DetectionOfProperStereo) {
 
   mono_config.multi_channel.detect_stereo_content = true;
   mono_config.multi_channel.stereo_detection_threshold = 0.0f;
+  mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.0f;
   multichannel_config = mono_config;
   mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
   multichannel_config->filter.coarse_initial.length_blocks =
@@ -994,6 +995,7 @@ TEST(EchoCanceller3, DetectionOfProperStereoUsingThreshold) {
   mono_config.multi_channel.detect_stereo_content = true;
   mono_config.multi_channel.stereo_detection_threshold =
       kStereoDetectionThreshold;
+  mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.0f;
   multichannel_config = mono_config;
   mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
   multichannel_config->filter.coarse_initial.length_blocks =
@@ -1024,6 +1026,64 @@ TEST(EchoCanceller3, DetectionOfProperStereoUsingThreshold) {
       kNumBlocksForSurroundConfig);
 }
 
+TEST(EchoCanceller3, DetectionOfProperStereoUsingHysteresis) {
+  constexpr int kSampleRateHz = 16000;
+  constexpr int kNumChannels = 2;
+  AudioBuffer buffer(/*input_rate=*/kSampleRateHz,
+                     /*input_num_channels=*/kNumChannels,
+                     /*buffer_rate=*/kSampleRateHz,
+                     /*buffer_num_channels=*/kNumChannels,
+                     /*output_rate=*/kSampleRateHz,
+                     /*output_num_channels=*/kNumChannels);
+
+  constexpr size_t kNumBlocksForMonoConfig = 1;
+  constexpr size_t kNumBlocksForSurroundConfig = 2;
+  EchoCanceller3Config mono_config;
+  absl::optional<EchoCanceller3Config> surround_config;
+
+  mono_config.multi_channel.detect_stereo_content = true;
+  mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.5f;
+  surround_config = mono_config;
+  mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
+  surround_config->filter.coarse_initial.length_blocks =
+      kNumBlocksForSurroundConfig;
+
+  EchoCanceller3 aec3(mono_config, surround_config,
+                      /*sample_rate_hz=*/kSampleRateHz,
+                      /*num_render_channels=*/kNumChannels,
+                      /*num_capture_input_channels=*/kNumChannels);
+
+  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForMonoConfig);
+
+  RunAecInStereo(buffer, aec3, 100.0f, 100.0f);
+  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForMonoConfig);
+
+  constexpr int kNumFramesPerSecond = 100;
+  for (int k = 0;
+       k < static_cast<int>(
+               kNumFramesPerSecond *
+               mono_config.multi_channel.stereo_detection_hysteresis_seconds);
+       ++k) {
+    RunAecInStereo(buffer, aec3, 100.0f, 101.0f);
+    EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+    EXPECT_EQ(
+        aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+        kNumBlocksForMonoConfig);
+  }
+
+  RunAecInStereo(buffer, aec3, 100.0f, 101.0f);
+  EXPECT_TRUE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForSurroundConfig);
+}
+
 TEST(EchoCanceller3, StereoContentDetectionForMonoSignals) {
   constexpr int kSampleRateHz = 16000;
   constexpr int kNumChannels = 2;
diff --git a/modules/audio_processing/aec3/multi_channel_content_detector.cc b/modules/audio_processing/aec3/multi_channel_content_detector.cc
index 270316ebc1..8d1bd9108c 100644
--- a/modules/audio_processing/aec3/multi_channel_content_detector.cc
+++ b/modules/audio_processing/aec3/multi_channel_content_detector.cc
@@ -13,6 +13,8 @@
 
 #include <cmath>
 
+#include "rtc_base/checks.h"
+
 namespace webrtc {
 
 namespace {
@@ -23,8 +25,8 @@ constexpr int kNumFramesPerSecond = 100;
 // whether the signal is a proper stereo signal. To allow for differences
 // introduced by hardware drivers, a threshold `detection_threshold` is used for
 // the detection.
-bool IsProperStereo(const std::vector<std::vector<std::vector<float>>>& frame,
-                    float detection_threshold) {
+bool HasStereoContent(const std::vector<std::vector<std::vector<float>>>& frame,
+                      float detection_threshold) {
   if (frame[0].size() < 2) {
     return false;
   }
@@ -46,7 +48,8 @@ MultiChannelContentDetector::MultiChannelContentDetector(
     bool detect_stereo_content,
     int num_render_input_channels,
     float detection_threshold,
-    int stereo_detection_timeout_threshold_seconds)
+    int stereo_detection_timeout_threshold_seconds,
+    float stereo_detection_hysteresis_seconds)
     : detect_stereo_content_(detect_stereo_content),
       detection_threshold_(detection_threshold),
       detection_timeout_threshold_frames_(
@@ -54,29 +57,46 @@ MultiChannelContentDetector::MultiChannelContentDetector(
               ? absl::make_optional(stereo_detection_timeout_threshold_seconds *
                                     kNumFramesPerSecond)
               : absl::nullopt),
-      proper_multichannel_content_detected_(!detect_stereo_content &&
-                                            num_render_input_channels > 1) {}
+      stereo_detection_hysteresis_frames_(static_cast<int>(
+          stereo_detection_hysteresis_seconds * kNumFramesPerSecond)),
+      persistent_multichannel_content_detected_(
+          !detect_stereo_content && num_render_input_channels > 1) {}
 
 bool MultiChannelContentDetector::UpdateDetection(
     const std::vector<std::vector<std::vector<float>>>& frame) {
-  if (!detect_stereo_content_)
+  if (!detect_stereo_content_) {
+    RTC_DCHECK_EQ(frame[0].size() > 1,
+                  persistent_multichannel_content_detected_);
     return false;
+  }
 
-  const bool previous_proper_multichannel_content_detected =
-      proper_multichannel_content_detected_;
-
-  if (IsProperStereo(frame, detection_threshold_)) {
-    proper_multichannel_content_detected_ = true;
-    frames_since_stereo_detected_ = 0;
-  } else {
-    ++frames_since_stereo_detected_;
-    if (detection_timeout_threshold_frames_ &&
-        frames_since_stereo_detected_ >= *detection_timeout_threshold_frames_) {
-      proper_multichannel_content_detected_ = false;
-    }
+  const bool previous_persistent_multichannel_content_detected =
+      persistent_multichannel_content_detected_;
+  const bool stereo_detected_in_frame =
+      HasStereoContent(frame, detection_threshold_);
+
+  consecutive_frames_with_stereo_ =
+      stereo_detected_in_frame ? consecutive_frames_with_stereo_ + 1 : 0;
+  frames_since_stereo_detected_last_ =
+      stereo_detected_in_frame ? 0 : frames_since_stereo_detected_last_ + 1;
+
+  // Detect persistent multichannel content.
+  if (consecutive_frames_with_stereo_ > stereo_detection_hysteresis_frames_) {
+    persistent_multichannel_content_detected_ = true;
+  }
+  if (detection_timeout_threshold_frames_.has_value() &&
+      frames_since_stereo_detected_last_ >=
+          *detection_timeout_threshold_frames_) {
+    persistent_multichannel_content_detected_ = false;
   }
-  return previous_proper_multichannel_content_detected !=
-         proper_multichannel_content_detected_;
+
+  // Detect temporary multichannel content.
+  temporary_multichannel_content_detected_ =
+      persistent_multichannel_content_detected_ ? false
+                                                : stereo_detected_in_frame;
+
+  return previous_persistent_multichannel_content_detected !=
+         persistent_multichannel_content_detected_;
 }
 
 }  // namespace webrtc
diff --git a/modules/audio_processing/aec3/multi_channel_content_detector.h b/modules/audio_processing/aec3/multi_channel_content_detector.h
index e4f3f09ba1..3120502258 100644
--- a/modules/audio_processing/aec3/multi_channel_content_detector.h
+++ b/modules/audio_processing/aec3/multi_channel_content_detector.h
@@ -31,24 +31,33 @@ class MultiChannelContentDetector {
   MultiChannelContentDetector(bool detect_stereo_content,
                               int num_render_input_channels,
                               float detection_threshold,
-                              int stereo_detection_timeout_threshold_seconds);
+                              int stereo_detection_timeout_threshold_seconds,
+                              float stereo_detection_hysteresis_seconds);
 
   // Compares the left and right channels in the render `frame` to determine
   // whether the signal is a proper multichannel signal. Returns a bool
-  // indicating whether a change in the multichannel was detected.
+  // indicating whether a change in the proper multichannel content was
+  // detected.
   bool UpdateDetection(
       const std::vector<std::vector<std::vector<float>>>& frame);
 
-  bool IsMultiChannelContentDetected() const {
-    return proper_multichannel_content_detected_;
+  bool IsProperMultiChannelContentDetected() const {
+    return persistent_multichannel_content_detected_;
+  }
+
+  bool IsTemporaryMultiChannelContentDetectedForTesting() const {
+    return temporary_multichannel_content_detected_;
   }
 
  private:
   const bool detect_stereo_content_;
   const float detection_threshold_;
   const absl::optional<int> detection_timeout_threshold_frames_;
-  bool proper_multichannel_content_detected_;
-  int frames_since_stereo_detected_ = 0;
+  const int stereo_detection_hysteresis_frames_;
+  bool persistent_multichannel_content_detected_;
+  bool temporary_multichannel_content_detected_ = false;
+  int64_t frames_since_stereo_detected_last_ = 0;
+  int64_t consecutive_frames_with_stereo_ = 0;
 };
 
 }  // namespace webrtc
diff --git a/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc b/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc
index cb495e859f..0857bee4bd 100644
--- a/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc
+++ b/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc
@@ -19,8 +19,9 @@ TEST(MultiChannelContentDetector, HandlingOfMono) {
       /*detect_stereo_content=*/true,
       /*num_render_input_channels=*/1,
       /*detection_threshold=*/0.0f,
-      /*stereo_detection_timeout_threshold_seconds=*/0);
-  EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
+  EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
 }
 
 TEST(MultiChannelContentDetector, HandlingOfMonoAndDetectionOff) {
@@ -28,8 +29,9 @@ TEST(MultiChannelContentDetector, HandlingOfMonoAndDetectionOff) {
       /*detect_stereo_content=*/false,
       /*num_render_input_channels=*/1,
       /*detection_threshold=*/0.0f,
-      /*stereo_detection_timeout_threshold_seconds=*/0);
-  EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
+  EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
 }
 
 TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
@@ -37,8 +39,9 @@ TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
       /*detect_stereo_content=*/false,
       /*num_render_input_channels=*/2,
       /*detection_threshold=*/0.0f,
-      /*stereo_detection_timeout_threshold_seconds=*/0);
-  EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
+  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
 
   std::vector<std::vector<std::vector<float>>> frame(
       1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
@@ -46,7 +49,7 @@ TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
   std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f);
 
   EXPECT_FALSE(mc.UpdateDetection(frame));
-  EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
 
   EXPECT_FALSE(mc.UpdateDetection(frame));
 }
@@ -56,8 +59,9 @@ TEST(MultiChannelContentDetector, InitialDetectionOfStereo) {
       /*detect_stereo_content=*/true,
       /*num_render_input_channels=*/2,
       /*detection_threshold=*/0.0f,
-      /*stereo_detection_timeout_threshold_seconds=*/0);
-  EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
+  EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
 }
 
 TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) {
@@ -65,13 +69,14 @@ TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) {
       /*detect_stereo_content=*/true,
       /*num_render_input_channels=*/2,
       /*detection_threshold=*/0.0f,
-      /*stereo_detection_timeout_threshold_seconds=*/0);
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
   std::vector<std::vector<std::vector<float>>> frame(
       1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
   std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
   std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f);
   EXPECT_FALSE(mc.UpdateDetection(frame));
-  EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+  EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
 
   EXPECT_FALSE(mc.UpdateDetection(frame));
 }
@@ -81,13 +86,14 @@ TEST(MultiChannelContentDetector, DetectionWhenStereo) {
       /*detect_stereo_content=*/true,
       /*num_render_input_channels=*/2,
       /*detection_threshold=*/0.0f,
-      /*stereo_detection_timeout_threshold_seconds=*/0);
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
   std::vector<std::vector<std::vector<float>>> frame(
       1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
   std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
   std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f);
   EXPECT_TRUE(mc.UpdateDetection(frame));
-  EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
 
   EXPECT_FALSE(mc.UpdateDetection(frame));
 }
@@ -97,14 +103,15 @@ TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) {
       /*detect_stereo_content=*/true,
       /*num_render_input_channels=*/2,
       /*detection_threshold=*/0.0f,
-      /*stereo_detection_timeout_threshold_seconds=*/0);
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
   std::vector<std::vector<std::vector<float>>> frame(
       1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
 
   std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
   std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f);
   EXPECT_FALSE(mc.UpdateDetection(frame));
-  EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+  EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
 
   EXPECT_FALSE(mc.UpdateDetection(frame));
 
@@ -112,7 +119,7 @@ TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) {
   std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f);
 
   EXPECT_TRUE(mc.UpdateDetection(frame));
-  EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
 
   EXPECT_FALSE(mc.UpdateDetection(frame));
 }
@@ -123,14 +130,15 @@ TEST(MultiChannelContentDetector, DetectionWithStereoBelowThreshold) {
       /*detect_stereo_content=*/true,
       /*num_render_input_channels=*/2,
       /*detection_threshold=*/kThreshold,
-      /*stereo_detection_timeout_threshold_seconds=*/0);
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
   std::vector<std::vector<std::vector<float>>> frame(
       1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
   std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
   std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f + kThreshold);
 
   EXPECT_FALSE(mc.UpdateDetection(frame));
-  EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+  EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
 
   EXPECT_FALSE(mc.UpdateDetection(frame));
 }
@@ -141,14 +149,15 @@ TEST(MultiChannelContentDetector, DetectionWithStereoAboveThreshold) {
       /*detect_stereo_content=*/true,
       /*num_render_input_channels=*/2,
       /*detection_threshold=*/kThreshold,
-      /*stereo_detection_timeout_threshold_seconds=*/0);
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
   std::vector<std::vector<std::vector<float>>> frame(
       1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
   std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
   std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f + kThreshold + 0.1f);
 
   EXPECT_TRUE(mc.UpdateDetection(frame));
-  EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
 
   EXPECT_FALSE(mc.UpdateDetection(frame));
 }
@@ -166,16 +175,16 @@ TEST_P(MultiChannelContentDetectorTimeoutBehavior,
        TimeOutBehaviorForNonTrueStereo) {
   constexpr int kNumFramesPerSecond = 100;
   const bool detect_stereo_content = std::get<0>(GetParam());
-  const int stereo_stereo_detection_timeout_threshold_seconds =
+  const int stereo_detection_timeout_threshold_seconds =
       std::get<1>(GetParam());
   const int stereo_detection_timeout_threshold_frames =
-      stereo_stereo_detection_timeout_threshold_seconds * kNumFramesPerSecond;
+      stereo_detection_timeout_threshold_seconds * kNumFramesPerSecond;
 
-  MultiChannelContentDetector mc(
-      detect_stereo_content,
-      /*num_render_input_channels=*/2,
-      /*detection_threshold=*/0.0f,
-      stereo_stereo_detection_timeout_threshold_seconds);
+  MultiChannelContentDetector mc(detect_stereo_content,
+                                 /*num_render_input_channels=*/2,
+                                 /*detection_threshold=*/0.0f,
+                                 stereo_detection_timeout_threshold_seconds,
+                                 /*stereo_detection_hysteresis_seconds=*/0.0f);
   std::vector<std::vector<std::vector<float>>> true_stereo_frame = {
       {std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};
 
@@ -186,9 +195,9 @@ TEST_P(MultiChannelContentDetectorTimeoutBehavior,
   for (int k = 0; k < 10; ++k) {
     EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
     if (detect_stereo_content) {
-      EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+      EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
     } else {
-      EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+      EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
     }
   }
 
@@ -198,21 +207,21 @@ TEST_P(MultiChannelContentDetectorTimeoutBehavior,
   } else {
     EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
   }
-  EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
 
   // Pass fake stereo frames until any timeouts are about to occur.
   for (int k = 0; k < stereo_detection_timeout_threshold_frames - 1; ++k) {
     EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
-    EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+    EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
   }
 
   // Pass a fake stereo frame and verify that any timeouts properly occur.
   if (detect_stereo_content && stereo_detection_timeout_threshold_frames > 0) {
     EXPECT_TRUE(mc.UpdateDetection(fake_stereo_frame));
-    EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+    EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
   } else {
     EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
-    EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+    EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
   }
 
   // Pass fake stereo frames and verify the behavior after any timeout.
@@ -220,11 +229,129 @@ TEST_P(MultiChannelContentDetectorTimeoutBehavior,
     EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
     if (detect_stereo_content &&
         stereo_detection_timeout_threshold_frames > 0) {
-      EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+      EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+    } else {
+      EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+    }
+  }
+}
+
+class MultiChannelContentDetectorHysteresisBehavior
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<std::tuple<bool, float>> {};
+
+INSTANTIATE_TEST_SUITE_P(
+    MultiChannelContentDetector,
+    MultiChannelContentDetectorHysteresisBehavior,
+    ::testing::Combine(::testing::Values(false, true),
+                       ::testing::Values(0.0f, 0.1f, 0.2f)));
+
+TEST_P(MultiChannelContentDetectorHysteresisBehavior,
+       PeriodBeforeStereoDetectionIsTriggered) {
+  constexpr int kNumFramesPerSecond = 100;
+  const bool detect_stereo_content = std::get<0>(GetParam());
+  const float stereo_detection_hysteresis_seconds = std::get<1>(GetParam());
+  const int stereo_detection_hysteresis_frames = static_cast<int>(
+      stereo_detection_hysteresis_seconds * kNumFramesPerSecond);
+
+  MultiChannelContentDetector mc(
+      detect_stereo_content,
+      /*num_render_input_channels=*/2,
+      /*detection_threshold=*/0.0f,
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      stereo_detection_hysteresis_seconds);
+  std::vector<std::vector<std::vector<float>>> true_stereo_frame = {
+      {std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};
+
+  std::vector<std::vector<std::vector<float>>> fake_stereo_frame = {
+      {std::vector<float>(160, 100.0f), std::vector<float>(160, 100.0f)}};
+
+  // Pass fake stereo frames and verify the content detection.
+  for (int k = 0; k < 10; ++k) {
+    EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+    if (detect_stereo_content) {
+      EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+    } else {
+      EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+    }
+    EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+  }
+
+  // Pass two true stereo frames and verify that they are properly detected.
+  ASSERT_TRUE(stereo_detection_hysteresis_frames > 2 ||
+              stereo_detection_hysteresis_frames == 0);
+  for (int k = 0; k < 2; ++k) {
+    if (detect_stereo_content) {
+      if (stereo_detection_hysteresis_seconds == 0.0f) {
+        if (k == 0) {
+          EXPECT_TRUE(mc.UpdateDetection(true_stereo_frame));
+        } else {
+          EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+        }
+        EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+        EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+      } else {
+        EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+        EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+        EXPECT_TRUE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+      }
     } else {
-      EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+      EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+      EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+      EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
     }
   }
+
+  if (stereo_detection_hysteresis_seconds == 0.0f) {
+    return;
+  }
+
+  // Pass true stereo frames until the hysteresis period has just elapsed.
+  for (int k = 0; k < stereo_detection_hysteresis_frames - 2; ++k) {
+    if (detect_stereo_content) {
+      EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+      EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+      EXPECT_TRUE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+    } else {
+      EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+      EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+      EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+    }
+  }
+
+  // Pass the first frame beyond the hysteresis period; detection must trigger.
+  if (detect_stereo_content) {
+    EXPECT_TRUE(mc.UpdateDetection(true_stereo_frame));
+    EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+    EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+  } else {
+    EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+    EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+    EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+  }
+
+  // Pass an additional true stereo frame and verify that it is properly
+  // detected.
+  if (detect_stereo_content) {
+    EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+    EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+    EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+  } else {
+    EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+    EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+    EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+  }
+
+  // Pass a fake stereo frame and verify that it is properly detected.
+  if (detect_stereo_content) {
+    EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+    EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+    EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+  } else {
+    EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+    EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+    EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+  }
 }
 
 }  // namespace webrtc
author	Sam Zackrisson <saza@webrtc.org>	2022-04-08 16:14:06 +0200
committer	WebRTC LUCI CQ <webrtc-scoped@luci-project-accounts.iam.gserviceaccount.com>	2022-04-08 17:01:08 +0000
commit	cf7f7f9fa0a1996f42074a6c398ee76f0ee9e79e (patch)
tree	1c2b3c12ffffe65d2448a872a176d2b2f078afc1 /modules/audio_processing/aec3
parent	fa07b43074440003a13f5974826b2ca2d0410328 (diff)
download	webrtc-cf7f7f9fa0a1996f42074a6c398ee76f0ee9e79e.tar.gz