aboutsummaryrefslogtreecommitdiff
path: root/modules/audio_processing/aec3
diff options
context:
space:
mode:
authorSam Zackrisson <saza@webrtc.org>2022-04-08 16:14:06 +0200
committerWebRTC LUCI CQ <webrtc-scoped@luci-project-accounts.iam.gserviceaccount.com>2022-04-08 17:01:08 +0000
commitcf7f7f9fa0a1996f42074a6c398ee76f0ee9e79e (patch)
tree1c2b3c12ffffe65d2448a872a176d2b2f078afc1 /modules/audio_processing/aec3
parentfa07b43074440003a13f5974826b2ca2d0410328 (diff)
downloadwebrtc-cf7f7f9fa0a1996f42074a6c398ee76f0ee9e79e.tar.gz
AEC3: Add hysteresis period before entering stereo processing
Even if playout audio is only very briefly stereo, the AEC will enter stereo processing mode. To save CPU and improve AEC performance, this CL adds a hysteresis period before treating playout as stereo. The feature is enabled by default in the AEC3 config. Bug: chromium:1295710 Change-Id: I29116ab2e7823e25a02aa3b66a1c619f1d966d9e Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/258479 Reviewed-by: Per Åhgren <peah@webrtc.org> Commit-Queue: Sam Zackrisson <saza@webrtc.org> Cr-Commit-Position: refs/heads/main@{#36503}
Diffstat (limited to 'modules/audio_processing/aec3')
-rw-r--r--modules/audio_processing/aec3/echo_canceller3.cc8
-rw-r--r--modules/audio_processing/aec3/echo_canceller3.h4
-rw-r--r--modules/audio_processing/aec3/echo_canceller3_unittest.cc60
-rw-r--r--modules/audio_processing/aec3/multi_channel_content_detector.cc60
-rw-r--r--modules/audio_processing/aec3/multi_channel_content_detector.h21
-rw-r--r--modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc197
6 files changed, 285 insertions, 65 deletions
diff --git a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc
index a74c37e0c9..992e295dfb 100644
--- a/modules/audio_processing/aec3/echo_canceller3.cc
+++ b/modules/audio_processing/aec3/echo_canceller3.cc
@@ -704,7 +704,9 @@ EchoCanceller3::EchoCanceller3(
config_selector_.active_config()
.multi_channel.stereo_detection_threshold,
config_selector_.active_config()
- .multi_channel.stereo_detection_timeout_threshold_seconds),
+ .multi_channel.stereo_detection_timeout_threshold_seconds,
+ config_selector_.active_config()
+ .multi_channel.stereo_detection_hysteresis_seconds),
output_framer_(num_bands_, num_capture_channels_),
capture_blocker_(num_bands_, num_capture_channels_),
render_transfer_queue_(
@@ -772,12 +774,12 @@ void EchoCanceller3::Initialize() {
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
num_render_channels_to_aec_ =
- multichannel_content_detector_.IsMultiChannelContentDetected()
+ multichannel_content_detector_.IsProperMultiChannelContentDetected()
? num_render_input_channels_
: 1;
config_selector_.Update(
- multichannel_content_detector_.IsMultiChannelContentDetected());
+ multichannel_content_detector_.IsProperMultiChannelContentDetected());
for (std::vector<std::vector<float>>& block_band : render_block_) {
block_band.resize(num_render_channels_to_aec_);
diff --git a/modules/audio_processing/aec3/echo_canceller3.h b/modules/audio_processing/aec3/echo_canceller3.h
index ba5895f34a..831a7c738a 100644
--- a/modules/audio_processing/aec3/echo_canceller3.h
+++ b/modules/audio_processing/aec3/echo_canceller3.h
@@ -144,6 +144,8 @@ class EchoCanceller3 : public EchoControl {
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
DetectionOfProperStereoUsingThreshold);
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
+ DetectionOfProperStereoUsingHysteresis);
+ FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
StereoContentDetectionForMonoSignals);
class RenderWriter;
@@ -158,7 +160,7 @@ class EchoCanceller3 : public EchoControl {
// Only for testing. Returns whether stereo processing is active.
bool StereoRenderProcessingActiveForTesting() const {
- return multichannel_content_detector_.IsMultiChannelContentDetected();
+ return multichannel_content_detector_.IsProperMultiChannelContentDetected();
}
// Only for testing.
diff --git a/modules/audio_processing/aec3/echo_canceller3_unittest.cc b/modules/audio_processing/aec3/echo_canceller3_unittest.cc
index 81cefb6f92..9a2df48dde 100644
--- a/modules/audio_processing/aec3/echo_canceller3_unittest.cc
+++ b/modules/audio_processing/aec3/echo_canceller3_unittest.cc
@@ -947,6 +947,7 @@ TEST(EchoCanceller3, DetectionOfProperStereo) {
mono_config.multi_channel.detect_stereo_content = true;
mono_config.multi_channel.stereo_detection_threshold = 0.0f;
+ mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.0f;
multichannel_config = mono_config;
mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
multichannel_config->filter.coarse_initial.length_blocks =
@@ -994,6 +995,7 @@ TEST(EchoCanceller3, DetectionOfProperStereoUsingThreshold) {
mono_config.multi_channel.detect_stereo_content = true;
mono_config.multi_channel.stereo_detection_threshold =
kStereoDetectionThreshold;
+ mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.0f;
multichannel_config = mono_config;
mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
multichannel_config->filter.coarse_initial.length_blocks =
@@ -1024,6 +1026,64 @@ TEST(EchoCanceller3, DetectionOfProperStereoUsingThreshold) {
kNumBlocksForSurroundConfig);
}
+TEST(EchoCanceller3, DetectionOfProperStereoUsingHysteresis) {
+ constexpr int kSampleRateHz = 16000;
+ constexpr int kNumChannels = 2;
+ AudioBuffer buffer(/*input_rate=*/kSampleRateHz,
+ /*input_num_channels=*/kNumChannels,
+ /*buffer_rate=*/kSampleRateHz,
+ /*buffer_num_channels=*/kNumChannels,
+ /*output_rate=*/kSampleRateHz,
+ /*output_num_channels=*/kNumChannels);
+
+ constexpr size_t kNumBlocksForMonoConfig = 1;
+ constexpr size_t kNumBlocksForSurroundConfig = 2;
+ EchoCanceller3Config mono_config;
+ absl::optional<EchoCanceller3Config> surround_config;
+
+ mono_config.multi_channel.detect_stereo_content = true;
+ mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.5f;
+ surround_config = mono_config;
+ mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
+ surround_config->filter.coarse_initial.length_blocks =
+ kNumBlocksForSurroundConfig;
+
+ EchoCanceller3 aec3(mono_config, surround_config,
+ /*sample_rate_hz=*/kSampleRateHz,
+ /*num_render_channels=*/kNumChannels,
+ /*num_capture_input_channels=*/kNumChannels);
+
+ EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+ EXPECT_EQ(
+ aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+ kNumBlocksForMonoConfig);
+
+ RunAecInStereo(buffer, aec3, 100.0f, 100.0f);
+ EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+ EXPECT_EQ(
+ aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+ kNumBlocksForMonoConfig);
+
+ constexpr int kNumFramesPerSecond = 100;
+ for (int k = 0;
+ k < static_cast<int>(
+ kNumFramesPerSecond *
+ mono_config.multi_channel.stereo_detection_hysteresis_seconds);
+ ++k) {
+ RunAecInStereo(buffer, aec3, 100.0f, 101.0f);
+ EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+ EXPECT_EQ(
+ aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+ kNumBlocksForMonoConfig);
+ }
+
+ RunAecInStereo(buffer, aec3, 100.0f, 101.0f);
+ EXPECT_TRUE(aec3.StereoRenderProcessingActiveForTesting());
+ EXPECT_EQ(
+ aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+ kNumBlocksForSurroundConfig);
+}
+
TEST(EchoCanceller3, StereoContentDetectionForMonoSignals) {
constexpr int kSampleRateHz = 16000;
constexpr int kNumChannels = 2;
diff --git a/modules/audio_processing/aec3/multi_channel_content_detector.cc b/modules/audio_processing/aec3/multi_channel_content_detector.cc
index 270316ebc1..8d1bd9108c 100644
--- a/modules/audio_processing/aec3/multi_channel_content_detector.cc
+++ b/modules/audio_processing/aec3/multi_channel_content_detector.cc
@@ -13,6 +13,8 @@
#include <cmath>
+#include "rtc_base/checks.h"
+
namespace webrtc {
namespace {
@@ -23,8 +25,8 @@ constexpr int kNumFramesPerSecond = 100;
// whether the signal is a proper stereo signal. To allow for differences
// introduced by hardware drivers, a threshold `detection_threshold` is used for
// the detection.
-bool IsProperStereo(const std::vector<std::vector<std::vector<float>>>& frame,
- float detection_threshold) {
+bool HasStereoContent(const std::vector<std::vector<std::vector<float>>>& frame,
+ float detection_threshold) {
if (frame[0].size() < 2) {
return false;
}
@@ -46,7 +48,8 @@ MultiChannelContentDetector::MultiChannelContentDetector(
bool detect_stereo_content,
int num_render_input_channels,
float detection_threshold,
- int stereo_detection_timeout_threshold_seconds)
+ int stereo_detection_timeout_threshold_seconds,
+ float stereo_detection_hysteresis_seconds)
: detect_stereo_content_(detect_stereo_content),
detection_threshold_(detection_threshold),
detection_timeout_threshold_frames_(
@@ -54,29 +57,46 @@ MultiChannelContentDetector::MultiChannelContentDetector(
? absl::make_optional(stereo_detection_timeout_threshold_seconds *
kNumFramesPerSecond)
: absl::nullopt),
- proper_multichannel_content_detected_(!detect_stereo_content &&
- num_render_input_channels > 1) {}
+ stereo_detection_hysteresis_frames_(static_cast<int>(
+ stereo_detection_hysteresis_seconds * kNumFramesPerSecond)),
+ persistent_multichannel_content_detected_(
+ !detect_stereo_content && num_render_input_channels > 1) {}
bool MultiChannelContentDetector::UpdateDetection(
const std::vector<std::vector<std::vector<float>>>& frame) {
- if (!detect_stereo_content_)
+ if (!detect_stereo_content_) {
+ RTC_DCHECK_EQ(frame[0].size() > 1,
+ persistent_multichannel_content_detected_);
return false;
+ }
- const bool previous_proper_multichannel_content_detected =
- proper_multichannel_content_detected_;
-
- if (IsProperStereo(frame, detection_threshold_)) {
- proper_multichannel_content_detected_ = true;
- frames_since_stereo_detected_ = 0;
- } else {
- ++frames_since_stereo_detected_;
- if (detection_timeout_threshold_frames_ &&
- frames_since_stereo_detected_ >= *detection_timeout_threshold_frames_) {
- proper_multichannel_content_detected_ = false;
- }
+ const bool previous_persistent_multichannel_content_detected =
+ persistent_multichannel_content_detected_;
+ const bool stereo_detected_in_frame =
+ HasStereoContent(frame, detection_threshold_);
+
+ consecutive_frames_with_stereo_ =
+ stereo_detected_in_frame ? consecutive_frames_with_stereo_ + 1 : 0;
+ frames_since_stereo_detected_last_ =
+ stereo_detected_in_frame ? 0 : frames_since_stereo_detected_last_ + 1;
+
+ // Detect persistent multichannel content.
+ if (consecutive_frames_with_stereo_ > stereo_detection_hysteresis_frames_) {
+ persistent_multichannel_content_detected_ = true;
+ }
+ if (detection_timeout_threshold_frames_.has_value() &&
+ frames_since_stereo_detected_last_ >=
+ *detection_timeout_threshold_frames_) {
+ persistent_multichannel_content_detected_ = false;
}
- return previous_proper_multichannel_content_detected !=
- proper_multichannel_content_detected_;
+
+ // Detect temporary multichannel content.
+ temporary_multichannel_content_detected_ =
+ persistent_multichannel_content_detected_ ? false
+ : stereo_detected_in_frame;
+
+ return previous_persistent_multichannel_content_detected !=
+ persistent_multichannel_content_detected_;
}
} // namespace webrtc
diff --git a/modules/audio_processing/aec3/multi_channel_content_detector.h b/modules/audio_processing/aec3/multi_channel_content_detector.h
index e4f3f09ba1..3120502258 100644
--- a/modules/audio_processing/aec3/multi_channel_content_detector.h
+++ b/modules/audio_processing/aec3/multi_channel_content_detector.h
@@ -31,24 +31,33 @@ class MultiChannelContentDetector {
MultiChannelContentDetector(bool detect_stereo_content,
int num_render_input_channels,
float detection_threshold,
- int stereo_detection_timeout_threshold_seconds);
+ int stereo_detection_timeout_threshold_seconds,
+ float stereo_detection_hysteresis_seconds);
// Compares the left and right channels in the render `frame` to determine
// whether the signal is a proper multichannel signal. Returns a bool
- // indicating whether a change in the multichannel was detected.
+ // indicating whether a change in the proper multichannel content was
+ // detected.
bool UpdateDetection(
const std::vector<std::vector<std::vector<float>>>& frame);
- bool IsMultiChannelContentDetected() const {
- return proper_multichannel_content_detected_;
+ bool IsProperMultiChannelContentDetected() const {
+ return persistent_multichannel_content_detected_;
+ }
+
+ bool IsTemporaryMultiChannelContentDetectedForTesting() const {
+ return temporary_multichannel_content_detected_;
}
private:
const bool detect_stereo_content_;
const float detection_threshold_;
const absl::optional<int> detection_timeout_threshold_frames_;
- bool proper_multichannel_content_detected_;
- int frames_since_stereo_detected_ = 0;
+ const int stereo_detection_hysteresis_frames_;
+ bool persistent_multichannel_content_detected_;
+ bool temporary_multichannel_content_detected_ = false;
+ int64_t frames_since_stereo_detected_last_ = 0;
+ int64_t consecutive_frames_with_stereo_ = 0;
};
} // namespace webrtc
diff --git a/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc b/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc
index cb495e859f..0857bee4bd 100644
--- a/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc
+++ b/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc
@@ -19,8 +19,9 @@ TEST(MultiChannelContentDetector, HandlingOfMono) {
/*detect_stereo_content=*/true,
/*num_render_input_channels=*/1,
/*detection_threshold=*/0.0f,
- /*stereo_detection_timeout_threshold_seconds=*/0);
- EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+ /*stereo_detection_timeout_threshold_seconds=*/0,
+ /*stereo_detection_hysteresis_seconds=*/0.0f);
+ EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
}
TEST(MultiChannelContentDetector, HandlingOfMonoAndDetectionOff) {
@@ -28,8 +29,9 @@ TEST(MultiChannelContentDetector, HandlingOfMonoAndDetectionOff) {
/*detect_stereo_content=*/false,
/*num_render_input_channels=*/1,
/*detection_threshold=*/0.0f,
- /*stereo_detection_timeout_threshold_seconds=*/0);
- EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+ /*stereo_detection_timeout_threshold_seconds=*/0,
+ /*stereo_detection_hysteresis_seconds=*/0.0f);
+ EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
}
TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
@@ -37,8 +39,9 @@ TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
/*detect_stereo_content=*/false,
/*num_render_input_channels=*/2,
/*detection_threshold=*/0.0f,
- /*stereo_detection_timeout_threshold_seconds=*/0);
- EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+ /*stereo_detection_timeout_threshold_seconds=*/0,
+ /*stereo_detection_hysteresis_seconds=*/0.0f);
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
std::vector<std::vector<std::vector<float>>> frame(
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
@@ -46,7 +49,7 @@ TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f);
EXPECT_FALSE(mc.UpdateDetection(frame));
- EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
EXPECT_FALSE(mc.UpdateDetection(frame));
}
@@ -56,8 +59,9 @@ TEST(MultiChannelContentDetector, InitialDetectionOfStereo) {
/*detect_stereo_content=*/true,
/*num_render_input_channels=*/2,
/*detection_threshold=*/0.0f,
- /*stereo_detection_timeout_threshold_seconds=*/0);
- EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+ /*stereo_detection_timeout_threshold_seconds=*/0,
+ /*stereo_detection_hysteresis_seconds=*/0.0f);
+ EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
}
TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) {
@@ -65,13 +69,14 @@ TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) {
/*detect_stereo_content=*/true,
/*num_render_input_channels=*/2,
/*detection_threshold=*/0.0f,
- /*stereo_detection_timeout_threshold_seconds=*/0);
+ /*stereo_detection_timeout_threshold_seconds=*/0,
+ /*stereo_detection_hysteresis_seconds=*/0.0f);
std::vector<std::vector<std::vector<float>>> frame(
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f);
EXPECT_FALSE(mc.UpdateDetection(frame));
- EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+ EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
EXPECT_FALSE(mc.UpdateDetection(frame));
}
@@ -81,13 +86,14 @@ TEST(MultiChannelContentDetector, DetectionWhenStereo) {
/*detect_stereo_content=*/true,
/*num_render_input_channels=*/2,
/*detection_threshold=*/0.0f,
- /*stereo_detection_timeout_threshold_seconds=*/0);
+ /*stereo_detection_timeout_threshold_seconds=*/0,
+ /*stereo_detection_hysteresis_seconds=*/0.0f);
std::vector<std::vector<std::vector<float>>> frame(
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f);
EXPECT_TRUE(mc.UpdateDetection(frame));
- EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
EXPECT_FALSE(mc.UpdateDetection(frame));
}
@@ -97,14 +103,15 @@ TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) {
/*detect_stereo_content=*/true,
/*num_render_input_channels=*/2,
/*detection_threshold=*/0.0f,
- /*stereo_detection_timeout_threshold_seconds=*/0);
+ /*stereo_detection_timeout_threshold_seconds=*/0,
+ /*stereo_detection_hysteresis_seconds=*/0.0f);
std::vector<std::vector<std::vector<float>>> frame(
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f);
EXPECT_FALSE(mc.UpdateDetection(frame));
- EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+ EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
EXPECT_FALSE(mc.UpdateDetection(frame));
@@ -112,7 +119,7 @@ TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) {
std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f);
EXPECT_TRUE(mc.UpdateDetection(frame));
- EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
EXPECT_FALSE(mc.UpdateDetection(frame));
}
@@ -123,14 +130,15 @@ TEST(MultiChannelContentDetector, DetectionWithStereoBelowThreshold) {
/*detect_stereo_content=*/true,
/*num_render_input_channels=*/2,
/*detection_threshold=*/kThreshold,
- /*stereo_detection_timeout_threshold_seconds=*/0);
+ /*stereo_detection_timeout_threshold_seconds=*/0,
+ /*stereo_detection_hysteresis_seconds=*/0.0f);
std::vector<std::vector<std::vector<float>>> frame(
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f + kThreshold);
EXPECT_FALSE(mc.UpdateDetection(frame));
- EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+ EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
EXPECT_FALSE(mc.UpdateDetection(frame));
}
@@ -141,14 +149,15 @@ TEST(MultiChannelContentDetector, DetectionWithStereoAboveThreshold) {
/*detect_stereo_content=*/true,
/*num_render_input_channels=*/2,
/*detection_threshold=*/kThreshold,
- /*stereo_detection_timeout_threshold_seconds=*/0);
+ /*stereo_detection_timeout_threshold_seconds=*/0,
+ /*stereo_detection_hysteresis_seconds=*/0.0f);
std::vector<std::vector<std::vector<float>>> frame(
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f + kThreshold + 0.1f);
EXPECT_TRUE(mc.UpdateDetection(frame));
- EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
EXPECT_FALSE(mc.UpdateDetection(frame));
}
@@ -166,16 +175,16 @@ TEST_P(MultiChannelContentDetectorTimeoutBehavior,
TimeOutBehaviorForNonTrueStereo) {
constexpr int kNumFramesPerSecond = 100;
const bool detect_stereo_content = std::get<0>(GetParam());
- const int stereo_stereo_detection_timeout_threshold_seconds =
+ const int stereo_detection_timeout_threshold_seconds =
std::get<1>(GetParam());
const int stereo_detection_timeout_threshold_frames =
- stereo_stereo_detection_timeout_threshold_seconds * kNumFramesPerSecond;
+ stereo_detection_timeout_threshold_seconds * kNumFramesPerSecond;
- MultiChannelContentDetector mc(
- detect_stereo_content,
- /*num_render_input_channels=*/2,
- /*detection_threshold=*/0.0f,
- stereo_stereo_detection_timeout_threshold_seconds);
+ MultiChannelContentDetector mc(detect_stereo_content,
+ /*num_render_input_channels=*/2,
+ /*detection_threshold=*/0.0f,
+ stereo_detection_timeout_threshold_seconds,
+ /*stereo_detection_hysteresis_seconds=*/0.0f);
std::vector<std::vector<std::vector<float>>> true_stereo_frame = {
{std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};
@@ -186,9 +195,9 @@ TEST_P(MultiChannelContentDetectorTimeoutBehavior,
for (int k = 0; k < 10; ++k) {
EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
if (detect_stereo_content) {
- EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+ EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
} else {
- EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
}
}
@@ -198,21 +207,21 @@ TEST_P(MultiChannelContentDetectorTimeoutBehavior,
} else {
EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
}
- EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
// Pass fake stereo frames until any timeouts are about to occur.
for (int k = 0; k < stereo_detection_timeout_threshold_frames - 1; ++k) {
EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
- EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
}
// Pass a fake stereo frame and verify that any timeouts properly occur.
if (detect_stereo_content && stereo_detection_timeout_threshold_frames > 0) {
EXPECT_TRUE(mc.UpdateDetection(fake_stereo_frame));
- EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+ EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
} else {
EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
- EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
}
// Pass fake stereo frames and verify the behavior after any timeout.
@@ -220,11 +229,129 @@ TEST_P(MultiChannelContentDetectorTimeoutBehavior,
EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
if (detect_stereo_content &&
stereo_detection_timeout_threshold_frames > 0) {
- EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+ EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+ } else {
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+ }
+ }
+}
+
+class MultiChannelContentDetectorHysteresisBehavior
+ : public ::testing::Test,
+ public ::testing::WithParamInterface<std::tuple<bool, float>> {};
+
+INSTANTIATE_TEST_SUITE_P(
+ MultiChannelContentDetector,
+ MultiChannelContentDetectorHysteresisBehavior,
+ ::testing::Combine(::testing::Values(false, true),
+ ::testing::Values(0.0f, 0.1f, 0.2f)));
+
+TEST_P(MultiChannelContentDetectorHysteresisBehavior,
+ PeriodBeforeStereoDetectionIsTriggered) {
+ constexpr int kNumFramesPerSecond = 100;
+ const bool detect_stereo_content = std::get<0>(GetParam());
+ const int stereo_detection_hysteresis_seconds = std::get<1>(GetParam());
+ const int stereo_detection_hysteresis_frames =
+ stereo_detection_hysteresis_seconds * kNumFramesPerSecond;
+
+ MultiChannelContentDetector mc(
+ detect_stereo_content,
+ /*num_render_input_channels=*/2,
+ /*detection_threshold=*/0.0f,
+ /*stereo_detection_timeout_threshold_seconds=*/0,
+ stereo_detection_hysteresis_seconds);
+ std::vector<std::vector<std::vector<float>>> true_stereo_frame = {
+ {std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};
+
+ std::vector<std::vector<std::vector<float>>> fake_stereo_frame = {
+ {std::vector<float>(160, 100.0f), std::vector<float>(160, 100.0f)}};
+
+ // Pass fake stereo frames and verify the content detection.
+ for (int k = 0; k < 10; ++k) {
+ EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+ if (detect_stereo_content) {
+ EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+ } else {
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+ }
+ EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+ }
+
+ // Pass two true stereo frames and verify that they are properly detected.
+ ASSERT_TRUE(stereo_detection_hysteresis_frames > 2 ||
+ stereo_detection_hysteresis_frames == 0);
+ for (int k = 0; k < 2; ++k) {
+ if (detect_stereo_content) {
+ if (stereo_detection_hysteresis_seconds == 0.0f) {
+ if (k == 0) {
+ EXPECT_TRUE(mc.UpdateDetection(true_stereo_frame));
+ } else {
+ EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+ }
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+ EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+ } else {
+ EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+ EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+ EXPECT_TRUE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+ }
} else {
- EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+ EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+ EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
}
}
+
+ if (stereo_detection_hysteresis_seconds == 0.0f) {
+ return;
+ }
+
+ // Pass true stereo frames until the hysteresis period is about to elapse.
+ for (int k = 0; k < stereo_detection_hysteresis_frames - 3; ++k) {
+ if (detect_stereo_content) {
+ EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+ EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+ EXPECT_TRUE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+ } else {
+ EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+ EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+ }
+ }
+
+ // Pass a true stereo frame and verify that it is properly detected.
+ if (detect_stereo_content) {
+ EXPECT_TRUE(mc.UpdateDetection(true_stereo_frame));
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+ EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+ } else {
+ EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+ EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+ }
+
+ // Pass an additional true stereo frame and verify that it is properly
+ // detected.
+ if (detect_stereo_content) {
+ EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+ EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+ } else {
+ EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+ EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+ }
+
+ // Pass a fake stereo frame and verify that it is properly detected.
+ if (detect_stereo_content) {
+ EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+ EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+ } else {
+ EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+ EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+ EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
+ }
}
} // namespace webrtc