aboutsummaryrefslogtreecommitdiff
path: root/api/audio
diff options
context:
space:
mode:
Diffstat (limited to 'api/audio')
-rw-r--r--api/audio/BUILD.gn22
-rw-r--r--api/audio/audio_frame.cc26
-rw-r--r--api/audio/audio_frame.h14
-rw-r--r--api/audio/audio_frame_processor.h43
-rw-r--r--api/audio/audio_mixer.h6
-rw-r--r--api/audio/channel_layout.cc2
-rw-r--r--api/audio/echo_canceller3_config.cc8
-rw-r--r--api/audio/echo_canceller3_config.h27
-rw-r--r--api/audio/echo_canceller3_config_json.cc132
-rw-r--r--api/audio/echo_canceller3_factory.cc3
-rw-r--r--api/audio/echo_control.h7
-rw-r--r--api/audio/echo_detector_creator.cc4
-rw-r--r--api/audio/test/BUILD.gn1
-rw-r--r--api/audio/test/audio_frame_unittest.cc50
-rw-r--r--api/audio/test/echo_canceller3_config_json_unittest.cc23
15 files changed, 246 insertions, 122 deletions
diff --git a/api/audio/BUILD.gn b/api/audio/BUILD.gn
index 117e5cc0ab..4832751b5f 100644
--- a/api/audio/BUILD.gn
+++ b/api/audio/BUILD.gn
@@ -20,17 +20,25 @@ rtc_library("audio_frame_api") {
deps = [
"..:rtp_packet_info",
"../../rtc_base:checks",
- "../../rtc_base:rtc_base_approved",
+ "../../rtc_base:logging",
+ "../../rtc_base:macromagic",
+ "../../rtc_base:timeutils",
]
}
+rtc_source_set("audio_frame_processor") {
+ visibility = [ "*" ]
+ sources = [ "audio_frame_processor.h" ]
+}
+
rtc_source_set("audio_mixer_api") {
visibility = [ "*" ]
sources = [ "audio_mixer.h" ]
deps = [
":audio_frame_api",
- "../../rtc_base:rtc_base_approved",
+ "..:make_ref_counted",
+ "../../rtc_base:refcount",
]
}
@@ -42,7 +50,6 @@ rtc_library("aec3_config") {
]
deps = [
"../../rtc_base:checks",
- "../../rtc_base:rtc_base_approved",
"../../rtc_base:safe_minmax",
"../../rtc_base/system:rtc_export",
]
@@ -58,8 +65,9 @@ rtc_library("aec3_config_json") {
deps = [
":aec3_config",
"../../rtc_base:checks",
- "../../rtc_base:rtc_base_approved",
+ "../../rtc_base:logging",
"../../rtc_base:rtc_json",
+ "../../rtc_base:stringutils",
"../../rtc_base/system:rtc_export",
]
absl_deps = [ "//third_party/abseil-cpp/absl/strings" ]
@@ -77,7 +85,6 @@ rtc_library("aec3_factory") {
":aec3_config",
":echo_control",
"../../modules/audio_processing/aec3",
- "../../rtc_base:rtc_base_approved",
"../../rtc_base/system:rtc_export",
]
}
@@ -90,14 +97,15 @@ rtc_source_set("echo_control") {
rtc_source_set("echo_detector_creator") {
visibility = [ "*" ]
+ allow_poison = [ "default_echo_detector" ]
sources = [
"echo_detector_creator.cc",
"echo_detector_creator.h",
]
deps = [
+ "..:make_ref_counted",
"../../api:scoped_refptr",
"../../modules/audio_processing:api",
- "../../modules/audio_processing:audio_processing",
- "../../rtc_base:refcount",
+ "../../modules/audio_processing:residual_echo_detector",
]
}
diff --git a/api/audio/audio_frame.cc b/api/audio/audio_frame.cc
index c6e5cf4dd6..3e12006386 100644
--- a/api/audio/audio_frame.cc
+++ b/api/audio/audio_frame.cc
@@ -11,8 +11,6 @@
#include "api/audio/audio_frame.h"
#include <string.h>
-#include <algorithm>
-#include <utility>
#include "rtc_base/checks.h"
#include "rtc_base/time_utils.h"
@@ -24,35 +22,13 @@ AudioFrame::AudioFrame() {
static_assert(sizeof(data_) == kMaxDataSizeBytes, "kMaxDataSizeBytes");
}
-void swap(AudioFrame& a, AudioFrame& b) {
- using std::swap;
- swap(a.timestamp_, b.timestamp_);
- swap(a.elapsed_time_ms_, b.elapsed_time_ms_);
- swap(a.ntp_time_ms_, b.ntp_time_ms_);
- swap(a.samples_per_channel_, b.samples_per_channel_);
- swap(a.sample_rate_hz_, b.sample_rate_hz_);
- swap(a.num_channels_, b.num_channels_);
- swap(a.channel_layout_, b.channel_layout_);
- swap(a.speech_type_, b.speech_type_);
- swap(a.vad_activity_, b.vad_activity_);
- swap(a.profile_timestamp_ms_, b.profile_timestamp_ms_);
- swap(a.packet_infos_, b.packet_infos_);
- const size_t length_a = a.samples_per_channel_ * a.num_channels_;
- const size_t length_b = b.samples_per_channel_ * b.num_channels_;
- RTC_DCHECK_LE(length_a, AudioFrame::kMaxDataSizeSamples);
- RTC_DCHECK_LE(length_b, AudioFrame::kMaxDataSizeSamples);
- std::swap_ranges(a.data_, a.data_ + std::max(length_a, length_b), b.data_);
- swap(a.muted_, b.muted_);
- swap(a.absolute_capture_timestamp_ms_, b.absolute_capture_timestamp_ms_);
-}
-
void AudioFrame::Reset() {
ResetWithoutMuting();
muted_ = true;
}
void AudioFrame::ResetWithoutMuting() {
- // TODO(wu): Zero is a valid value for |timestamp_|. We should initialize
+ // TODO(wu): Zero is a valid value for `timestamp_`. We should initialize
// to an invalid value, or add a new member to indicate invalidity.
timestamp_ = 0;
elapsed_time_ms_ = -1;
diff --git a/api/audio/audio_frame.h b/api/audio/audio_frame.h
index 78539f57eb..d5dcb5f788 100644
--- a/api/audio/audio_frame.h
+++ b/api/audio/audio_frame.h
@@ -14,11 +14,8 @@
#include <stddef.h>
#include <stdint.h>
-#include <utility>
-
#include "api/audio/channel_layout.h"
#include "api/rtp_packet_infos.h"
-#include "rtc_base/constructor_magic.h"
namespace webrtc {
@@ -60,7 +57,8 @@ class AudioFrame {
AudioFrame();
- friend void swap(AudioFrame& a, AudioFrame& b);
+ AudioFrame(const AudioFrame&) = delete;
+ AudioFrame& operator=(const AudioFrame&) = delete;
// Resets all members to their default state.
void Reset();
@@ -139,7 +137,7 @@ class AudioFrame {
int64_t profile_timestamp_ms_ = 0;
// Information about packets used to assemble this audio frame. This is needed
- // by |SourceTracker| when the frame is delivered to the RTCRtpReceiver's
+ // by `SourceTracker` when the frame is delivered to the RTCRtpReceiver's
// MediaStreamTrack, in order to implement getContributingSources(). See:
// https://w3c.github.io/webrtc-pc/#dom-rtcrtpreceiver-getcontributingsources
//
@@ -149,7 +147,7 @@ class AudioFrame {
// sync buffer is the small sample-holding buffer located after the audio
// decoder and before where samples are assembled into output frames.
//
- // |RtpPacketInfos| may also be empty if the audio samples did not come from
+ // `RtpPacketInfos` may also be empty if the audio samples did not come from
// RTP packets. E.g. if the audio were locally generated by packet loss
// concealment, comfort noise generation, etc.
RtpPacketInfos packet_infos_;
@@ -165,11 +163,9 @@ class AudioFrame {
// Absolute capture timestamp when this audio frame was originally captured.
// This is only valid for audio frames captured on this machine. The absolute
- // capture timestamp of a received frame is found in |packet_infos_|.
+ // capture timestamp of a received frame is found in `packet_infos_`.
// This timestamp MUST be based on the same clock as rtc::TimeMillis().
absl::optional<int64_t> absolute_capture_timestamp_ms_;
-
- RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame);
};
} // namespace webrtc
diff --git a/api/audio/audio_frame_processor.h b/api/audio/audio_frame_processor.h
new file mode 100644
index 0000000000..cb65c4817e
--- /dev/null
+++ b/api/audio/audio_frame_processor.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef API_AUDIO_AUDIO_FRAME_PROCESSOR_H_
+#define API_AUDIO_AUDIO_FRAME_PROCESSOR_H_
+
+#include <functional>
+#include <memory>
+
+namespace webrtc {
+
+class AudioFrame;
+
+// If passed into PeerConnectionFactory, will be used for additional
+// processing of captured audio frames, performed before encoding.
+// Implementations must be thread-safe.
+class AudioFrameProcessor {
+ public:
+ using OnAudioFrameCallback = std::function<void(std::unique_ptr<AudioFrame>)>;
+ virtual ~AudioFrameProcessor() = default;
+
+ // Processes a frame received from WebRTC. WebRTC calls this off the
+ // realtime audio capturing path. The implementation must hand processed
+ // frames back by calling `sink_callback`, if one was provided in a SetSink()
+ // call. `sink_callback` can be called in the context of Process().
+ virtual void Process(std::unique_ptr<AudioFrame> frame) = 0;
+
+ // Atomically replaces the current sink with the new one. Before the
+ // first call to this function, or if the provided `sink_callback` is nullptr,
+ // processed frames are simply discarded.
+ virtual void SetSink(OnAudioFrameCallback sink_callback) = 0;
+};
+
+} // namespace webrtc
+
+#endif // API_AUDIO_AUDIO_FRAME_PROCESSOR_H_
diff --git a/api/audio/audio_mixer.h b/api/audio/audio_mixer.h
index b290cfacf0..3483df22bc 100644
--- a/api/audio/audio_mixer.h
+++ b/api/audio/audio_mixer.h
@@ -35,9 +35,9 @@ class AudioMixer : public rtc::RefCountInterface {
kError, // The audio_frame will not be used.
};
- // Overwrites |audio_frame|. The data_ field is overwritten with
+ // Overwrites `audio_frame`. The data_ field is overwritten with
// 10 ms of new audio (either 1 or 2 interleaved channels) at
- // |sample_rate_hz|. All fields in |audio_frame| must be updated.
+ // `sample_rate_hz`. All fields in `audio_frame` must be updated.
virtual AudioFrameInfo GetAudioFrameWithInfo(int sample_rate_hz,
AudioFrame* audio_frame) = 0;
@@ -66,7 +66,7 @@ class AudioMixer : public rtc::RefCountInterface {
// should mix at a rate that doesn't cause quality loss of the
// sources' audio. The mixing rate is one of the rates listed in
// AudioProcessing::NativeRate. All fields in
- // |audio_frame_for_mixing| must be updated.
+ // `audio_frame_for_mixing` must be updated.
virtual void Mix(size_t number_of_channels,
AudioFrame* audio_frame_for_mixing) = 0;
diff --git a/api/audio/channel_layout.cc b/api/audio/channel_layout.cc
index 567f4d9b26..e4ae356fab 100644
--- a/api/audio/channel_layout.cc
+++ b/api/audio/channel_layout.cc
@@ -275,7 +275,7 @@ const char* ChannelLayoutToString(ChannelLayout layout) {
case CHANNEL_LAYOUT_BITSTREAM:
return "BITSTREAM";
}
- RTC_NOTREACHED() << "Invalid channel layout provided: " << layout;
+ RTC_DCHECK_NOTREACHED() << "Invalid channel layout provided: " << layout;
return "";
}
diff --git a/api/audio/echo_canceller3_config.cc b/api/audio/echo_canceller3_config.cc
index aeb809efa9..0224c712b4 100644
--- a/api/audio/echo_canceller3_config.cc
+++ b/api/audio/echo_canceller3_config.cc
@@ -153,6 +153,7 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) {
res = res & Limit(&c->filter.config_change_duration_blocks, 0, 100000);
res = res & Limit(&c->filter.initial_state_seconds, 0.f, 100.f);
+ res = res & Limit(&c->filter.coarse_reset_hangover_blocks, 0, 250000);
res = res & Limit(&c->erle.min, 1.f, 100000.f);
res = res & Limit(&c->erle.max_l, 1.f, 100000.f);
@@ -165,6 +166,7 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) {
res = res & Limit(&c->ep_strength.default_gain, 0.f, 1000000.f);
res = res & Limit(&c->ep_strength.default_len, -1.f, 1.f);
+ res = res & Limit(&c->ep_strength.nearend_len, -1.0f, 1.0f);
res =
res & Limit(&c->echo_audibility.low_render_limit, 0.f, 32768.f * 32768.f);
@@ -228,6 +230,12 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) {
res =
res & Limit(&c->suppressor.nearend_tuning.max_dec_factor_lf, 0.f, 100.f);
+ res = res & Limit(&c->suppressor.last_permanent_lf_smoothing_band, 0, 64);
+ res = res & Limit(&c->suppressor.last_lf_smoothing_band, 0, 64);
+ res = res & Limit(&c->suppressor.last_lf_band, 0, 63);
+ res = res &
+ Limit(&c->suppressor.first_hf_band, c->suppressor.last_lf_band + 1, 64);
+
res = res & Limit(&c->suppressor.dominant_nearend_detection.enr_threshold,
0.f, 1000000.f);
res = res & Limit(&c->suppressor.dominant_nearend_detection.snr_threshold,
diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h
index a505625538..4b1c7fbc47 100644
--- a/api/audio/echo_canceller3_config.h
+++ b/api/audio/echo_canceller3_config.h
@@ -43,6 +43,7 @@ struct RTC_EXPORT EchoCanceller3Config {
size_t hysteresis_limit_blocks = 1;
size_t fixed_capture_delay_samples = 0;
float delay_estimate_smoothing = 0.7f;
+ float delay_estimate_smoothing_delay_found = 0.7f;
float delay_candidate_detection_threshold = 0.2f;
struct DelaySelectionThresholds {
int initial;
@@ -58,6 +59,7 @@ struct RTC_EXPORT EchoCanceller3Config {
};
AlignmentMixing render_alignment_mixing = {false, true, 10000.f, true};
AlignmentMixing capture_alignment_mixing = {false, true, 10000.f, false};
+ bool detect_pre_echo = true;
} delay;
struct Filter {
@@ -86,9 +88,11 @@ struct RTC_EXPORT EchoCanceller3Config {
size_t config_change_duration_blocks = 250;
float initial_state_seconds = 2.5f;
+ int coarse_reset_hangover_blocks = 25;
bool conservative_initial_phase = false;
bool enable_coarse_filter_output_usage = true;
bool use_linear_filter = true;
+ bool high_pass_filter_echo_reference = false;
bool export_linear_aec_output = false;
} filter;
@@ -105,8 +109,11 @@ struct RTC_EXPORT EchoCanceller3Config {
struct EpStrength {
float default_gain = 1.f;
float default_len = 0.83f;
+ float nearend_len = 0.83f;
bool echo_can_saturate = true;
bool bounded_erl = false;
+ bool erle_onset_compensation_in_dominant_nearend = false;
+ bool use_conservative_tail_frequency_response = true;
} ep_strength;
struct EchoAudibility {
@@ -143,6 +150,7 @@ struct RTC_EXPORT EchoCanceller3Config {
float noise_gate_slope = 0.3f;
size_t render_pre_window_size = 1;
size_t render_post_window_size = 1;
+ bool model_reverb_in_nonlinear_mode = true;
} echo_model;
struct ComfortNoise {
@@ -189,6 +197,12 @@ struct RTC_EXPORT EchoCanceller3Config {
2.0f,
0.25f);
+ bool lf_smoothing_during_initial_phase = true;
+ int last_permanent_lf_smoothing_band = 0;
+ int last_lf_smoothing_band = 5;
+ int last_lf_band = 5;
+ int first_hf_band = 8;
+
struct DominantNearendDetection {
float enr_threshold = .25f;
float enr_exit_threshold = 10.f;
@@ -196,6 +210,7 @@ struct RTC_EXPORT EchoCanceller3Config {
int hold_duration = 50;
int trigger_threshold = 12;
bool use_during_initial_phase = true;
+ bool use_unbounded_echo_spectrum = true;
} dominant_nearend_detection;
struct SubbandNearendDetection {
@@ -215,12 +230,20 @@ struct RTC_EXPORT EchoCanceller3Config {
struct HighBandsSuppression {
float enr_threshold = 1.f;
float max_gain_during_echo = 1.f;
- float anti_howling_activation_threshold = 25.f;
- float anti_howling_gain = 0.01f;
+ float anti_howling_activation_threshold = 400.f;
+ float anti_howling_gain = 1.f;
} high_bands_suppression;
float floor_first_increase = 0.00001f;
+ bool conservative_hf_suppression = false;
} suppressor;
+
+ struct MultiChannel {
+ bool detect_stereo_content = true;
+ float stereo_detection_threshold = 0.0f;
+ int stereo_detection_timeout_threshold_seconds = 300;
+ float stereo_detection_hysteresis_seconds = 2.0f;
+ } multi_channel;
};
} // namespace webrtc
diff --git a/api/audio/echo_canceller3_config_json.cc b/api/audio/echo_canceller3_config_json.cc
index f5c1249674..96e45ffe6d 100644
--- a/api/audio/echo_canceller3_config_json.cc
+++ b/api/audio/echo_canceller3_config_json.cc
@@ -11,6 +11,7 @@
#include <stddef.h>
+#include <memory>
#include <string>
#include <vector>
@@ -156,9 +157,14 @@ void Aec3ConfigFromJsonString(absl::string_view json_string,
*parsing_successful = true;
Json::Value root;
- bool success = Json::Reader().parse(std::string(json_string), root);
+ Json::CharReaderBuilder builder;
+ std::string error_message;
+ std::unique_ptr<Json::CharReader> reader(builder.newCharReader());
+ bool success =
+ reader->parse(json_string.data(), json_string.data() + json_string.size(),
+ &root, &error_message);
if (!success) {
- RTC_LOG(LS_ERROR) << "Incorrect JSON format: " << json_string;
+ RTC_LOG(LS_ERROR) << "Incorrect JSON format: " << error_message;
*parsing_successful = false;
return;
}
@@ -191,6 +197,8 @@ void Aec3ConfigFromJsonString(absl::string_view json_string,
&cfg.delay.fixed_capture_delay_samples);
ReadParam(section, "delay_estimate_smoothing",
&cfg.delay.delay_estimate_smoothing);
+ ReadParam(section, "delay_estimate_smoothing_delay_found",
+ &cfg.delay.delay_estimate_smoothing_delay_found);
ReadParam(section, "delay_candidate_detection_threshold",
&cfg.delay.delay_candidate_detection_threshold);
@@ -212,6 +220,7 @@ void Aec3ConfigFromJsonString(absl::string_view json_string,
&cfg.delay.render_alignment_mixing);
ReadParam(section, "capture_alignment_mixing",
&cfg.delay.capture_alignment_mixing);
+ ReadParam(section, "detect_pre_echo", &cfg.delay.detect_pre_echo);
}
if (rtc::GetValueFromJsonObject(aec3_root, "filter", &section)) {
@@ -223,11 +232,15 @@ void Aec3ConfigFromJsonString(absl::string_view json_string,
&cfg.filter.config_change_duration_blocks);
ReadParam(section, "initial_state_seconds",
&cfg.filter.initial_state_seconds);
+ ReadParam(section, "coarse_reset_hangover_blocks",
+ &cfg.filter.coarse_reset_hangover_blocks);
ReadParam(section, "conservative_initial_phase",
&cfg.filter.conservative_initial_phase);
ReadParam(section, "enable_coarse_filter_output_usage",
&cfg.filter.enable_coarse_filter_output_usage);
ReadParam(section, "use_linear_filter", &cfg.filter.use_linear_filter);
+ ReadParam(section, "high_pass_filter_echo_reference",
+ &cfg.filter.high_pass_filter_echo_reference);
ReadParam(section, "export_linear_aec_output",
&cfg.filter.export_linear_aec_output);
}
@@ -247,8 +260,13 @@ void Aec3ConfigFromJsonString(absl::string_view json_string,
if (rtc::GetValueFromJsonObject(aec3_root, "ep_strength", &section)) {
ReadParam(section, "default_gain", &cfg.ep_strength.default_gain);
ReadParam(section, "default_len", &cfg.ep_strength.default_len);
+ ReadParam(section, "nearend_len", &cfg.ep_strength.nearend_len);
ReadParam(section, "echo_can_saturate", &cfg.ep_strength.echo_can_saturate);
ReadParam(section, "bounded_erl", &cfg.ep_strength.bounded_erl);
+ ReadParam(section, "erle_onset_compensation_in_dominant_nearend",
+ &cfg.ep_strength.erle_onset_compensation_in_dominant_nearend);
+ ReadParam(section, "use_conservative_tail_frequency_response",
+ &cfg.ep_strength.use_conservative_tail_frequency_response);
}
if (rtc::GetValueFromJsonObject(aec3_root, "echo_audibility", &section)) {
@@ -302,6 +320,8 @@ void Aec3ConfigFromJsonString(absl::string_view json_string,
&cfg.echo_model.render_pre_window_size);
ReadParam(section, "render_post_window_size",
&cfg.echo_model.render_post_window_size);
+ ReadParam(section, "model_reverb_in_nonlinear_mode",
+ &cfg.echo_model.model_reverb_in_nonlinear_mode);
}
if (rtc::GetValueFromJsonObject(aec3_root, "comfort_noise", &section)) {
@@ -331,6 +351,15 @@ void Aec3ConfigFromJsonString(absl::string_view json_string,
&cfg.suppressor.nearend_tuning.max_dec_factor_lf);
}
+ ReadParam(section, "lf_smoothing_during_initial_phase",
+ &cfg.suppressor.lf_smoothing_during_initial_phase);
+ ReadParam(section, "last_permanent_lf_smoothing_band",
+ &cfg.suppressor.last_permanent_lf_smoothing_band);
+ ReadParam(section, "last_lf_smoothing_band",
+ &cfg.suppressor.last_lf_smoothing_band);
+ ReadParam(section, "last_lf_band", &cfg.suppressor.last_lf_band);
+ ReadParam(section, "first_hf_band", &cfg.suppressor.first_hf_band);
+
if (rtc::GetValueFromJsonObject(section, "dominant_nearend_detection",
&subsection)) {
ReadParam(subsection, "enr_threshold",
@@ -346,6 +375,9 @@ void Aec3ConfigFromJsonString(absl::string_view json_string,
ReadParam(
subsection, "use_during_initial_phase",
&cfg.suppressor.dominant_nearend_detection.use_during_initial_phase);
+ ReadParam(subsection, "use_unbounded_echo_spectrum",
+ &cfg.suppressor.dominant_nearend_detection
+ .use_unbounded_echo_spectrum);
}
if (rtc::GetValueFromJsonObject(section, "subband_nearend_detection",
@@ -381,6 +413,19 @@ void Aec3ConfigFromJsonString(absl::string_view json_string,
ReadParam(section, "floor_first_increase",
&cfg.suppressor.floor_first_increase);
+ ReadParam(section, "conservative_hf_suppression",
+ &cfg.suppressor.conservative_hf_suppression);
+ }
+
+ if (rtc::GetValueFromJsonObject(aec3_root, "multi_channel", &section)) {
+ ReadParam(section, "detect_stereo_content",
+ &cfg.multi_channel.detect_stereo_content);
+ ReadParam(section, "stereo_detection_threshold",
+ &cfg.multi_channel.stereo_detection_threshold);
+ ReadParam(section, "stereo_detection_timeout_threshold_seconds",
+ &cfg.multi_channel.stereo_detection_timeout_threshold_seconds);
+ ReadParam(section, "stereo_detection_hysteresis_seconds",
+ &cfg.multi_channel.stereo_detection_hysteresis_seconds);
}
}
@@ -415,6 +460,8 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) {
<< config.delay.fixed_capture_delay_samples << ",";
ost << "\"delay_estimate_smoothing\": "
<< config.delay.delay_estimate_smoothing << ",";
+ ost << "\"delay_estimate_smoothing_delay_found\": "
+ << config.delay.delay_estimate_smoothing_delay_found << ",";
ost << "\"delay_candidate_detection_threshold\": "
<< config.delay.delay_candidate_detection_threshold << ",";
@@ -459,7 +506,9 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) {
<< (config.delay.capture_alignment_mixing.prefer_first_two_channels
? "true"
: "false");
- ost << "}";
+ ost << "},";
+ ost << "\"detect_pre_echo\": "
+ << (config.delay.detect_pre_echo ? "true" : "false");
ost << "},";
ost << "\"filter\": {";
@@ -498,6 +547,8 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) {
<< config.filter.config_change_duration_blocks << ",";
ost << "\"initial_state_seconds\": " << config.filter.initial_state_seconds
<< ",";
+ ost << "\"coarse_reset_hangover_blocks\": "
+ << config.filter.coarse_reset_hangover_blocks << ",";
ost << "\"conservative_initial_phase\": "
<< (config.filter.conservative_initial_phase ? "true" : "false") << ",";
ost << "\"enable_coarse_filter_output_usage\": "
@@ -505,6 +556,9 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) {
<< ",";
ost << "\"use_linear_filter\": "
<< (config.filter.use_linear_filter ? "true" : "false") << ",";
+ ost << "\"high_pass_filter_echo_reference\": "
+ << (config.filter.high_pass_filter_echo_reference ? "true" : "false")
+ << ",";
ost << "\"export_linear_aec_output\": "
<< (config.filter.export_linear_aec_output ? "true" : "false");
@@ -526,11 +580,20 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) {
ost << "\"ep_strength\": {";
ost << "\"default_gain\": " << config.ep_strength.default_gain << ",";
ost << "\"default_len\": " << config.ep_strength.default_len << ",";
+ ost << "\"nearend_len\": " << config.ep_strength.nearend_len << ",";
ost << "\"echo_can_saturate\": "
<< (config.ep_strength.echo_can_saturate ? "true" : "false") << ",";
ost << "\"bounded_erl\": "
- << (config.ep_strength.bounded_erl ? "true" : "false");
-
+ << (config.ep_strength.bounded_erl ? "true" : "false") << ",";
+ ost << "\"erle_onset_compensation_in_dominant_nearend\": "
+ << (config.ep_strength.erle_onset_compensation_in_dominant_nearend
+ ? "true"
+ : "false")
+ << ",";
+ ost << "\"use_conservative_tail_frequency_response\": "
+ << (config.ep_strength.use_conservative_tail_frequency_response
+ ? "true"
+ : "false");
ost << "},";
ost << "\"echo_audibility\": {";
@@ -585,7 +648,9 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) {
ost << "\"render_pre_window_size\": "
<< config.echo_model.render_pre_window_size << ",";
ost << "\"render_post_window_size\": "
- << config.echo_model.render_post_window_size;
+ << config.echo_model.render_post_window_size << ",";
+ ost << "\"model_reverb_in_nonlinear_mode\": "
+ << (config.echo_model.model_reverb_in_nonlinear_mode ? "true" : "false");
ost << "},";
ost << "\"comfort_noise\": {";
@@ -627,20 +692,30 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) {
ost << "\"max_dec_factor_lf\": "
<< config.suppressor.nearend_tuning.max_dec_factor_lf;
ost << "},";
- ost << "\"dominant_nearend_detection\": {";
- ost << "\"enr_threshold\": "
- << config.suppressor.dominant_nearend_detection.enr_threshold << ",";
- ost << "\"enr_exit_threshold\": "
- << config.suppressor.dominant_nearend_detection.enr_exit_threshold << ",";
- ost << "\"snr_threshold\": "
- << config.suppressor.dominant_nearend_detection.snr_threshold << ",";
- ost << "\"hold_duration\": "
- << config.suppressor.dominant_nearend_detection.hold_duration << ",";
- ost << "\"trigger_threshold\": "
- << config.suppressor.dominant_nearend_detection.trigger_threshold << ",";
- ost << "\"use_during_initial_phase\": "
- << config.suppressor.dominant_nearend_detection.use_during_initial_phase;
- ost << "},";
+ ost << "\"lf_smoothing_during_initial_phase\": "
+ << (config.suppressor.lf_smoothing_during_initial_phase ? "true"
+ : "false")
+ << ",";
+ ost << "\"last_permanent_lf_smoothing_band\": "
+ << config.suppressor.last_permanent_lf_smoothing_band << ",";
+ ost << "\"last_lf_smoothing_band\": "
+ << config.suppressor.last_lf_smoothing_band << ",";
+ ost << "\"last_lf_band\": " << config.suppressor.last_lf_band << ",";
+ ost << "\"first_hf_band\": " << config.suppressor.first_hf_band << ",";
+ {
+ const auto& dnd = config.suppressor.dominant_nearend_detection;
+ ost << "\"dominant_nearend_detection\": {";
+ ost << "\"enr_threshold\": " << dnd.enr_threshold << ",";
+ ost << "\"enr_exit_threshold\": " << dnd.enr_exit_threshold << ",";
+ ost << "\"snr_threshold\": " << dnd.snr_threshold << ",";
+ ost << "\"hold_duration\": " << dnd.hold_duration << ",";
+ ost << "\"trigger_threshold\": " << dnd.trigger_threshold << ",";
+ ost << "\"use_during_initial_phase\": " << dnd.use_during_initial_phase
+ << ",";
+ ost << "\"use_unbounded_echo_spectrum\": "
+ << dnd.use_unbounded_echo_spectrum;
+ ost << "},";
+ }
ost << "\"subband_nearend_detection\": {";
ost << "\"nearend_average_blocks\": "
<< config.suppressor.subband_nearend_detection.nearend_average_blocks
@@ -672,8 +747,23 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) {
ost << "\"anti_howling_gain\": "
<< config.suppressor.high_bands_suppression.anti_howling_gain;
ost << "},";
- ost << "\"floor_first_increase\": " << config.suppressor.floor_first_increase;
+ ost << "\"floor_first_increase\": " << config.suppressor.floor_first_increase
+ << ",";
+ ost << "\"conservative_hf_suppression\": "
+ << config.suppressor.conservative_hf_suppression;
+ ost << "},";
+
+ ost << "\"multi_channel\": {";
+ ost << "\"detect_stereo_content\": "
+ << (config.multi_channel.detect_stereo_content ? "true" : "false") << ",";
+ ost << "\"stereo_detection_threshold\": "
+ << config.multi_channel.stereo_detection_threshold << ",";
+ ost << "\"stereo_detection_timeout_threshold_seconds\": "
+ << config.multi_channel.stereo_detection_timeout_threshold_seconds << ",";
+ ost << "\"stereo_detection_hysteresis_seconds\": "
+ << config.multi_channel.stereo_detection_hysteresis_seconds;
ost << "}";
+
ost << "}";
ost << "}";
diff --git a/api/audio/echo_canceller3_factory.cc b/api/audio/echo_canceller3_factory.cc
index d65a7262fa..284b117bea 100644
--- a/api/audio/echo_canceller3_factory.cc
+++ b/api/audio/echo_canceller3_factory.cc
@@ -25,7 +25,8 @@ std::unique_ptr<EchoControl> EchoCanceller3Factory::Create(
int num_render_channels,
int num_capture_channels) {
return std::make_unique<EchoCanceller3>(
- config_, sample_rate_hz, num_render_channels, num_capture_channels);
+ config_, /*multichannel_config=*/absl::nullopt, sample_rate_hz,
+ num_render_channels, num_capture_channels);
}
} // namespace webrtc
diff --git a/api/audio/echo_control.h b/api/audio/echo_control.h
index 8d567bf2b8..74fbc27b12 100644
--- a/api/audio/echo_control.h
+++ b/api/audio/echo_control.h
@@ -48,6 +48,13 @@ class EchoControl {
// Provides an optional external estimate of the audio buffer delay.
virtual void SetAudioBufferDelay(int delay_ms) = 0;
+ // Specifies whether the capture output will be used. The purpose of this is
+ // to allow the echo controller to deactivate some of the processing when the
+ // resulting output is not used anyway, for instance when the endpoint is
+ // muted.
+ // TODO(b/177830919): Make pure virtual.
+ virtual void SetCaptureOutputUsage(bool capture_output_used) {}
+
// Returns whether the signal is altered.
virtual bool ActiveProcessing() const = 0;
diff --git a/api/audio/echo_detector_creator.cc b/api/audio/echo_detector_creator.cc
index 4c3d9e61fe..15b7c51dca 100644
--- a/api/audio/echo_detector_creator.cc
+++ b/api/audio/echo_detector_creator.cc
@@ -9,13 +9,13 @@
*/
#include "api/audio/echo_detector_creator.h"
+#include "api/make_ref_counted.h"
#include "modules/audio_processing/residual_echo_detector.h"
-#include "rtc_base/ref_counted_object.h"
namespace webrtc {
rtc::scoped_refptr<EchoDetector> CreateEchoDetector() {
- return new rtc::RefCountedObject<ResidualEchoDetector>();
+ return rtc::make_ref_counted<ResidualEchoDetector>();
}
} // namespace webrtc
diff --git a/api/audio/test/BUILD.gn b/api/audio/test/BUILD.gn
index d62baf15b7..dfe8c32f80 100644
--- a/api/audio/test/BUILD.gn
+++ b/api/audio/test/BUILD.gn
@@ -24,7 +24,6 @@ if (rtc_include_tests) {
"..:aec3_config",
"..:aec3_config_json",
"..:audio_frame_api",
- "../../../rtc_base:rtc_base_approved",
"../../../test:test_support",
]
}
diff --git a/api/audio/test/audio_frame_unittest.cc b/api/audio/test/audio_frame_unittest.cc
index f8d3318274..dbf45ceabc 100644
--- a/api/audio/test/audio_frame_unittest.cc
+++ b/api/audio/test/audio_frame_unittest.cc
@@ -133,54 +133,4 @@ TEST(AudioFrameTest, CopyFrom) {
EXPECT_EQ(0, memcmp(frame2.data(), frame1.data(), sizeof(samples)));
}
-TEST(AudioFrameTest, SwapFrames) {
- AudioFrame frame1, frame2;
- int16_t samples1[kNumChannelsMono * kSamplesPerChannel];
- for (size_t i = 0; i < kNumChannelsMono * kSamplesPerChannel; ++i) {
- samples1[i] = i;
- }
- frame1.UpdateFrame(kTimestamp, samples1, kSamplesPerChannel, kSampleRateHz,
- AudioFrame::kPLC, AudioFrame::kVadActive,
- kNumChannelsMono);
- frame1.set_absolute_capture_timestamp_ms(12345678);
- const auto frame1_channel_layout = frame1.channel_layout();
-
- int16_t samples2[(kNumChannelsMono + 1) * (kSamplesPerChannel + 1)];
- for (size_t i = 0; i < (kNumChannelsMono + 1) * (kSamplesPerChannel + 1);
- ++i) {
- samples2[i] = 1000 + i;
- }
- frame2.UpdateFrame(kTimestamp + 1, samples2, kSamplesPerChannel + 1,
- kSampleRateHz + 1, AudioFrame::kNormalSpeech,
- AudioFrame::kVadPassive, kNumChannelsMono + 1);
- const auto frame2_channel_layout = frame2.channel_layout();
-
- swap(frame1, frame2);
-
- EXPECT_EQ(kTimestamp + 1, frame1.timestamp_);
- ASSERT_EQ(kSamplesPerChannel + 1, frame1.samples_per_channel_);
- EXPECT_EQ(kSampleRateHz + 1, frame1.sample_rate_hz_);
- EXPECT_EQ(AudioFrame::kNormalSpeech, frame1.speech_type_);
- EXPECT_EQ(AudioFrame::kVadPassive, frame1.vad_activity_);
- ASSERT_EQ(kNumChannelsMono + 1, frame1.num_channels_);
- for (size_t i = 0; i < (kNumChannelsMono + 1) * (kSamplesPerChannel + 1);
- ++i) {
- EXPECT_EQ(samples2[i], frame1.data()[i]);
- }
- EXPECT_FALSE(frame1.absolute_capture_timestamp_ms());
- EXPECT_EQ(frame2_channel_layout, frame1.channel_layout());
-
- EXPECT_EQ(kTimestamp, frame2.timestamp_);
- ASSERT_EQ(kSamplesPerChannel, frame2.samples_per_channel_);
- EXPECT_EQ(kSampleRateHz, frame2.sample_rate_hz_);
- EXPECT_EQ(AudioFrame::kPLC, frame2.speech_type_);
- EXPECT_EQ(AudioFrame::kVadActive, frame2.vad_activity_);
- ASSERT_EQ(kNumChannelsMono, frame2.num_channels_);
- for (size_t i = 0; i < kNumChannelsMono * kSamplesPerChannel; ++i) {
- EXPECT_EQ(samples1[i], frame2.data()[i]);
- }
- EXPECT_EQ(12345678, frame2.absolute_capture_timestamp_ms());
- EXPECT_EQ(frame1_channel_layout, frame2.channel_layout());
-}
-
} // namespace webrtc
diff --git a/api/audio/test/echo_canceller3_config_json_unittest.cc b/api/audio/test/echo_canceller3_config_json_unittest.cc
index a149c17a76..4146dda9fe 100644
--- a/api/audio/test/echo_canceller3_config_json_unittest.cc
+++ b/api/audio/test/echo_canceller3_config_json_unittest.cc
@@ -21,19 +21,29 @@ TEST(EchoCanceller3JsonHelpers, ToStringAndParseJson) {
cfg.delay.log_warning_on_delay_changes = true;
cfg.filter.refined.error_floor = 2.f;
cfg.filter.coarse_initial.length_blocks = 3u;
+ cfg.filter.high_pass_filter_echo_reference =
+ !cfg.filter.high_pass_filter_echo_reference;
cfg.comfort_noise.noise_floor_dbfs = 100.f;
+ cfg.echo_model.model_reverb_in_nonlinear_mode = false;
cfg.suppressor.normal_tuning.mask_hf.enr_suppress = .5f;
cfg.suppressor.subband_nearend_detection.nearend_average_blocks = 3;
cfg.suppressor.subband_nearend_detection.subband1 = {1, 3};
cfg.suppressor.subband_nearend_detection.subband1 = {4, 5};
cfg.suppressor.subband_nearend_detection.nearend_threshold = 2.f;
cfg.suppressor.subband_nearend_detection.snr_threshold = 100.f;
+ cfg.multi_channel.detect_stereo_content =
+ !cfg.multi_channel.detect_stereo_content;
+ cfg.multi_channel.stereo_detection_threshold += 1.0f;
+ cfg.multi_channel.stereo_detection_timeout_threshold_seconds += 1;
+ cfg.multi_channel.stereo_detection_hysteresis_seconds += 1;
std::string json_string = Aec3ConfigToJsonString(cfg);
EchoCanceller3Config cfg_transformed = Aec3ConfigFromJsonString(json_string);
// Expect unchanged values to remain default.
EXPECT_EQ(cfg.ep_strength.default_len,
cfg_transformed.ep_strength.default_len);
+ EXPECT_EQ(cfg.ep_strength.nearend_len,
+ cfg_transformed.ep_strength.nearend_len);
EXPECT_EQ(cfg.suppressor.normal_tuning.mask_lf.enr_suppress,
cfg_transformed.suppressor.normal_tuning.mask_lf.enr_suppress);
@@ -46,8 +56,12 @@ TEST(EchoCanceller3JsonHelpers, ToStringAndParseJson) {
cfg_transformed.filter.coarse_initial.length_blocks);
EXPECT_EQ(cfg.filter.refined.error_floor,
cfg_transformed.filter.refined.error_floor);
+ EXPECT_EQ(cfg.filter.high_pass_filter_echo_reference,
+ cfg_transformed.filter.high_pass_filter_echo_reference);
EXPECT_EQ(cfg.comfort_noise.noise_floor_dbfs,
cfg_transformed.comfort_noise.noise_floor_dbfs);
+ EXPECT_EQ(cfg.echo_model.model_reverb_in_nonlinear_mode,
+ cfg_transformed.echo_model.model_reverb_in_nonlinear_mode);
EXPECT_EQ(cfg.suppressor.normal_tuning.mask_hf.enr_suppress,
cfg_transformed.suppressor.normal_tuning.mask_hf.enr_suppress);
EXPECT_EQ(cfg.suppressor.subband_nearend_detection.nearend_average_blocks,
@@ -66,5 +80,14 @@ TEST(EchoCanceller3JsonHelpers, ToStringAndParseJson) {
cfg_transformed.suppressor.subband_nearend_detection.nearend_threshold);
EXPECT_EQ(cfg.suppressor.subband_nearend_detection.snr_threshold,
cfg_transformed.suppressor.subband_nearend_detection.snr_threshold);
+ EXPECT_EQ(cfg.multi_channel.detect_stereo_content,
+ cfg_transformed.multi_channel.detect_stereo_content);
+ EXPECT_EQ(cfg.multi_channel.stereo_detection_threshold,
+ cfg_transformed.multi_channel.stereo_detection_threshold);
+ EXPECT_EQ(
+ cfg.multi_channel.stereo_detection_timeout_threshold_seconds,
+ cfg_transformed.multi_channel.stereo_detection_timeout_threshold_seconds);
+ EXPECT_EQ(cfg.multi_channel.stereo_detection_hysteresis_seconds,
+ cfg_transformed.multi_channel.stereo_detection_hysteresis_seconds);
}
} // namespace webrtc