diff options
Diffstat (limited to 'api/audio')
-rw-r--r-- | api/audio/BUILD.gn | 22 | ||||
-rw-r--r-- | api/audio/audio_frame.cc | 26 | ||||
-rw-r--r-- | api/audio/audio_frame.h | 14 | ||||
-rw-r--r-- | api/audio/audio_frame_processor.h | 43 | ||||
-rw-r--r-- | api/audio/audio_mixer.h | 6 | ||||
-rw-r--r-- | api/audio/channel_layout.cc | 2 | ||||
-rw-r--r-- | api/audio/echo_canceller3_config.cc | 8 | ||||
-rw-r--r-- | api/audio/echo_canceller3_config.h | 27 | ||||
-rw-r--r-- | api/audio/echo_canceller3_config_json.cc | 132 | ||||
-rw-r--r-- | api/audio/echo_canceller3_factory.cc | 3 | ||||
-rw-r--r-- | api/audio/echo_control.h | 7 | ||||
-rw-r--r-- | api/audio/echo_detector_creator.cc | 4 | ||||
-rw-r--r-- | api/audio/test/BUILD.gn | 1 | ||||
-rw-r--r-- | api/audio/test/audio_frame_unittest.cc | 50 | ||||
-rw-r--r-- | api/audio/test/echo_canceller3_config_json_unittest.cc | 23 |
15 files changed, 246 insertions, 122 deletions
diff --git a/api/audio/BUILD.gn b/api/audio/BUILD.gn index 117e5cc0ab..4832751b5f 100644 --- a/api/audio/BUILD.gn +++ b/api/audio/BUILD.gn @@ -20,17 +20,25 @@ rtc_library("audio_frame_api") { deps = [ "..:rtp_packet_info", "../../rtc_base:checks", - "../../rtc_base:rtc_base_approved", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:timeutils", ] } +rtc_source_set("audio_frame_processor") { + visibility = [ "*" ] + sources = [ "audio_frame_processor.h" ] +} + rtc_source_set("audio_mixer_api") { visibility = [ "*" ] sources = [ "audio_mixer.h" ] deps = [ ":audio_frame_api", - "../../rtc_base:rtc_base_approved", + "..:make_ref_counted", + "../../rtc_base:refcount", ] } @@ -42,7 +50,6 @@ rtc_library("aec3_config") { ] deps = [ "../../rtc_base:checks", - "../../rtc_base:rtc_base_approved", "../../rtc_base:safe_minmax", "../../rtc_base/system:rtc_export", ] @@ -58,8 +65,9 @@ rtc_library("aec3_config_json") { deps = [ ":aec3_config", "../../rtc_base:checks", - "../../rtc_base:rtc_base_approved", + "../../rtc_base:logging", "../../rtc_base:rtc_json", + "../../rtc_base:stringutils", "../../rtc_base/system:rtc_export", ] absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] @@ -77,7 +85,6 @@ rtc_library("aec3_factory") { ":aec3_config", ":echo_control", "../../modules/audio_processing/aec3", - "../../rtc_base:rtc_base_approved", "../../rtc_base/system:rtc_export", ] } @@ -90,14 +97,15 @@ rtc_source_set("echo_control") { rtc_source_set("echo_detector_creator") { visibility = [ "*" ] + allow_poison = [ "default_echo_detector" ] sources = [ "echo_detector_creator.cc", "echo_detector_creator.h", ] deps = [ + "..:make_ref_counted", "../../api:scoped_refptr", "../../modules/audio_processing:api", - "../../modules/audio_processing:audio_processing", - "../../rtc_base:refcount", + "../../modules/audio_processing:residual_echo_detector", ] } diff --git a/api/audio/audio_frame.cc b/api/audio/audio_frame.cc index c6e5cf4dd6..3e12006386 100644 --- 
a/api/audio/audio_frame.cc +++ b/api/audio/audio_frame.cc @@ -11,8 +11,6 @@ #include "api/audio/audio_frame.h" #include <string.h> -#include <algorithm> -#include <utility> #include "rtc_base/checks.h" #include "rtc_base/time_utils.h" @@ -24,35 +22,13 @@ AudioFrame::AudioFrame() { static_assert(sizeof(data_) == kMaxDataSizeBytes, "kMaxDataSizeBytes"); } -void swap(AudioFrame& a, AudioFrame& b) { - using std::swap; - swap(a.timestamp_, b.timestamp_); - swap(a.elapsed_time_ms_, b.elapsed_time_ms_); - swap(a.ntp_time_ms_, b.ntp_time_ms_); - swap(a.samples_per_channel_, b.samples_per_channel_); - swap(a.sample_rate_hz_, b.sample_rate_hz_); - swap(a.num_channels_, b.num_channels_); - swap(a.channel_layout_, b.channel_layout_); - swap(a.speech_type_, b.speech_type_); - swap(a.vad_activity_, b.vad_activity_); - swap(a.profile_timestamp_ms_, b.profile_timestamp_ms_); - swap(a.packet_infos_, b.packet_infos_); - const size_t length_a = a.samples_per_channel_ * a.num_channels_; - const size_t length_b = b.samples_per_channel_ * b.num_channels_; - RTC_DCHECK_LE(length_a, AudioFrame::kMaxDataSizeSamples); - RTC_DCHECK_LE(length_b, AudioFrame::kMaxDataSizeSamples); - std::swap_ranges(a.data_, a.data_ + std::max(length_a, length_b), b.data_); - swap(a.muted_, b.muted_); - swap(a.absolute_capture_timestamp_ms_, b.absolute_capture_timestamp_ms_); -} - void AudioFrame::Reset() { ResetWithoutMuting(); muted_ = true; } void AudioFrame::ResetWithoutMuting() { - // TODO(wu): Zero is a valid value for |timestamp_|. We should initialize + // TODO(wu): Zero is a valid value for `timestamp_`. We should initialize // to an invalid value, or add a new member to indicate invalidity. 
timestamp_ = 0; elapsed_time_ms_ = -1; diff --git a/api/audio/audio_frame.h b/api/audio/audio_frame.h index 78539f57eb..d5dcb5f788 100644 --- a/api/audio/audio_frame.h +++ b/api/audio/audio_frame.h @@ -14,11 +14,8 @@ #include <stddef.h> #include <stdint.h> -#include <utility> - #include "api/audio/channel_layout.h" #include "api/rtp_packet_infos.h" -#include "rtc_base/constructor_magic.h" namespace webrtc { @@ -60,7 +57,8 @@ class AudioFrame { AudioFrame(); - friend void swap(AudioFrame& a, AudioFrame& b); + AudioFrame(const AudioFrame&) = delete; + AudioFrame& operator=(const AudioFrame&) = delete; // Resets all members to their default state. void Reset(); @@ -139,7 +137,7 @@ class AudioFrame { int64_t profile_timestamp_ms_ = 0; // Information about packets used to assemble this audio frame. This is needed - // by |SourceTracker| when the frame is delivered to the RTCRtpReceiver's + // by `SourceTracker` when the frame is delivered to the RTCRtpReceiver's // MediaStreamTrack, in order to implement getContributingSources(). See: // https://w3c.github.io/webrtc-pc/#dom-rtcrtpreceiver-getcontributingsources // @@ -149,7 +147,7 @@ class AudioFrame { // sync buffer is the small sample-holding buffer located after the audio // decoder and before where samples are assembled into output frames. // - // |RtpPacketInfos| may also be empty if the audio samples did not come from + // `RtpPacketInfos` may also be empty if the audio samples did not come from // RTP packets. E.g. if the audio were locally generated by packet loss // concealment, comfort noise generation, etc. RtpPacketInfos packet_infos_; @@ -165,11 +163,9 @@ class AudioFrame { // Absolute capture timestamp when this audio frame was originally captured. // This is only valid for audio frames captured on this machine. The absolute - // capture timestamp of a received frame is found in |packet_infos_|. + // capture timestamp of a received frame is found in `packet_infos_`. 
// This timestamp MUST be based on the same clock as rtc::TimeMillis(). absl::optional<int64_t> absolute_capture_timestamp_ms_; - - RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame); }; } // namespace webrtc diff --git a/api/audio/audio_frame_processor.h b/api/audio/audio_frame_processor.h new file mode 100644 index 0000000000..cb65c4817e --- /dev/null +++ b/api/audio/audio_frame_processor.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef API_AUDIO_AUDIO_FRAME_PROCESSOR_H_ +#define API_AUDIO_AUDIO_FRAME_PROCESSOR_H_ + +#include <functional> +#include <memory> + +namespace webrtc { + +class AudioFrame; + +// If passed into PeerConnectionFactory, will be used for additional +// processing of captured audio frames, performed before encoding. +// Implementations must be thread-safe. +class AudioFrameProcessor { + public: + using OnAudioFrameCallback = std::function<void(std::unique_ptr<AudioFrame>)>; + virtual ~AudioFrameProcessor() = default; + + // Processes the frame received from WebRTC, is called by WebRTC off the + // realtime audio capturing path. AudioFrameProcessor must reply with + // processed frames by calling `sink_callback` if it was provided in SetSink() + // call. `sink_callback` can be called in the context of Process(). + virtual void Process(std::unique_ptr<AudioFrame> frame) = 0; + + // Atomically replaces the current sink with the new one. Before the + // first call to this function, or if the provided `sink_callback` is nullptr, + // processed frames are simply discarded. 
+ virtual void SetSink(OnAudioFrameCallback sink_callback) = 0; +}; + +} // namespace webrtc + +#endif // API_AUDIO_AUDIO_FRAME_PROCESSOR_H_ diff --git a/api/audio/audio_mixer.h b/api/audio/audio_mixer.h index b290cfacf0..3483df22bc 100644 --- a/api/audio/audio_mixer.h +++ b/api/audio/audio_mixer.h @@ -35,9 +35,9 @@ class AudioMixer : public rtc::RefCountInterface { kError, // The audio_frame will not be used. }; - // Overwrites |audio_frame|. The data_ field is overwritten with + // Overwrites `audio_frame`. The data_ field is overwritten with // 10 ms of new audio (either 1 or 2 interleaved channels) at - // |sample_rate_hz|. All fields in |audio_frame| must be updated. + // `sample_rate_hz`. All fields in `audio_frame` must be updated. virtual AudioFrameInfo GetAudioFrameWithInfo(int sample_rate_hz, AudioFrame* audio_frame) = 0; @@ -66,7 +66,7 @@ class AudioMixer : public rtc::RefCountInterface { // should mix at a rate that doesn't cause quality loss of the // sources' audio. The mixing rate is one of the rates listed in // AudioProcessing::NativeRate. All fields in - // |audio_frame_for_mixing| must be updated. + // `audio_frame_for_mixing` must be updated. 
virtual void Mix(size_t number_of_channels, AudioFrame* audio_frame_for_mixing) = 0; diff --git a/api/audio/channel_layout.cc b/api/audio/channel_layout.cc index 567f4d9b26..e4ae356fab 100644 --- a/api/audio/channel_layout.cc +++ b/api/audio/channel_layout.cc @@ -275,7 +275,7 @@ const char* ChannelLayoutToString(ChannelLayout layout) { case CHANNEL_LAYOUT_BITSTREAM: return "BITSTREAM"; } - RTC_NOTREACHED() << "Invalid channel layout provided: " << layout; + RTC_DCHECK_NOTREACHED() << "Invalid channel layout provided: " << layout; return ""; } diff --git a/api/audio/echo_canceller3_config.cc b/api/audio/echo_canceller3_config.cc index aeb809efa9..0224c712b4 100644 --- a/api/audio/echo_canceller3_config.cc +++ b/api/audio/echo_canceller3_config.cc @@ -153,6 +153,7 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) { res = res & Limit(&c->filter.config_change_duration_blocks, 0, 100000); res = res & Limit(&c->filter.initial_state_seconds, 0.f, 100.f); + res = res & Limit(&c->filter.coarse_reset_hangover_blocks, 0, 250000); res = res & Limit(&c->erle.min, 1.f, 100000.f); res = res & Limit(&c->erle.max_l, 1.f, 100000.f); @@ -165,6 +166,7 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) { res = res & Limit(&c->ep_strength.default_gain, 0.f, 1000000.f); res = res & Limit(&c->ep_strength.default_len, -1.f, 1.f); + res = res & Limit(&c->ep_strength.nearend_len, -1.0f, 1.0f); res = res & Limit(&c->echo_audibility.low_render_limit, 0.f, 32768.f * 32768.f); @@ -228,6 +230,12 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) { res = res & Limit(&c->suppressor.nearend_tuning.max_dec_factor_lf, 0.f, 100.f); + res = res & Limit(&c->suppressor.last_permanent_lf_smoothing_band, 0, 64); + res = res & Limit(&c->suppressor.last_lf_smoothing_band, 0, 64); + res = res & Limit(&c->suppressor.last_lf_band, 0, 63); + res = res & + Limit(&c->suppressor.first_hf_band, c->suppressor.last_lf_band + 1, 64); + res = res & 
Limit(&c->suppressor.dominant_nearend_detection.enr_threshold, 0.f, 1000000.f); res = res & Limit(&c->suppressor.dominant_nearend_detection.snr_threshold, diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h index a505625538..4b1c7fbc47 100644 --- a/api/audio/echo_canceller3_config.h +++ b/api/audio/echo_canceller3_config.h @@ -43,6 +43,7 @@ struct RTC_EXPORT EchoCanceller3Config { size_t hysteresis_limit_blocks = 1; size_t fixed_capture_delay_samples = 0; float delay_estimate_smoothing = 0.7f; + float delay_estimate_smoothing_delay_found = 0.7f; float delay_candidate_detection_threshold = 0.2f; struct DelaySelectionThresholds { int initial; @@ -58,6 +59,7 @@ struct RTC_EXPORT EchoCanceller3Config { }; AlignmentMixing render_alignment_mixing = {false, true, 10000.f, true}; AlignmentMixing capture_alignment_mixing = {false, true, 10000.f, false}; + bool detect_pre_echo = true; } delay; struct Filter { @@ -86,9 +88,11 @@ struct RTC_EXPORT EchoCanceller3Config { size_t config_change_duration_blocks = 250; float initial_state_seconds = 2.5f; + int coarse_reset_hangover_blocks = 25; bool conservative_initial_phase = false; bool enable_coarse_filter_output_usage = true; bool use_linear_filter = true; + bool high_pass_filter_echo_reference = false; bool export_linear_aec_output = false; } filter; @@ -105,8 +109,11 @@ struct RTC_EXPORT EchoCanceller3Config { struct EpStrength { float default_gain = 1.f; float default_len = 0.83f; + float nearend_len = 0.83f; bool echo_can_saturate = true; bool bounded_erl = false; + bool erle_onset_compensation_in_dominant_nearend = false; + bool use_conservative_tail_frequency_response = true; } ep_strength; struct EchoAudibility { @@ -143,6 +150,7 @@ struct RTC_EXPORT EchoCanceller3Config { float noise_gate_slope = 0.3f; size_t render_pre_window_size = 1; size_t render_post_window_size = 1; + bool model_reverb_in_nonlinear_mode = true; } echo_model; struct ComfortNoise { @@ -189,6 +197,12 @@ struct 
RTC_EXPORT EchoCanceller3Config { 2.0f, 0.25f); + bool lf_smoothing_during_initial_phase = true; + int last_permanent_lf_smoothing_band = 0; + int last_lf_smoothing_band = 5; + int last_lf_band = 5; + int first_hf_band = 8; + struct DominantNearendDetection { float enr_threshold = .25f; float enr_exit_threshold = 10.f; @@ -196,6 +210,7 @@ struct RTC_EXPORT EchoCanceller3Config { int hold_duration = 50; int trigger_threshold = 12; bool use_during_initial_phase = true; + bool use_unbounded_echo_spectrum = true; } dominant_nearend_detection; struct SubbandNearendDetection { @@ -215,12 +230,20 @@ struct RTC_EXPORT EchoCanceller3Config { struct HighBandsSuppression { float enr_threshold = 1.f; float max_gain_during_echo = 1.f; - float anti_howling_activation_threshold = 25.f; - float anti_howling_gain = 0.01f; + float anti_howling_activation_threshold = 400.f; + float anti_howling_gain = 1.f; } high_bands_suppression; float floor_first_increase = 0.00001f; + bool conservative_hf_suppression = false; } suppressor; + + struct MultiChannel { + bool detect_stereo_content = true; + float stereo_detection_threshold = 0.0f; + int stereo_detection_timeout_threshold_seconds = 300; + float stereo_detection_hysteresis_seconds = 2.0f; + } multi_channel; }; } // namespace webrtc diff --git a/api/audio/echo_canceller3_config_json.cc b/api/audio/echo_canceller3_config_json.cc index f5c1249674..96e45ffe6d 100644 --- a/api/audio/echo_canceller3_config_json.cc +++ b/api/audio/echo_canceller3_config_json.cc @@ -11,6 +11,7 @@ #include <stddef.h> +#include <memory> #include <string> #include <vector> @@ -156,9 +157,14 @@ void Aec3ConfigFromJsonString(absl::string_view json_string, *parsing_successful = true; Json::Value root; - bool success = Json::Reader().parse(std::string(json_string), root); + Json::CharReaderBuilder builder; + std::string error_message; + std::unique_ptr<Json::CharReader> reader(builder.newCharReader()); + bool success = + reader->parse(json_string.data(), 
json_string.data() + json_string.size(), + &root, &error_message); if (!success) { - RTC_LOG(LS_ERROR) << "Incorrect JSON format: " << json_string; + RTC_LOG(LS_ERROR) << "Incorrect JSON format: " << error_message; *parsing_successful = false; return; } @@ -191,6 +197,8 @@ void Aec3ConfigFromJsonString(absl::string_view json_string, &cfg.delay.fixed_capture_delay_samples); ReadParam(section, "delay_estimate_smoothing", &cfg.delay.delay_estimate_smoothing); + ReadParam(section, "delay_estimate_smoothing_delay_found", + &cfg.delay.delay_estimate_smoothing_delay_found); ReadParam(section, "delay_candidate_detection_threshold", &cfg.delay.delay_candidate_detection_threshold); @@ -212,6 +220,7 @@ void Aec3ConfigFromJsonString(absl::string_view json_string, &cfg.delay.render_alignment_mixing); ReadParam(section, "capture_alignment_mixing", &cfg.delay.capture_alignment_mixing); + ReadParam(section, "detect_pre_echo", &cfg.delay.detect_pre_echo); } if (rtc::GetValueFromJsonObject(aec3_root, "filter", &section)) { @@ -223,11 +232,15 @@ void Aec3ConfigFromJsonString(absl::string_view json_string, &cfg.filter.config_change_duration_blocks); ReadParam(section, "initial_state_seconds", &cfg.filter.initial_state_seconds); + ReadParam(section, "coarse_reset_hangover_blocks", + &cfg.filter.coarse_reset_hangover_blocks); ReadParam(section, "conservative_initial_phase", &cfg.filter.conservative_initial_phase); ReadParam(section, "enable_coarse_filter_output_usage", &cfg.filter.enable_coarse_filter_output_usage); ReadParam(section, "use_linear_filter", &cfg.filter.use_linear_filter); + ReadParam(section, "high_pass_filter_echo_reference", + &cfg.filter.high_pass_filter_echo_reference); ReadParam(section, "export_linear_aec_output", &cfg.filter.export_linear_aec_output); } @@ -247,8 +260,13 @@ void Aec3ConfigFromJsonString(absl::string_view json_string, if (rtc::GetValueFromJsonObject(aec3_root, "ep_strength", &section)) { ReadParam(section, "default_gain", &cfg.ep_strength.default_gain); 
ReadParam(section, "default_len", &cfg.ep_strength.default_len); + ReadParam(section, "nearend_len", &cfg.ep_strength.nearend_len); ReadParam(section, "echo_can_saturate", &cfg.ep_strength.echo_can_saturate); ReadParam(section, "bounded_erl", &cfg.ep_strength.bounded_erl); + ReadParam(section, "erle_onset_compensation_in_dominant_nearend", + &cfg.ep_strength.erle_onset_compensation_in_dominant_nearend); + ReadParam(section, "use_conservative_tail_frequency_response", + &cfg.ep_strength.use_conservative_tail_frequency_response); } if (rtc::GetValueFromJsonObject(aec3_root, "echo_audibility", &section)) { @@ -302,6 +320,8 @@ void Aec3ConfigFromJsonString(absl::string_view json_string, &cfg.echo_model.render_pre_window_size); ReadParam(section, "render_post_window_size", &cfg.echo_model.render_post_window_size); + ReadParam(section, "model_reverb_in_nonlinear_mode", + &cfg.echo_model.model_reverb_in_nonlinear_mode); } if (rtc::GetValueFromJsonObject(aec3_root, "comfort_noise", &section)) { @@ -331,6 +351,15 @@ void Aec3ConfigFromJsonString(absl::string_view json_string, &cfg.suppressor.nearend_tuning.max_dec_factor_lf); } + ReadParam(section, "lf_smoothing_during_initial_phase", + &cfg.suppressor.lf_smoothing_during_initial_phase); + ReadParam(section, "last_permanent_lf_smoothing_band", + &cfg.suppressor.last_permanent_lf_smoothing_band); + ReadParam(section, "last_lf_smoothing_band", + &cfg.suppressor.last_lf_smoothing_band); + ReadParam(section, "last_lf_band", &cfg.suppressor.last_lf_band); + ReadParam(section, "first_hf_band", &cfg.suppressor.first_hf_band); + if (rtc::GetValueFromJsonObject(section, "dominant_nearend_detection", &subsection)) { ReadParam(subsection, "enr_threshold", @@ -346,6 +375,9 @@ void Aec3ConfigFromJsonString(absl::string_view json_string, ReadParam( subsection, "use_during_initial_phase", &cfg.suppressor.dominant_nearend_detection.use_during_initial_phase); + ReadParam(subsection, "use_unbounded_echo_spectrum", 
&cfg.suppressor.dominant_nearend_detection + .use_unbounded_echo_spectrum); } if (rtc::GetValueFromJsonObject(section, "subband_nearend_detection", @@ -381,6 +413,19 @@ void Aec3ConfigFromJsonString(absl::string_view json_string, ReadParam(section, "floor_first_increase", &cfg.suppressor.floor_first_increase); + ReadParam(section, "conservative_hf_suppression", + &cfg.suppressor.conservative_hf_suppression); + } + + if (rtc::GetValueFromJsonObject(aec3_root, "multi_channel", &section)) { + ReadParam(section, "detect_stereo_content", + &cfg.multi_channel.detect_stereo_content); + ReadParam(section, "stereo_detection_threshold", + &cfg.multi_channel.stereo_detection_threshold); + ReadParam(section, "stereo_detection_timeout_threshold_seconds", + &cfg.multi_channel.stereo_detection_timeout_threshold_seconds); + ReadParam(section, "stereo_detection_hysteresis_seconds", + &cfg.multi_channel.stereo_detection_hysteresis_seconds); } } @@ -415,6 +460,8 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) { << config.delay.fixed_capture_delay_samples << ","; ost << "\"delay_estimate_smoothing\": " << config.delay.delay_estimate_smoothing << ","; + ost << "\"delay_estimate_smoothing_delay_found\": " + << config.delay.delay_estimate_smoothing_delay_found << ","; ost << "\"delay_candidate_detection_threshold\": " << config.delay.delay_candidate_detection_threshold << ","; @@ -459,7 +506,9 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) { << (config.delay.capture_alignment_mixing.prefer_first_two_channels ? "true" : "false"); - ost << "}"; + ost << "},"; + ost << "\"detect_pre_echo\": " + << (config.delay.detect_pre_echo ? 
"true" : "false"); ost << "},"; ost << "\"filter\": {"; @@ -498,6 +547,8 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) { << config.filter.config_change_duration_blocks << ","; ost << "\"initial_state_seconds\": " << config.filter.initial_state_seconds << ","; + ost << "\"coarse_reset_hangover_blocks\": " + << config.filter.coarse_reset_hangover_blocks << ","; ost << "\"conservative_initial_phase\": " << (config.filter.conservative_initial_phase ? "true" : "false") << ","; ost << "\"enable_coarse_filter_output_usage\": " @@ -505,6 +556,9 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) { << ","; ost << "\"use_linear_filter\": " << (config.filter.use_linear_filter ? "true" : "false") << ","; + ost << "\"high_pass_filter_echo_reference\": " + << (config.filter.high_pass_filter_echo_reference ? "true" : "false") + << ","; ost << "\"export_linear_aec_output\": " << (config.filter.export_linear_aec_output ? "true" : "false"); @@ -526,11 +580,20 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) { ost << "\"ep_strength\": {"; ost << "\"default_gain\": " << config.ep_strength.default_gain << ","; ost << "\"default_len\": " << config.ep_strength.default_len << ","; + ost << "\"nearend_len\": " << config.ep_strength.nearend_len << ","; ost << "\"echo_can_saturate\": " << (config.ep_strength.echo_can_saturate ? "true" : "false") << ","; ost << "\"bounded_erl\": " - << (config.ep_strength.bounded_erl ? "true" : "false"); - + << (config.ep_strength.bounded_erl ? "true" : "false") << ","; + ost << "\"erle_onset_compensation_in_dominant_nearend\": " + << (config.ep_strength.erle_onset_compensation_in_dominant_nearend + ? "true" + : "false") + << ","; + ost << "\"use_conservative_tail_frequency_response\": " + << (config.ep_strength.use_conservative_tail_frequency_response + ? 
"true" + : "false"); ost << "},"; ost << "\"echo_audibility\": {"; @@ -585,7 +648,9 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) { ost << "\"render_pre_window_size\": " << config.echo_model.render_pre_window_size << ","; ost << "\"render_post_window_size\": " - << config.echo_model.render_post_window_size; + << config.echo_model.render_post_window_size << ","; + ost << "\"model_reverb_in_nonlinear_mode\": " + << (config.echo_model.model_reverb_in_nonlinear_mode ? "true" : "false"); ost << "},"; ost << "\"comfort_noise\": {"; @@ -627,20 +692,30 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) { ost << "\"max_dec_factor_lf\": " << config.suppressor.nearend_tuning.max_dec_factor_lf; ost << "},"; - ost << "\"dominant_nearend_detection\": {"; - ost << "\"enr_threshold\": " - << config.suppressor.dominant_nearend_detection.enr_threshold << ","; - ost << "\"enr_exit_threshold\": " - << config.suppressor.dominant_nearend_detection.enr_exit_threshold << ","; - ost << "\"snr_threshold\": " - << config.suppressor.dominant_nearend_detection.snr_threshold << ","; - ost << "\"hold_duration\": " - << config.suppressor.dominant_nearend_detection.hold_duration << ","; - ost << "\"trigger_threshold\": " - << config.suppressor.dominant_nearend_detection.trigger_threshold << ","; - ost << "\"use_during_initial_phase\": " - << config.suppressor.dominant_nearend_detection.use_during_initial_phase; - ost << "},"; + ost << "\"lf_smoothing_during_initial_phase\": " + << (config.suppressor.lf_smoothing_during_initial_phase ? 
"true" + : "false") + << ","; + ost << "\"last_permanent_lf_smoothing_band\": " + << config.suppressor.last_permanent_lf_smoothing_band << ","; + ost << "\"last_lf_smoothing_band\": " + << config.suppressor.last_lf_smoothing_band << ","; + ost << "\"last_lf_band\": " << config.suppressor.last_lf_band << ","; + ost << "\"first_hf_band\": " << config.suppressor.first_hf_band << ","; + { + const auto& dnd = config.suppressor.dominant_nearend_detection; + ost << "\"dominant_nearend_detection\": {"; + ost << "\"enr_threshold\": " << dnd.enr_threshold << ","; + ost << "\"enr_exit_threshold\": " << dnd.enr_exit_threshold << ","; + ost << "\"snr_threshold\": " << dnd.snr_threshold << ","; + ost << "\"hold_duration\": " << dnd.hold_duration << ","; + ost << "\"trigger_threshold\": " << dnd.trigger_threshold << ","; + ost << "\"use_during_initial_phase\": " << dnd.use_during_initial_phase + << ","; + ost << "\"use_unbounded_echo_spectrum\": " + << dnd.use_unbounded_echo_spectrum; + ost << "},"; + } ost << "\"subband_nearend_detection\": {"; ost << "\"nearend_average_blocks\": " << config.suppressor.subband_nearend_detection.nearend_average_blocks @@ -672,8 +747,23 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) { ost << "\"anti_howling_gain\": " << config.suppressor.high_bands_suppression.anti_howling_gain; ost << "},"; - ost << "\"floor_first_increase\": " << config.suppressor.floor_first_increase; + ost << "\"floor_first_increase\": " << config.suppressor.floor_first_increase + << ","; + ost << "\"conservative_hf_suppression\": " + << config.suppressor.conservative_hf_suppression; + ost << "},"; + + ost << "\"multi_channel\": {"; + ost << "\"detect_stereo_content\": " + << (config.multi_channel.detect_stereo_content ? 
"true" : "false") << ","; + ost << "\"stereo_detection_threshold\": " + << config.multi_channel.stereo_detection_threshold << ","; + ost << "\"stereo_detection_timeout_threshold_seconds\": " + << config.multi_channel.stereo_detection_timeout_threshold_seconds << ","; + ost << "\"stereo_detection_hysteresis_seconds\": " + << config.multi_channel.stereo_detection_hysteresis_seconds; ost << "}"; + ost << "}"; ost << "}"; diff --git a/api/audio/echo_canceller3_factory.cc b/api/audio/echo_canceller3_factory.cc index d65a7262fa..284b117bea 100644 --- a/api/audio/echo_canceller3_factory.cc +++ b/api/audio/echo_canceller3_factory.cc @@ -25,7 +25,8 @@ std::unique_ptr<EchoControl> EchoCanceller3Factory::Create( int num_render_channels, int num_capture_channels) { return std::make_unique<EchoCanceller3>( - config_, sample_rate_hz, num_render_channels, num_capture_channels); + config_, /*multichannel_config=*/absl::nullopt, sample_rate_hz, + num_render_channels, num_capture_channels); } } // namespace webrtc diff --git a/api/audio/echo_control.h b/api/audio/echo_control.h index 8d567bf2b8..74fbc27b12 100644 --- a/api/audio/echo_control.h +++ b/api/audio/echo_control.h @@ -48,6 +48,13 @@ class EchoControl { // Provides an optional external estimate of the audio buffer delay. virtual void SetAudioBufferDelay(int delay_ms) = 0; + // Specifies whether the capture output will be used. The purpose of this is + // to allow the echo controller to deactivate some of the processing when the + // resulting output is anyway not used, for instance when the endpoint is + // muted. + // TODO(b/177830919): Make pure virtual. + virtual void SetCaptureOutputUsage(bool capture_output_used) {} + // Returns wheter the signal is altered. 
virtual bool ActiveProcessing() const = 0; diff --git a/api/audio/echo_detector_creator.cc b/api/audio/echo_detector_creator.cc index 4c3d9e61fe..15b7c51dca 100644 --- a/api/audio/echo_detector_creator.cc +++ b/api/audio/echo_detector_creator.cc @@ -9,13 +9,13 @@ */ #include "api/audio/echo_detector_creator.h" +#include "api/make_ref_counted.h" #include "modules/audio_processing/residual_echo_detector.h" -#include "rtc_base/ref_counted_object.h" namespace webrtc { rtc::scoped_refptr<EchoDetector> CreateEchoDetector() { - return new rtc::RefCountedObject<ResidualEchoDetector>(); + return rtc::make_ref_counted<ResidualEchoDetector>(); } } // namespace webrtc diff --git a/api/audio/test/BUILD.gn b/api/audio/test/BUILD.gn index d62baf15b7..dfe8c32f80 100644 --- a/api/audio/test/BUILD.gn +++ b/api/audio/test/BUILD.gn @@ -24,7 +24,6 @@ if (rtc_include_tests) { "..:aec3_config", "..:aec3_config_json", "..:audio_frame_api", - "../../../rtc_base:rtc_base_approved", "../../../test:test_support", ] } diff --git a/api/audio/test/audio_frame_unittest.cc b/api/audio/test/audio_frame_unittest.cc index f8d3318274..dbf45ceabc 100644 --- a/api/audio/test/audio_frame_unittest.cc +++ b/api/audio/test/audio_frame_unittest.cc @@ -133,54 +133,4 @@ TEST(AudioFrameTest, CopyFrom) { EXPECT_EQ(0, memcmp(frame2.data(), frame1.data(), sizeof(samples))); } -TEST(AudioFrameTest, SwapFrames) { - AudioFrame frame1, frame2; - int16_t samples1[kNumChannelsMono * kSamplesPerChannel]; - for (size_t i = 0; i < kNumChannelsMono * kSamplesPerChannel; ++i) { - samples1[i] = i; - } - frame1.UpdateFrame(kTimestamp, samples1, kSamplesPerChannel, kSampleRateHz, - AudioFrame::kPLC, AudioFrame::kVadActive, - kNumChannelsMono); - frame1.set_absolute_capture_timestamp_ms(12345678); - const auto frame1_channel_layout = frame1.channel_layout(); - - int16_t samples2[(kNumChannelsMono + 1) * (kSamplesPerChannel + 1)]; - for (size_t i = 0; i < (kNumChannelsMono + 1) * (kSamplesPerChannel + 1); - ++i) { - samples2[i] = 
1000 + i; - } - frame2.UpdateFrame(kTimestamp + 1, samples2, kSamplesPerChannel + 1, - kSampleRateHz + 1, AudioFrame::kNormalSpeech, - AudioFrame::kVadPassive, kNumChannelsMono + 1); - const auto frame2_channel_layout = frame2.channel_layout(); - - swap(frame1, frame2); - - EXPECT_EQ(kTimestamp + 1, frame1.timestamp_); - ASSERT_EQ(kSamplesPerChannel + 1, frame1.samples_per_channel_); - EXPECT_EQ(kSampleRateHz + 1, frame1.sample_rate_hz_); - EXPECT_EQ(AudioFrame::kNormalSpeech, frame1.speech_type_); - EXPECT_EQ(AudioFrame::kVadPassive, frame1.vad_activity_); - ASSERT_EQ(kNumChannelsMono + 1, frame1.num_channels_); - for (size_t i = 0; i < (kNumChannelsMono + 1) * (kSamplesPerChannel + 1); - ++i) { - EXPECT_EQ(samples2[i], frame1.data()[i]); - } - EXPECT_FALSE(frame1.absolute_capture_timestamp_ms()); - EXPECT_EQ(frame2_channel_layout, frame1.channel_layout()); - - EXPECT_EQ(kTimestamp, frame2.timestamp_); - ASSERT_EQ(kSamplesPerChannel, frame2.samples_per_channel_); - EXPECT_EQ(kSampleRateHz, frame2.sample_rate_hz_); - EXPECT_EQ(AudioFrame::kPLC, frame2.speech_type_); - EXPECT_EQ(AudioFrame::kVadActive, frame2.vad_activity_); - ASSERT_EQ(kNumChannelsMono, frame2.num_channels_); - for (size_t i = 0; i < kNumChannelsMono * kSamplesPerChannel; ++i) { - EXPECT_EQ(samples1[i], frame2.data()[i]); - } - EXPECT_EQ(12345678, frame2.absolute_capture_timestamp_ms()); - EXPECT_EQ(frame1_channel_layout, frame2.channel_layout()); -} - } // namespace webrtc diff --git a/api/audio/test/echo_canceller3_config_json_unittest.cc b/api/audio/test/echo_canceller3_config_json_unittest.cc index a149c17a76..4146dda9fe 100644 --- a/api/audio/test/echo_canceller3_config_json_unittest.cc +++ b/api/audio/test/echo_canceller3_config_json_unittest.cc @@ -21,19 +21,29 @@ TEST(EchoCanceller3JsonHelpers, ToStringAndParseJson) { cfg.delay.log_warning_on_delay_changes = true; cfg.filter.refined.error_floor = 2.f; cfg.filter.coarse_initial.length_blocks = 3u; + cfg.filter.high_pass_filter_echo_reference 
= + !cfg.filter.high_pass_filter_echo_reference; cfg.comfort_noise.noise_floor_dbfs = 100.f; + cfg.echo_model.model_reverb_in_nonlinear_mode = false; cfg.suppressor.normal_tuning.mask_hf.enr_suppress = .5f; cfg.suppressor.subband_nearend_detection.nearend_average_blocks = 3; cfg.suppressor.subband_nearend_detection.subband1 = {1, 3}; cfg.suppressor.subband_nearend_detection.subband1 = {4, 5}; cfg.suppressor.subband_nearend_detection.nearend_threshold = 2.f; cfg.suppressor.subband_nearend_detection.snr_threshold = 100.f; + cfg.multi_channel.detect_stereo_content = + !cfg.multi_channel.detect_stereo_content; + cfg.multi_channel.stereo_detection_threshold += 1.0f; + cfg.multi_channel.stereo_detection_timeout_threshold_seconds += 1; + cfg.multi_channel.stereo_detection_hysteresis_seconds += 1; std::string json_string = Aec3ConfigToJsonString(cfg); EchoCanceller3Config cfg_transformed = Aec3ConfigFromJsonString(json_string); // Expect unchanged values to remain default. EXPECT_EQ(cfg.ep_strength.default_len, cfg_transformed.ep_strength.default_len); + EXPECT_EQ(cfg.ep_strength.nearend_len, + cfg_transformed.ep_strength.nearend_len); EXPECT_EQ(cfg.suppressor.normal_tuning.mask_lf.enr_suppress, cfg_transformed.suppressor.normal_tuning.mask_lf.enr_suppress); @@ -46,8 +56,12 @@ TEST(EchoCanceller3JsonHelpers, ToStringAndParseJson) { cfg_transformed.filter.coarse_initial.length_blocks); EXPECT_EQ(cfg.filter.refined.error_floor, cfg_transformed.filter.refined.error_floor); + EXPECT_EQ(cfg.filter.high_pass_filter_echo_reference, + cfg_transformed.filter.high_pass_filter_echo_reference); EXPECT_EQ(cfg.comfort_noise.noise_floor_dbfs, cfg_transformed.comfort_noise.noise_floor_dbfs); + EXPECT_EQ(cfg.echo_model.model_reverb_in_nonlinear_mode, + cfg_transformed.echo_model.model_reverb_in_nonlinear_mode); EXPECT_EQ(cfg.suppressor.normal_tuning.mask_hf.enr_suppress, cfg_transformed.suppressor.normal_tuning.mask_hf.enr_suppress); 
EXPECT_EQ(cfg.suppressor.subband_nearend_detection.nearend_average_blocks, @@ -66,5 +80,14 @@ TEST(EchoCanceller3JsonHelpers, ToStringAndParseJson) { cfg_transformed.suppressor.subband_nearend_detection.nearend_threshold); EXPECT_EQ(cfg.suppressor.subband_nearend_detection.snr_threshold, cfg_transformed.suppressor.subband_nearend_detection.snr_threshold); + EXPECT_EQ(cfg.multi_channel.detect_stereo_content, + cfg_transformed.multi_channel.detect_stereo_content); + EXPECT_EQ(cfg.multi_channel.stereo_detection_threshold, + cfg_transformed.multi_channel.stereo_detection_threshold); + EXPECT_EQ( + cfg.multi_channel.stereo_detection_timeout_threshold_seconds, + cfg_transformed.multi_channel.stereo_detection_timeout_threshold_seconds); + EXPECT_EQ(cfg.multi_channel.stereo_detection_hysteresis_seconds, + cfg_transformed.multi_channel.stereo_detection_hysteresis_seconds); } } // namespace webrtc |