diff options
Diffstat (limited to 'modules/audio_mixer')
-rw-r--r-- | modules/audio_mixer/BUILD.gn | 6 | ||||
-rw-r--r-- | modules/audio_mixer/OWNERS.webrtc | 2 | ||||
-rw-r--r-- | modules/audio_mixer/audio_mixer_impl.cc | 27 | ||||
-rw-r--r-- | modules/audio_mixer/audio_mixer_impl.h | 14 | ||||
-rw-r--r-- | modules/audio_mixer/audio_mixer_impl_unittest.cc | 164 | ||||
-rw-r--r-- | modules/audio_mixer/frame_combiner.cc | 29 | ||||
-rw-r--r-- | modules/audio_mixer/frame_combiner_unittest.cc | 69 | ||||
-rw-r--r-- | modules/audio_mixer/g3doc/index.md | 54 |
8 files changed, 332 insertions, 33 deletions
diff --git a/modules/audio_mixer/BUILD.gn b/modules/audio_mixer/BUILD.gn index 739d62d705..d51be4af04 100644 --- a/modules/audio_mixer/BUILD.gn +++ b/modules/audio_mixer/BUILD.gn @@ -39,6 +39,7 @@ rtc_library("audio_mixer_impl") { deps = [ ":audio_frame_manipulator", "../../api:array_view", + "../../api:rtp_packet_info", "../../api:scoped_refptr", "../../api/audio:audio_frame_api", "../../api/audio:audio_mixer_api", @@ -46,6 +47,7 @@ rtc_library("audio_mixer_impl") { "../../common_audio", "../../rtc_base:checks", "../../rtc_base:rtc_base_approved", + "../../rtc_base:safe_conversions", "../../rtc_base/synchronization:mutex", "../../system_wrappers", "../../system_wrappers:metrics", @@ -104,13 +106,15 @@ if (rtc_include_tests) { "audio_mixer_impl_unittest.cc", "frame_combiner_unittest.cc", ] - + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] deps = [ ":audio_frame_manipulator", ":audio_mixer_impl", ":audio_mixer_test_utils", "../../api:array_view", + "../../api:rtp_packet_info", "../../api/audio:audio_mixer_api", + "../../api/units:timestamp", "../../audio/utility:audio_frame_operations", "../../rtc_base:checks", "../../rtc_base:rtc_base_approved", diff --git a/modules/audio_mixer/OWNERS.webrtc b/modules/audio_mixer/OWNERS.webrtc index b33d599697..5edc304ab3 100644 --- a/modules/audio_mixer/OWNERS.webrtc +++ b/modules/audio_mixer/OWNERS.webrtc @@ -1,2 +1,2 @@ -aleloi@webrtc.org +alessiob@webrtc.org henrik.lundin@webrtc.org diff --git a/modules/audio_mixer/audio_mixer_impl.cc b/modules/audio_mixer/audio_mixer_impl.cc index 04a8bcf723..8cebc38779 100644 --- a/modules/audio_mixer/audio_mixer_impl.cc +++ b/modules/audio_mixer/audio_mixer_impl.cc @@ -126,30 +126,33 @@ struct AudioMixerImpl::HelperContainers { AudioMixerImpl::AudioMixerImpl( std::unique_ptr<OutputRateCalculator> output_rate_calculator, - bool use_limiter) - : output_rate_calculator_(std::move(output_rate_calculator)), + bool use_limiter, + int max_sources_to_mix) + : 
max_sources_to_mix_(max_sources_to_mix), + output_rate_calculator_(std::move(output_rate_calculator)), audio_source_list_(), helper_containers_(std::make_unique<HelperContainers>()), frame_combiner_(use_limiter) { - const int kTypicalMaxNumberOfMixedStreams = 3; - audio_source_list_.reserve(kTypicalMaxNumberOfMixedStreams); - helper_containers_->resize(kTypicalMaxNumberOfMixedStreams); + RTC_CHECK_GE(max_sources_to_mix, 1) << "At least one source must be mixed"; + audio_source_list_.reserve(max_sources_to_mix); + helper_containers_->resize(max_sources_to_mix); } AudioMixerImpl::~AudioMixerImpl() {} -rtc::scoped_refptr<AudioMixerImpl> AudioMixerImpl::Create() { +rtc::scoped_refptr<AudioMixerImpl> AudioMixerImpl::Create( + int max_sources_to_mix) { return Create(std::unique_ptr<DefaultOutputRateCalculator>( new DefaultOutputRateCalculator()), - true); + /*use_limiter=*/true, max_sources_to_mix); } rtc::scoped_refptr<AudioMixerImpl> AudioMixerImpl::Create( std::unique_ptr<OutputRateCalculator> output_rate_calculator, - bool use_limiter) { - return rtc::scoped_refptr<AudioMixerImpl>( - new rtc::RefCountedObject<AudioMixerImpl>( - std::move(output_rate_calculator), use_limiter)); + bool use_limiter, + int max_sources_to_mix) { + return rtc::make_ref_counted<AudioMixerImpl>( + std::move(output_rate_calculator), use_limiter, max_sources_to_mix); } void AudioMixerImpl::Mix(size_t number_of_channels, @@ -219,7 +222,7 @@ rtc::ArrayView<AudioFrame* const> AudioMixerImpl::GetAudioFromSources( std::sort(audio_source_mixing_data_view.begin(), audio_source_mixing_data_view.end(), ShouldMixBefore); - int max_audio_frame_counter = kMaximumAmountOfMixedAudioSources; + int max_audio_frame_counter = max_sources_to_mix_; int ramp_list_lengh = 0; int audio_to_mix_count = 0; // Go through list in order and put unmuted frames in result list. 
diff --git a/modules/audio_mixer/audio_mixer_impl.h b/modules/audio_mixer/audio_mixer_impl.h index 0a13082725..737fcbdc43 100644 --- a/modules/audio_mixer/audio_mixer_impl.h +++ b/modules/audio_mixer/audio_mixer_impl.h @@ -35,13 +35,16 @@ class AudioMixerImpl : public AudioMixer { // AudioProcessing only accepts 10 ms frames. static const int kFrameDurationInMs = 10; - enum : int { kMaximumAmountOfMixedAudioSources = 3 }; - static rtc::scoped_refptr<AudioMixerImpl> Create(); + static const int kDefaultNumberOfMixedAudioSources = 3; + + static rtc::scoped_refptr<AudioMixerImpl> Create( + int max_sources_to_mix = kDefaultNumberOfMixedAudioSources); static rtc::scoped_refptr<AudioMixerImpl> Create( std::unique_ptr<OutputRateCalculator> output_rate_calculator, - bool use_limiter); + bool use_limiter, + int max_sources_to_mix = kDefaultNumberOfMixedAudioSources); ~AudioMixerImpl() override; @@ -60,7 +63,8 @@ class AudioMixerImpl : public AudioMixer { protected: AudioMixerImpl(std::unique_ptr<OutputRateCalculator> output_rate_calculator, - bool use_limiter); + bool use_limiter, + int max_sources_to_mix); private: struct HelperContainers; @@ -76,6 +80,8 @@ class AudioMixerImpl : public AudioMixer { // checks that mixing is done sequentially. mutable Mutex mutex_; + const int max_sources_to_mix_; + std::unique_ptr<OutputRateCalculator> output_rate_calculator_; // List of all audio sources. 
diff --git a/modules/audio_mixer/audio_mixer_impl_unittest.cc b/modules/audio_mixer/audio_mixer_impl_unittest.cc index c2f02fbfbd..61aa74e0a1 100644 --- a/modules/audio_mixer/audio_mixer_impl_unittest.cc +++ b/modules/audio_mixer/audio_mixer_impl_unittest.cc @@ -12,12 +12,18 @@ #include <string.h> +#include <cstdint> #include <limits> #include <memory> #include <string> #include <utility> +#include <vector> +#include "absl/types/optional.h" #include "api/audio/audio_mixer.h" +#include "api/rtp_packet_info.h" +#include "api/rtp_packet_infos.h" +#include "api/units/timestamp.h" #include "modules/audio_mixer/default_output_rate_calculator.h" #include "rtc_base/checks.h" #include "rtc_base/strings/string_builder.h" @@ -29,6 +35,7 @@ using ::testing::_; using ::testing::Exactly; using ::testing::Invoke; using ::testing::Return; +using ::testing::UnorderedElementsAre; namespace webrtc { @@ -87,6 +94,10 @@ class MockMixerAudioSource : public ::testing::NiceMock<AudioMixer::Source> { fake_audio_frame_info_ = audio_frame_info; } + void set_packet_infos(const RtpPacketInfos& packet_infos) { + packet_infos_ = packet_infos; + } + private: AudioFrameInfo FakeAudioFrameWithInfo(int sample_rate_hz, AudioFrame* audio_frame) { @@ -94,11 +105,13 @@ class MockMixerAudioSource : public ::testing::NiceMock<AudioMixer::Source> { audio_frame->sample_rate_hz_ = sample_rate_hz; audio_frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100); + audio_frame->packet_infos_ = packet_infos_; return fake_info(); } AudioFrame fake_frame_; AudioFrameInfo fake_audio_frame_info_; + RtpPacketInfos packet_infos_; }; class CustomRateCalculator : public OutputRateCalculator { @@ -160,7 +173,7 @@ void MixMonoAtGivenNativeRate(int native_sample_rate, TEST(AudioMixer, LargestEnergyVadActiveMixed) { constexpr int kAudioSources = - AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 3; + AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 3; const auto mixer = AudioMixerImpl::Create(); @@ 
-191,7 +204,7 @@ TEST(AudioMixer, LargestEnergyVadActiveMixed) { mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]); if (i == kAudioSources - 1 || i < kAudioSources - 1 - - AudioMixerImpl::kMaximumAmountOfMixedAudioSources) { + AudioMixerImpl::kDefaultNumberOfMixedAudioSources) { EXPECT_FALSE(is_mixed) << "Mixing status of AudioSource #" << i << " wrong."; } else { @@ -322,7 +335,7 @@ TEST(AudioMixer, ParticipantNumberOfChannels) { // another participant with higher energy is added. TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) { constexpr int kAudioSources = - AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1; + AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 1; const auto mixer = AudioMixerImpl::Create(); MockMixerAudioSource participants[kAudioSources]; @@ -399,7 +412,7 @@ TEST(AudioMixer, ConstructFromOtherThread) { TEST(AudioMixer, MutedShouldMixAfterUnmuted) { constexpr int kAudioSources = - AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1; + AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 1; std::vector<AudioFrame> frames(kAudioSources); for (auto& frame : frames) { @@ -417,7 +430,7 @@ TEST(AudioMixer, MutedShouldMixAfterUnmuted) { TEST(AudioMixer, PassiveShouldMixAfterNormal) { constexpr int kAudioSources = - AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1; + AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 1; std::vector<AudioFrame> frames(kAudioSources); for (auto& frame : frames) { @@ -435,7 +448,7 @@ TEST(AudioMixer, PassiveShouldMixAfterNormal) { TEST(AudioMixer, ActiveShouldMixBeforeLoud) { constexpr int kAudioSources = - AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1; + AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 1; std::vector<AudioFrame> frames(kAudioSources); for (auto& frame : frames) { @@ -454,9 +467,52 @@ TEST(AudioMixer, ActiveShouldMixBeforeLoud) { MixAndCompare(frames, frame_info, expected_status); } +TEST(AudioMixer, ShouldMixUpToSpecifiedNumberOfSourcesToMix) { + constexpr int 
kAudioSources = 5; + constexpr int kSourcesToMix = 2; + + std::vector<AudioFrame> frames(kAudioSources); + for (auto& frame : frames) { + ResetFrame(&frame); + } + + std::vector<AudioMixer::Source::AudioFrameInfo> frame_info( + kAudioSources, AudioMixer::Source::AudioFrameInfo::kNormal); + // Set up to kSourceToMix sources with kVadActive so that they're mixed. + const std::vector<AudioFrame::VADActivity> kVadActivities = { + AudioFrame::kVadUnknown, AudioFrame::kVadPassive, AudioFrame::kVadPassive, + AudioFrame::kVadActive, AudioFrame::kVadActive}; + // Populate VAD and frame for all sources. + for (int i = 0; i < kAudioSources; i++) { + frames[i].vad_activity_ = kVadActivities[i]; + } + + std::vector<MockMixerAudioSource> participants(kAudioSources); + for (int i = 0; i < kAudioSources; ++i) { + participants[i].fake_frame()->CopyFrom(frames[i]); + participants[i].set_fake_info(frame_info[i]); + } + + const auto mixer = AudioMixerImpl::Create(kSourcesToMix); + for (int i = 0; i < kAudioSources; ++i) { + EXPECT_TRUE(mixer->AddSource(&participants[i])); + EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _)) + .Times(Exactly(1)); + } + + mixer->Mix(1, &frame_for_mixing); + + std::vector<bool> expected_status = {false, false, false, true, true}; + for (int i = 0; i < kAudioSources; ++i) { + EXPECT_EQ(expected_status[i], + mixer->GetAudioSourceMixabilityStatusForTest(&participants[i])) + << "Wrong mix status for source #" << i << " is wrong"; + } +} + TEST(AudioMixer, UnmutedShouldMixBeforeLoud) { constexpr int kAudioSources = - AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1; + AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 1; std::vector<AudioFrame> frames(kAudioSources); for (auto& frame : frames) { @@ -595,6 +651,100 @@ TEST(AudioMixer, MultipleChannelsManyParticipants) { } } +TEST(AudioMixer, ShouldIncludeRtpPacketInfoFromAllMixedSources) { + const uint32_t kSsrc0 = 10; + const uint32_t kSsrc1 = 11; + const uint32_t kSsrc2 = 12; 
+ const uint32_t kCsrc0 = 20; + const uint32_t kCsrc1 = 21; + const uint32_t kCsrc2 = 22; + const uint32_t kCsrc3 = 23; + const int kAudioLevel0 = 10; + const int kAudioLevel1 = 40; + const absl::optional<uint32_t> kAudioLevel2 = absl::nullopt; + const uint32_t kRtpTimestamp0 = 300; + const uint32_t kRtpTimestamp1 = 400; + const Timestamp kReceiveTime0 = Timestamp::Millis(10); + const Timestamp kReceiveTime1 = Timestamp::Millis(20); + + const RtpPacketInfo kPacketInfo0(kSsrc0, {kCsrc0, kCsrc1}, kRtpTimestamp0, + kAudioLevel0, absl::nullopt, kReceiveTime0); + const RtpPacketInfo kPacketInfo1(kSsrc1, {kCsrc2}, kRtpTimestamp1, + kAudioLevel1, absl::nullopt, kReceiveTime1); + const RtpPacketInfo kPacketInfo2(kSsrc2, {kCsrc3}, kRtpTimestamp1, + kAudioLevel2, absl::nullopt, kReceiveTime1); + + const auto mixer = AudioMixerImpl::Create(); + + MockMixerAudioSource source; + source.set_packet_infos(RtpPacketInfos({kPacketInfo0})); + mixer->AddSource(&source); + ResetFrame(source.fake_frame()); + mixer->Mix(1, &frame_for_mixing); + + MockMixerAudioSource other_source; + other_source.set_packet_infos(RtpPacketInfos({kPacketInfo1, kPacketInfo2})); + ResetFrame(other_source.fake_frame()); + mixer->AddSource(&other_source); + + mixer->Mix(/*number_of_channels=*/1, &frame_for_mixing); + + EXPECT_THAT(frame_for_mixing.packet_infos_, + UnorderedElementsAre(kPacketInfo0, kPacketInfo1, kPacketInfo2)); +} + +TEST(AudioMixer, MixerShouldIncludeRtpPacketInfoFromMixedSourcesOnly) { + const uint32_t kSsrc0 = 10; + const uint32_t kSsrc1 = 11; + const uint32_t kSsrc2 = 21; + const uint32_t kCsrc0 = 30; + const uint32_t kCsrc1 = 31; + const uint32_t kCsrc2 = 32; + const uint32_t kCsrc3 = 33; + const int kAudioLevel0 = 10; + const absl::optional<uint32_t> kAudioLevelMissing = absl::nullopt; + const uint32_t kRtpTimestamp0 = 300; + const uint32_t kRtpTimestamp1 = 400; + const Timestamp kReceiveTime0 = Timestamp::Millis(10); + const Timestamp kReceiveTime1 = Timestamp::Millis(20); + + const 
RtpPacketInfo kPacketInfo0(kSsrc0, {kCsrc0, kCsrc1}, kRtpTimestamp0, + kAudioLevel0, absl::nullopt, kReceiveTime0); + const RtpPacketInfo kPacketInfo1(kSsrc1, {kCsrc2}, kRtpTimestamp1, + kAudioLevelMissing, absl::nullopt, + kReceiveTime1); + const RtpPacketInfo kPacketInfo2(kSsrc2, {kCsrc3}, kRtpTimestamp1, + kAudioLevelMissing, absl::nullopt, + kReceiveTime1); + + const auto mixer = AudioMixerImpl::Create(/*max_sources_to_mix=*/2); + + MockMixerAudioSource source1; + source1.set_packet_infos(RtpPacketInfos({kPacketInfo0})); + mixer->AddSource(&source1); + ResetFrame(source1.fake_frame()); + mixer->Mix(1, &frame_for_mixing); + + MockMixerAudioSource source2; + source2.set_packet_infos(RtpPacketInfos({kPacketInfo1})); + ResetFrame(source2.fake_frame()); + mixer->AddSource(&source2); + + // The mixer prioritizes kVadActive over kVadPassive. + // We limit the number of sources to mix to 2 and set the third source's VAD + // activity to kVadPassive so that it will not be added to the mix. + MockMixerAudioSource source3; + source3.set_packet_infos(RtpPacketInfos({kPacketInfo2})); + ResetFrame(source3.fake_frame()); + source3.fake_frame()->vad_activity_ = AudioFrame::kVadPassive; + mixer->AddSource(&source3); + + mixer->Mix(/*number_of_channels=*/1, &frame_for_mixing); + + EXPECT_THAT(frame_for_mixing.packet_infos_, + UnorderedElementsAre(kPacketInfo0, kPacketInfo1)); +} + class HighOutputRateCalculator : public OutputRateCalculator { public: static const int kDefaultFrequency = 76000; diff --git a/modules/audio_mixer/frame_combiner.cc b/modules/audio_mixer/frame_combiner.cc index fb6f72af75..e31eea595f 100644 --- a/modules/audio_mixer/frame_combiner.cc +++ b/modules/audio_mixer/frame_combiner.cc @@ -16,8 +16,12 @@ #include <iterator> #include <memory> #include <string> +#include <utility> +#include <vector> #include "api/array_view.h" +#include "api/rtp_packet_info.h" +#include "api/rtp_packet_infos.h" #include "common_audio/include/audio_util.h" #include 
"modules/audio_mixer/audio_frame_manipulator.h" #include "modules/audio_mixer/audio_mixer_impl.h" @@ -26,6 +30,7 @@ #include "modules/audio_processing/logging/apm_data_dumper.h" #include "rtc_base/arraysize.h" #include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" #include "system_wrappers/include/metrics.h" namespace webrtc { @@ -53,11 +58,23 @@ void SetAudioFrameFields(rtc::ArrayView<const AudioFrame* const> mix_list, if (mix_list.empty()) { audio_frame_for_mixing->elapsed_time_ms_ = -1; - } else if (mix_list.size() == 1) { + } else { audio_frame_for_mixing->timestamp_ = mix_list[0]->timestamp_; audio_frame_for_mixing->elapsed_time_ms_ = mix_list[0]->elapsed_time_ms_; audio_frame_for_mixing->ntp_time_ms_ = mix_list[0]->ntp_time_ms_; - audio_frame_for_mixing->packet_infos_ = mix_list[0]->packet_infos_; + std::vector<RtpPacketInfo> packet_infos; + for (const auto& frame : mix_list) { + audio_frame_for_mixing->timestamp_ = + std::min(audio_frame_for_mixing->timestamp_, frame->timestamp_); + audio_frame_for_mixing->ntp_time_ms_ = + std::min(audio_frame_for_mixing->ntp_time_ms_, frame->ntp_time_ms_); + audio_frame_for_mixing->elapsed_time_ms_ = std::max( + audio_frame_for_mixing->elapsed_time_ms_, frame->elapsed_time_ms_); + packet_infos.insert(packet_infos.end(), frame->packet_infos_.begin(), + frame->packet_infos_.end()); + } + audio_frame_for_mixing->packet_infos_ = + RtpPacketInfos(std::move(packet_infos)); } } @@ -207,10 +224,10 @@ void FrameCombiner::LogMixingStats( uma_logging_counter_ = 0; RTC_HISTOGRAM_COUNTS_100("WebRTC.Audio.AudioMixer.NumIncomingStreams", static_cast<int>(number_of_streams)); - RTC_HISTOGRAM_ENUMERATION( - "WebRTC.Audio.AudioMixer.NumIncomingActiveStreams", - static_cast<int>(mix_list.size()), - AudioMixerImpl::kMaximumAmountOfMixedAudioSources); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.AudioMixer.NumIncomingActiveStreams2", + rtc::dchecked_cast<int>(mix_list.size()), /*min=*/1, /*max=*/16, + 
/*bucket_count=*/16); using NativeRate = AudioProcessing::NativeRate; static constexpr NativeRate native_rates[] = { diff --git a/modules/audio_mixer/frame_combiner_unittest.cc b/modules/audio_mixer/frame_combiner_unittest.cc index 4b189a052e..fa1fef325c 100644 --- a/modules/audio_mixer/frame_combiner_unittest.cc +++ b/modules/audio_mixer/frame_combiner_unittest.cc @@ -15,8 +15,12 @@ #include <numeric> #include <string> #include <type_traits> +#include <vector> +#include "absl/types/optional.h" #include "api/array_view.h" +#include "api/rtp_packet_info.h" +#include "api/rtp_packet_infos.h" #include "audio/utility/audio_frame_operations.h" #include "modules/audio_mixer/gain_change_calculator.h" #include "modules/audio_mixer/sine_wave_generator.h" @@ -28,7 +32,13 @@ namespace webrtc { namespace { + +using ::testing::ElementsAreArray; +using ::testing::IsEmpty; +using ::testing::UnorderedElementsAreArray; + using LimiterType = FrameCombiner::LimiterType; + struct FrameCombinerConfig { bool use_limiter; int sample_rate_hz; @@ -57,9 +67,24 @@ std::string ProduceDebugText(const FrameCombinerConfig& config) { AudioFrame frame1; AudioFrame frame2; -AudioFrame audio_frame_for_mixing; void SetUpFrames(int sample_rate_hz, int number_of_channels) { + RtpPacketInfo packet_info1( + /*ssrc=*/1001, /*csrcs=*/{}, /*rtp_timestamp=*/1000, + /*audio_level=*/absl::nullopt, /*absolute_capture_time=*/absl::nullopt, + /*receive_time_ms=*/1); + RtpPacketInfo packet_info2( + /*ssrc=*/4004, /*csrcs=*/{}, /*rtp_timestamp=*/1234, + /*audio_level=*/absl::nullopt, /*absolute_capture_time=*/absl::nullopt, + /*receive_time_ms=*/2); + RtpPacketInfo packet_info3( + /*ssrc=*/7007, /*csrcs=*/{}, /*rtp_timestamp=*/1333, + /*audio_level=*/absl::nullopt, /*absolute_capture_time=*/absl::nullopt, + /*receive_time_ms=*/2); + + frame1.packet_infos_ = RtpPacketInfos({packet_info1}); + frame2.packet_infos_ = RtpPacketInfos({packet_info2, packet_info3}); + for (auto* frame : {&frame1, &frame2}) { 
frame->UpdateFrame(0, nullptr, rtc::CheckedDivExact(sample_rate_hz, 100), sample_rate_hz, AudioFrame::kNormalSpeech, @@ -81,6 +106,7 @@ TEST(FrameCombiner, BasicApiCallsLimiter) { ProduceDebugText(rate, number_of_channels, number_of_frames)); const std::vector<AudioFrame*> frames_to_combine( all_frames.begin(), all_frames.begin() + number_of_frames); + AudioFrame audio_frame_for_mixing; combiner.Combine(frames_to_combine, number_of_channels, rate, frames_to_combine.size(), &audio_frame_for_mixing); } @@ -88,6 +114,35 @@ TEST(FrameCombiner, BasicApiCallsLimiter) { } } +// The RtpPacketInfos field of the mixed packet should contain the union of the +// RtpPacketInfos from the frames that were actually mixed. +TEST(FrameCombiner, ContainsAllRtpPacketInfos) { + static constexpr int kSampleRateHz = 48000; + static constexpr int kNumChannels = 1; + FrameCombiner combiner(true); + const std::vector<AudioFrame*> all_frames = {&frame1, &frame2}; + SetUpFrames(kSampleRateHz, kNumChannels); + + for (const int number_of_frames : {0, 1, 2}) { + SCOPED_TRACE( + ProduceDebugText(kSampleRateHz, kNumChannels, number_of_frames)); + const std::vector<AudioFrame*> frames_to_combine( + all_frames.begin(), all_frames.begin() + number_of_frames); + + std::vector<RtpPacketInfo> packet_infos; + for (const auto& frame : frames_to_combine) { + packet_infos.insert(packet_infos.end(), frame->packet_infos_.begin(), + frame->packet_infos_.end()); + } + + AudioFrame audio_frame_for_mixing; + combiner.Combine(frames_to_combine, kNumChannels, kSampleRateHz, + frames_to_combine.size(), &audio_frame_for_mixing); + EXPECT_THAT(audio_frame_for_mixing.packet_infos_, + UnorderedElementsAreArray(packet_infos)); + } +} + // There are DCHECKs in place to check for invalid parameters. 
TEST(FrameCombinerDeathTest, DebugBuildCrashesWithManyChannels) { FrameCombiner combiner(true); @@ -105,6 +160,7 @@ TEST(FrameCombinerDeathTest, DebugBuildCrashesWithManyChannels) { ProduceDebugText(rate, number_of_channels, number_of_frames)); const std::vector<AudioFrame*> frames_to_combine( all_frames.begin(), all_frames.begin() + number_of_frames); + AudioFrame audio_frame_for_mixing; #if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) EXPECT_DEATH( combiner.Combine(frames_to_combine, number_of_channels, rate, @@ -134,6 +190,7 @@ TEST(FrameCombinerDeathTest, DebugBuildCrashesWithHighRate) { ProduceDebugText(rate, number_of_channels, number_of_frames)); const std::vector<AudioFrame*> frames_to_combine( all_frames.begin(), all_frames.begin() + number_of_frames); + AudioFrame audio_frame_for_mixing; #if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) EXPECT_DEATH( combiner.Combine(frames_to_combine, number_of_channels, rate, @@ -161,6 +218,7 @@ TEST(FrameCombiner, BasicApiCallsNoLimiter) { ProduceDebugText(rate, number_of_channels, number_of_frames)); const std::vector<AudioFrame*> frames_to_combine( all_frames.begin(), all_frames.begin() + number_of_frames); + AudioFrame audio_frame_for_mixing; combiner.Combine(frames_to_combine, number_of_channels, rate, frames_to_combine.size(), &audio_frame_for_mixing); } @@ -174,10 +232,11 @@ TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) { for (const int number_of_channels : {1, 2}) { SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 0)); + AudioFrame audio_frame_for_mixing; + const std::vector<AudioFrame*> frames_to_combine; combiner.Combine(frames_to_combine, number_of_channels, rate, frames_to_combine.size(), &audio_frame_for_mixing); - const int16_t* audio_frame_for_mixing_data = audio_frame_for_mixing.data(); const std::vector<int16_t> mixed_data( @@ -186,6 +245,7 @@ TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) { const std::vector<int16_t> 
expected(number_of_channels * rate / 100, 0); EXPECT_EQ(mixed_data, expected); + EXPECT_THAT(audio_frame_for_mixing.packet_infos_, IsEmpty()); } } } @@ -196,6 +256,8 @@ TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) { for (const int number_of_channels : {1, 2, 4, 8, 10}) { SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1)); + AudioFrame audio_frame_for_mixing; + SetUpFrames(rate, number_of_channels); int16_t* frame1_data = frame1.mutable_data(); std::iota(frame1_data, frame1_data + number_of_channels * rate / 100, 0); @@ -212,6 +274,8 @@ TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) { std::vector<int16_t> expected(number_of_channels * rate / 100); std::iota(expected.begin(), expected.end(), 0); EXPECT_EQ(mixed_data, expected); + EXPECT_THAT(audio_frame_for_mixing.packet_infos_, + ElementsAreArray(frame1.packet_infos_)); } } } @@ -255,6 +319,7 @@ TEST(FrameCombiner, GainCurveIsSmoothForAlternatingNumberOfStreams) { // Ensures limiter is on if 'use_limiter'. constexpr size_t number_of_streams = 2; + AudioFrame audio_frame_for_mixing; combiner.Combine(frames_to_combine, config.number_of_channels, config.sample_rate_hz, number_of_streams, &audio_frame_for_mixing); diff --git a/modules/audio_mixer/g3doc/index.md b/modules/audio_mixer/g3doc/index.md new file mode 100644 index 0000000000..285530e95a --- /dev/null +++ b/modules/audio_mixer/g3doc/index.md @@ -0,0 +1,54 @@ +<?% config.freshness.owner = 'alessiob' %?> +<?% config.freshness.reviewed = '2021-04-21' %?> + +# The WebRTC Audio Mixer Module + +The WebRTC audio mixer module is responsible for mixing multiple incoming audio +streams (sources) into a single audio stream (mix). It works with 10 ms frames, +it supports sample rates up to 48 kHz and up to 8 audio channels. 
The API is +defined in +[`api/audio/audio_mixer.h`](https://source.chromium.org/chromium/chromium/src/+/master:third_party/webrtc/api/audio/audio_mixer.h) +and it includes the definition of +[`AudioMixer::Source`](https://source.chromium.org/search?q=symbol:AudioMixer::Source%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h), +which describes an incoming audio stream, and the definition of +[`AudioMixer`](https://source.chromium.org/search?q=symbol:AudioMixer%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h), +which operates on a collection of +[`AudioMixer::Source`](https://source.chromium.org/search?q=symbol:AudioMixer::Source%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h) +objects to produce a mix. + +## AudioMixer::Source + +A source has different characteristics (e.g., sample rate, number of channels, +muted state) and it is identified by an SSRC[^1]. +[`AudioMixer::Source::GetAudioFrameWithInfo()`](https://source.chromium.org/search?q=symbol:AudioMixer::Source::GetAudioFrameWithInfo%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h) +is used to retrieve the next 10 ms chunk of audio to be mixed. + +[^1]: A synchronization source (SSRC) is the source of a stream of RTP packets, + identified by a 32-bit numeric SSRC identifier carried in the RTP header + so as not to be dependent upon the network address (see + [RFC 3550](https://tools.ietf.org/html/rfc3550#section-3)). + +## AudioMixer + +The interface allows adding and removing sources, and the +[`AudioMixer::Mix()`](https://source.chromium.org/search?q=symbol:AudioMixer::Mix%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h) +method allows generating a mix with the desired number of channels. 
+ +## WebRTC implementation + +The interface is implemented in different parts of WebRTC: + +* [`AudioMixer::Source`](https://source.chromium.org/search?q=symbol:AudioMixer::Source%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h): + [`audio/audio_receive_stream.h`](https://source.chromium.org/chromium/chromium/src/+/master:third_party/webrtc/audio/audio_receive_stream.h) +* [`AudioMixer`](https://source.chromium.org/search?q=symbol:AudioMixer%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h): + [`modules/audio_mixer/audio_mixer_impl.h`](https://source.chromium.org/chromium/chromium/src/+/master:third_party/webrtc/modules/audio_mixer/audio_mixer_impl.h) + +[`AudioMixer`](https://source.chromium.org/search?q=symbol:AudioMixer%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h) +is thread-safe. The output sample rate of the generated mix is automatically +assigned depending on the sample rate of the sources; whereas the number of +output channels is defined by the caller[^2]. Samples from the non-muted sources +are summed up and then a limiter is used to apply soft-clipping when needed. + +[^2]: [`audio/utility/channel_mixer.h`](https://source.chromium.org/chromium/chromium/src/+/master:third_party/webrtc/audio/utility/channel_mixer.h) + is used to mix channels in the non-trivial cases - i.e., if the number of + channels for a source or the mix is greater than 3. |