aboutsummaryrefslogtreecommitdiff
path: root/modules/audio_mixer
diff options
context:
space:
mode:
Diffstat (limited to 'modules/audio_mixer')
-rw-r--r--modules/audio_mixer/BUILD.gn6
-rw-r--r--modules/audio_mixer/OWNERS.webrtc2
-rw-r--r--modules/audio_mixer/audio_mixer_impl.cc27
-rw-r--r--modules/audio_mixer/audio_mixer_impl.h14
-rw-r--r--modules/audio_mixer/audio_mixer_impl_unittest.cc164
-rw-r--r--modules/audio_mixer/frame_combiner.cc29
-rw-r--r--modules/audio_mixer/frame_combiner_unittest.cc69
-rw-r--r--modules/audio_mixer/g3doc/index.md54
8 files changed, 332 insertions, 33 deletions
diff --git a/modules/audio_mixer/BUILD.gn b/modules/audio_mixer/BUILD.gn
index 739d62d705..d51be4af04 100644
--- a/modules/audio_mixer/BUILD.gn
+++ b/modules/audio_mixer/BUILD.gn
@@ -39,6 +39,7 @@ rtc_library("audio_mixer_impl") {
deps = [
":audio_frame_manipulator",
"../../api:array_view",
+ "../../api:rtp_packet_info",
"../../api:scoped_refptr",
"../../api/audio:audio_frame_api",
"../../api/audio:audio_mixer_api",
@@ -46,6 +47,7 @@ rtc_library("audio_mixer_impl") {
"../../common_audio",
"../../rtc_base:checks",
"../../rtc_base:rtc_base_approved",
+ "../../rtc_base:safe_conversions",
"../../rtc_base/synchronization:mutex",
"../../system_wrappers",
"../../system_wrappers:metrics",
@@ -104,13 +106,15 @@ if (rtc_include_tests) {
"audio_mixer_impl_unittest.cc",
"frame_combiner_unittest.cc",
]
-
+ absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
deps = [
":audio_frame_manipulator",
":audio_mixer_impl",
":audio_mixer_test_utils",
"../../api:array_view",
+ "../../api:rtp_packet_info",
"../../api/audio:audio_mixer_api",
+ "../../api/units:timestamp",
"../../audio/utility:audio_frame_operations",
"../../rtc_base:checks",
"../../rtc_base:rtc_base_approved",
diff --git a/modules/audio_mixer/OWNERS.webrtc b/modules/audio_mixer/OWNERS.webrtc
index b33d599697..5edc304ab3 100644
--- a/modules/audio_mixer/OWNERS.webrtc
+++ b/modules/audio_mixer/OWNERS.webrtc
@@ -1,2 +1,2 @@
-aleloi@webrtc.org
+alessiob@webrtc.org
henrik.lundin@webrtc.org
diff --git a/modules/audio_mixer/audio_mixer_impl.cc b/modules/audio_mixer/audio_mixer_impl.cc
index 04a8bcf723..8cebc38779 100644
--- a/modules/audio_mixer/audio_mixer_impl.cc
+++ b/modules/audio_mixer/audio_mixer_impl.cc
@@ -126,30 +126,33 @@ struct AudioMixerImpl::HelperContainers {
AudioMixerImpl::AudioMixerImpl(
std::unique_ptr<OutputRateCalculator> output_rate_calculator,
- bool use_limiter)
- : output_rate_calculator_(std::move(output_rate_calculator)),
+ bool use_limiter,
+ int max_sources_to_mix)
+ : max_sources_to_mix_(max_sources_to_mix),
+ output_rate_calculator_(std::move(output_rate_calculator)),
audio_source_list_(),
helper_containers_(std::make_unique<HelperContainers>()),
frame_combiner_(use_limiter) {
- const int kTypicalMaxNumberOfMixedStreams = 3;
- audio_source_list_.reserve(kTypicalMaxNumberOfMixedStreams);
- helper_containers_->resize(kTypicalMaxNumberOfMixedStreams);
+ RTC_CHECK_GE(max_sources_to_mix, 1) << "At least one source must be mixed";
+ audio_source_list_.reserve(max_sources_to_mix);
+ helper_containers_->resize(max_sources_to_mix);
}
AudioMixerImpl::~AudioMixerImpl() {}
-rtc::scoped_refptr<AudioMixerImpl> AudioMixerImpl::Create() {
+rtc::scoped_refptr<AudioMixerImpl> AudioMixerImpl::Create(
+ int max_sources_to_mix) {
return Create(std::unique_ptr<DefaultOutputRateCalculator>(
new DefaultOutputRateCalculator()),
- true);
+ /*use_limiter=*/true, max_sources_to_mix);
}
rtc::scoped_refptr<AudioMixerImpl> AudioMixerImpl::Create(
std::unique_ptr<OutputRateCalculator> output_rate_calculator,
- bool use_limiter) {
- return rtc::scoped_refptr<AudioMixerImpl>(
- new rtc::RefCountedObject<AudioMixerImpl>(
- std::move(output_rate_calculator), use_limiter));
+ bool use_limiter,
+ int max_sources_to_mix) {
+ return rtc::make_ref_counted<AudioMixerImpl>(
+ std::move(output_rate_calculator), use_limiter, max_sources_to_mix);
}
void AudioMixerImpl::Mix(size_t number_of_channels,
@@ -219,7 +222,7 @@ rtc::ArrayView<AudioFrame* const> AudioMixerImpl::GetAudioFromSources(
std::sort(audio_source_mixing_data_view.begin(),
audio_source_mixing_data_view.end(), ShouldMixBefore);
- int max_audio_frame_counter = kMaximumAmountOfMixedAudioSources;
+ int max_audio_frame_counter = max_sources_to_mix_;
int ramp_list_lengh = 0;
int audio_to_mix_count = 0;
// Go through list in order and put unmuted frames in result list.
diff --git a/modules/audio_mixer/audio_mixer_impl.h b/modules/audio_mixer/audio_mixer_impl.h
index 0a13082725..737fcbdc43 100644
--- a/modules/audio_mixer/audio_mixer_impl.h
+++ b/modules/audio_mixer/audio_mixer_impl.h
@@ -35,13 +35,16 @@ class AudioMixerImpl : public AudioMixer {
// AudioProcessing only accepts 10 ms frames.
static const int kFrameDurationInMs = 10;
- enum : int { kMaximumAmountOfMixedAudioSources = 3 };
- static rtc::scoped_refptr<AudioMixerImpl> Create();
+ static const int kDefaultNumberOfMixedAudioSources = 3;
+
+ static rtc::scoped_refptr<AudioMixerImpl> Create(
+ int max_sources_to_mix = kDefaultNumberOfMixedAudioSources);
static rtc::scoped_refptr<AudioMixerImpl> Create(
std::unique_ptr<OutputRateCalculator> output_rate_calculator,
- bool use_limiter);
+ bool use_limiter,
+ int max_sources_to_mix = kDefaultNumberOfMixedAudioSources);
~AudioMixerImpl() override;
@@ -60,7 +63,8 @@ class AudioMixerImpl : public AudioMixer {
protected:
AudioMixerImpl(std::unique_ptr<OutputRateCalculator> output_rate_calculator,
- bool use_limiter);
+ bool use_limiter,
+ int max_sources_to_mix);
private:
struct HelperContainers;
@@ -76,6 +80,8 @@ class AudioMixerImpl : public AudioMixer {
// checks that mixing is done sequentially.
mutable Mutex mutex_;
+ const int max_sources_to_mix_;
+
std::unique_ptr<OutputRateCalculator> output_rate_calculator_;
// List of all audio sources.
diff --git a/modules/audio_mixer/audio_mixer_impl_unittest.cc b/modules/audio_mixer/audio_mixer_impl_unittest.cc
index c2f02fbfbd..61aa74e0a1 100644
--- a/modules/audio_mixer/audio_mixer_impl_unittest.cc
+++ b/modules/audio_mixer/audio_mixer_impl_unittest.cc
@@ -12,12 +12,18 @@
#include <string.h>
+#include <cstdint>
#include <limits>
#include <memory>
#include <string>
#include <utility>
+#include <vector>
+#include "absl/types/optional.h"
#include "api/audio/audio_mixer.h"
+#include "api/rtp_packet_info.h"
+#include "api/rtp_packet_infos.h"
+#include "api/units/timestamp.h"
#include "modules/audio_mixer/default_output_rate_calculator.h"
#include "rtc_base/checks.h"
#include "rtc_base/strings/string_builder.h"
@@ -29,6 +35,7 @@ using ::testing::_;
using ::testing::Exactly;
using ::testing::Invoke;
using ::testing::Return;
+using ::testing::UnorderedElementsAre;
namespace webrtc {
@@ -87,6 +94,10 @@ class MockMixerAudioSource : public ::testing::NiceMock<AudioMixer::Source> {
fake_audio_frame_info_ = audio_frame_info;
}
+ void set_packet_infos(const RtpPacketInfos& packet_infos) {
+ packet_infos_ = packet_infos;
+ }
+
private:
AudioFrameInfo FakeAudioFrameWithInfo(int sample_rate_hz,
AudioFrame* audio_frame) {
@@ -94,11 +105,13 @@ class MockMixerAudioSource : public ::testing::NiceMock<AudioMixer::Source> {
audio_frame->sample_rate_hz_ = sample_rate_hz;
audio_frame->samples_per_channel_ =
rtc::CheckedDivExact(sample_rate_hz, 100);
+ audio_frame->packet_infos_ = packet_infos_;
return fake_info();
}
AudioFrame fake_frame_;
AudioFrameInfo fake_audio_frame_info_;
+ RtpPacketInfos packet_infos_;
};
class CustomRateCalculator : public OutputRateCalculator {
@@ -160,7 +173,7 @@ void MixMonoAtGivenNativeRate(int native_sample_rate,
TEST(AudioMixer, LargestEnergyVadActiveMixed) {
constexpr int kAudioSources =
- AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 3;
+ AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 3;
const auto mixer = AudioMixerImpl::Create();
@@ -191,7 +204,7 @@ TEST(AudioMixer, LargestEnergyVadActiveMixed) {
mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]);
if (i == kAudioSources - 1 ||
i < kAudioSources - 1 -
- AudioMixerImpl::kMaximumAmountOfMixedAudioSources) {
+ AudioMixerImpl::kDefaultNumberOfMixedAudioSources) {
EXPECT_FALSE(is_mixed)
<< "Mixing status of AudioSource #" << i << " wrong.";
} else {
@@ -322,7 +335,7 @@ TEST(AudioMixer, ParticipantNumberOfChannels) {
// another participant with higher energy is added.
TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) {
constexpr int kAudioSources =
- AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;
+ AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 1;
const auto mixer = AudioMixerImpl::Create();
MockMixerAudioSource participants[kAudioSources];
@@ -399,7 +412,7 @@ TEST(AudioMixer, ConstructFromOtherThread) {
TEST(AudioMixer, MutedShouldMixAfterUnmuted) {
constexpr int kAudioSources =
- AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;
+ AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 1;
std::vector<AudioFrame> frames(kAudioSources);
for (auto& frame : frames) {
@@ -417,7 +430,7 @@ TEST(AudioMixer, MutedShouldMixAfterUnmuted) {
TEST(AudioMixer, PassiveShouldMixAfterNormal) {
constexpr int kAudioSources =
- AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;
+ AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 1;
std::vector<AudioFrame> frames(kAudioSources);
for (auto& frame : frames) {
@@ -435,7 +448,7 @@ TEST(AudioMixer, PassiveShouldMixAfterNormal) {
TEST(AudioMixer, ActiveShouldMixBeforeLoud) {
constexpr int kAudioSources =
- AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;
+ AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 1;
std::vector<AudioFrame> frames(kAudioSources);
for (auto& frame : frames) {
@@ -454,9 +467,52 @@ TEST(AudioMixer, ActiveShouldMixBeforeLoud) {
MixAndCompare(frames, frame_info, expected_status);
}
+TEST(AudioMixer, ShouldMixUpToSpecifiedNumberOfSourcesToMix) {
+ constexpr int kAudioSources = 5;
+ constexpr int kSourcesToMix = 2;
+
+ std::vector<AudioFrame> frames(kAudioSources);
+ for (auto& frame : frames) {
+ ResetFrame(&frame);
+ }
+
+ std::vector<AudioMixer::Source::AudioFrameInfo> frame_info(
+ kAudioSources, AudioMixer::Source::AudioFrameInfo::kNormal);
+  // Set up to kSourcesToMix sources with kVadActive so that they're mixed.
+ const std::vector<AudioFrame::VADActivity> kVadActivities = {
+ AudioFrame::kVadUnknown, AudioFrame::kVadPassive, AudioFrame::kVadPassive,
+ AudioFrame::kVadActive, AudioFrame::kVadActive};
+ // Populate VAD and frame for all sources.
+ for (int i = 0; i < kAudioSources; i++) {
+ frames[i].vad_activity_ = kVadActivities[i];
+ }
+
+ std::vector<MockMixerAudioSource> participants(kAudioSources);
+ for (int i = 0; i < kAudioSources; ++i) {
+ participants[i].fake_frame()->CopyFrom(frames[i]);
+ participants[i].set_fake_info(frame_info[i]);
+ }
+
+ const auto mixer = AudioMixerImpl::Create(kSourcesToMix);
+ for (int i = 0; i < kAudioSources; ++i) {
+ EXPECT_TRUE(mixer->AddSource(&participants[i]));
+ EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
+ .Times(Exactly(1));
+ }
+
+ mixer->Mix(1, &frame_for_mixing);
+
+ std::vector<bool> expected_status = {false, false, false, true, true};
+ for (int i = 0; i < kAudioSources; ++i) {
+ EXPECT_EQ(expected_status[i],
+ mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
+ << "Wrong mix status for source #" << i << " is wrong";
+ }
+}
+
TEST(AudioMixer, UnmutedShouldMixBeforeLoud) {
constexpr int kAudioSources =
- AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;
+ AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 1;
std::vector<AudioFrame> frames(kAudioSources);
for (auto& frame : frames) {
@@ -595,6 +651,100 @@ TEST(AudioMixer, MultipleChannelsManyParticipants) {
}
}
+TEST(AudioMixer, ShouldIncludeRtpPacketInfoFromAllMixedSources) {
+ const uint32_t kSsrc0 = 10;
+ const uint32_t kSsrc1 = 11;
+ const uint32_t kSsrc2 = 12;
+ const uint32_t kCsrc0 = 20;
+ const uint32_t kCsrc1 = 21;
+ const uint32_t kCsrc2 = 22;
+ const uint32_t kCsrc3 = 23;
+ const int kAudioLevel0 = 10;
+ const int kAudioLevel1 = 40;
+ const absl::optional<uint32_t> kAudioLevel2 = absl::nullopt;
+ const uint32_t kRtpTimestamp0 = 300;
+ const uint32_t kRtpTimestamp1 = 400;
+ const Timestamp kReceiveTime0 = Timestamp::Millis(10);
+ const Timestamp kReceiveTime1 = Timestamp::Millis(20);
+
+ const RtpPacketInfo kPacketInfo0(kSsrc0, {kCsrc0, kCsrc1}, kRtpTimestamp0,
+ kAudioLevel0, absl::nullopt, kReceiveTime0);
+ const RtpPacketInfo kPacketInfo1(kSsrc1, {kCsrc2}, kRtpTimestamp1,
+ kAudioLevel1, absl::nullopt, kReceiveTime1);
+ const RtpPacketInfo kPacketInfo2(kSsrc2, {kCsrc3}, kRtpTimestamp1,
+ kAudioLevel2, absl::nullopt, kReceiveTime1);
+
+ const auto mixer = AudioMixerImpl::Create();
+
+ MockMixerAudioSource source;
+ source.set_packet_infos(RtpPacketInfos({kPacketInfo0}));
+ mixer->AddSource(&source);
+ ResetFrame(source.fake_frame());
+ mixer->Mix(1, &frame_for_mixing);
+
+ MockMixerAudioSource other_source;
+ other_source.set_packet_infos(RtpPacketInfos({kPacketInfo1, kPacketInfo2}));
+ ResetFrame(other_source.fake_frame());
+ mixer->AddSource(&other_source);
+
+ mixer->Mix(/*number_of_channels=*/1, &frame_for_mixing);
+
+ EXPECT_THAT(frame_for_mixing.packet_infos_,
+ UnorderedElementsAre(kPacketInfo0, kPacketInfo1, kPacketInfo2));
+}
+
+TEST(AudioMixer, MixerShouldIncludeRtpPacketInfoFromMixedSourcesOnly) {
+ const uint32_t kSsrc0 = 10;
+ const uint32_t kSsrc1 = 11;
+ const uint32_t kSsrc2 = 21;
+ const uint32_t kCsrc0 = 30;
+ const uint32_t kCsrc1 = 31;
+ const uint32_t kCsrc2 = 32;
+ const uint32_t kCsrc3 = 33;
+ const int kAudioLevel0 = 10;
+ const absl::optional<uint32_t> kAudioLevelMissing = absl::nullopt;
+ const uint32_t kRtpTimestamp0 = 300;
+ const uint32_t kRtpTimestamp1 = 400;
+ const Timestamp kReceiveTime0 = Timestamp::Millis(10);
+ const Timestamp kReceiveTime1 = Timestamp::Millis(20);
+
+ const RtpPacketInfo kPacketInfo0(kSsrc0, {kCsrc0, kCsrc1}, kRtpTimestamp0,
+ kAudioLevel0, absl::nullopt, kReceiveTime0);
+ const RtpPacketInfo kPacketInfo1(kSsrc1, {kCsrc2}, kRtpTimestamp1,
+ kAudioLevelMissing, absl::nullopt,
+ kReceiveTime1);
+ const RtpPacketInfo kPacketInfo2(kSsrc2, {kCsrc3}, kRtpTimestamp1,
+ kAudioLevelMissing, absl::nullopt,
+ kReceiveTime1);
+
+ const auto mixer = AudioMixerImpl::Create(/*max_sources_to_mix=*/2);
+
+ MockMixerAudioSource source1;
+ source1.set_packet_infos(RtpPacketInfos({kPacketInfo0}));
+ mixer->AddSource(&source1);
+ ResetFrame(source1.fake_frame());
+ mixer->Mix(1, &frame_for_mixing);
+
+ MockMixerAudioSource source2;
+ source2.set_packet_infos(RtpPacketInfos({kPacketInfo1}));
+ ResetFrame(source2.fake_frame());
+ mixer->AddSource(&source2);
+
+ // The mixer prioritizes kVadActive over kVadPassive.
+ // We limit the number of sources to mix to 2 and set the third source's VAD
+ // activity to kVadPassive so that it will not be added to the mix.
+ MockMixerAudioSource source3;
+ source3.set_packet_infos(RtpPacketInfos({kPacketInfo2}));
+ ResetFrame(source3.fake_frame());
+ source3.fake_frame()->vad_activity_ = AudioFrame::kVadPassive;
+ mixer->AddSource(&source3);
+
+ mixer->Mix(/*number_of_channels=*/1, &frame_for_mixing);
+
+ EXPECT_THAT(frame_for_mixing.packet_infos_,
+ UnorderedElementsAre(kPacketInfo0, kPacketInfo1));
+}
+
class HighOutputRateCalculator : public OutputRateCalculator {
public:
static const int kDefaultFrequency = 76000;
diff --git a/modules/audio_mixer/frame_combiner.cc b/modules/audio_mixer/frame_combiner.cc
index fb6f72af75..e31eea595f 100644
--- a/modules/audio_mixer/frame_combiner.cc
+++ b/modules/audio_mixer/frame_combiner.cc
@@ -16,8 +16,12 @@
#include <iterator>
#include <memory>
#include <string>
+#include <utility>
+#include <vector>
#include "api/array_view.h"
+#include "api/rtp_packet_info.h"
+#include "api/rtp_packet_infos.h"
#include "common_audio/include/audio_util.h"
#include "modules/audio_mixer/audio_frame_manipulator.h"
#include "modules/audio_mixer/audio_mixer_impl.h"
@@ -26,6 +30,7 @@
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/arraysize.h"
#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
#include "system_wrappers/include/metrics.h"
namespace webrtc {
@@ -53,11 +58,23 @@ void SetAudioFrameFields(rtc::ArrayView<const AudioFrame* const> mix_list,
if (mix_list.empty()) {
audio_frame_for_mixing->elapsed_time_ms_ = -1;
- } else if (mix_list.size() == 1) {
+ } else {
audio_frame_for_mixing->timestamp_ = mix_list[0]->timestamp_;
audio_frame_for_mixing->elapsed_time_ms_ = mix_list[0]->elapsed_time_ms_;
audio_frame_for_mixing->ntp_time_ms_ = mix_list[0]->ntp_time_ms_;
- audio_frame_for_mixing->packet_infos_ = mix_list[0]->packet_infos_;
+ std::vector<RtpPacketInfo> packet_infos;
+ for (const auto& frame : mix_list) {
+ audio_frame_for_mixing->timestamp_ =
+ std::min(audio_frame_for_mixing->timestamp_, frame->timestamp_);
+ audio_frame_for_mixing->ntp_time_ms_ =
+ std::min(audio_frame_for_mixing->ntp_time_ms_, frame->ntp_time_ms_);
+ audio_frame_for_mixing->elapsed_time_ms_ = std::max(
+ audio_frame_for_mixing->elapsed_time_ms_, frame->elapsed_time_ms_);
+ packet_infos.insert(packet_infos.end(), frame->packet_infos_.begin(),
+ frame->packet_infos_.end());
+ }
+ audio_frame_for_mixing->packet_infos_ =
+ RtpPacketInfos(std::move(packet_infos));
}
}
@@ -207,10 +224,10 @@ void FrameCombiner::LogMixingStats(
uma_logging_counter_ = 0;
RTC_HISTOGRAM_COUNTS_100("WebRTC.Audio.AudioMixer.NumIncomingStreams",
static_cast<int>(number_of_streams));
- RTC_HISTOGRAM_ENUMERATION(
- "WebRTC.Audio.AudioMixer.NumIncomingActiveStreams",
- static_cast<int>(mix_list.size()),
- AudioMixerImpl::kMaximumAmountOfMixedAudioSources);
+ RTC_HISTOGRAM_COUNTS_LINEAR(
+ "WebRTC.Audio.AudioMixer.NumIncomingActiveStreams2",
+ rtc::dchecked_cast<int>(mix_list.size()), /*min=*/1, /*max=*/16,
+ /*bucket_count=*/16);
using NativeRate = AudioProcessing::NativeRate;
static constexpr NativeRate native_rates[] = {
diff --git a/modules/audio_mixer/frame_combiner_unittest.cc b/modules/audio_mixer/frame_combiner_unittest.cc
index 4b189a052e..fa1fef325c 100644
--- a/modules/audio_mixer/frame_combiner_unittest.cc
+++ b/modules/audio_mixer/frame_combiner_unittest.cc
@@ -15,8 +15,12 @@
#include <numeric>
#include <string>
#include <type_traits>
+#include <vector>
+#include "absl/types/optional.h"
#include "api/array_view.h"
+#include "api/rtp_packet_info.h"
+#include "api/rtp_packet_infos.h"
#include "audio/utility/audio_frame_operations.h"
#include "modules/audio_mixer/gain_change_calculator.h"
#include "modules/audio_mixer/sine_wave_generator.h"
@@ -28,7 +32,13 @@
namespace webrtc {
namespace {
+
+using ::testing::ElementsAreArray;
+using ::testing::IsEmpty;
+using ::testing::UnorderedElementsAreArray;
+
using LimiterType = FrameCombiner::LimiterType;
+
struct FrameCombinerConfig {
bool use_limiter;
int sample_rate_hz;
@@ -57,9 +67,24 @@ std::string ProduceDebugText(const FrameCombinerConfig& config) {
AudioFrame frame1;
AudioFrame frame2;
-AudioFrame audio_frame_for_mixing;
void SetUpFrames(int sample_rate_hz, int number_of_channels) {
+ RtpPacketInfo packet_info1(
+ /*ssrc=*/1001, /*csrcs=*/{}, /*rtp_timestamp=*/1000,
+ /*audio_level=*/absl::nullopt, /*absolute_capture_time=*/absl::nullopt,
+ /*receive_time_ms=*/1);
+ RtpPacketInfo packet_info2(
+ /*ssrc=*/4004, /*csrcs=*/{}, /*rtp_timestamp=*/1234,
+ /*audio_level=*/absl::nullopt, /*absolute_capture_time=*/absl::nullopt,
+ /*receive_time_ms=*/2);
+ RtpPacketInfo packet_info3(
+ /*ssrc=*/7007, /*csrcs=*/{}, /*rtp_timestamp=*/1333,
+ /*audio_level=*/absl::nullopt, /*absolute_capture_time=*/absl::nullopt,
+ /*receive_time_ms=*/2);
+
+ frame1.packet_infos_ = RtpPacketInfos({packet_info1});
+ frame2.packet_infos_ = RtpPacketInfos({packet_info2, packet_info3});
+
for (auto* frame : {&frame1, &frame2}) {
frame->UpdateFrame(0, nullptr, rtc::CheckedDivExact(sample_rate_hz, 100),
sample_rate_hz, AudioFrame::kNormalSpeech,
@@ -81,6 +106,7 @@ TEST(FrameCombiner, BasicApiCallsLimiter) {
ProduceDebugText(rate, number_of_channels, number_of_frames));
const std::vector<AudioFrame*> frames_to_combine(
all_frames.begin(), all_frames.begin() + number_of_frames);
+ AudioFrame audio_frame_for_mixing;
combiner.Combine(frames_to_combine, number_of_channels, rate,
frames_to_combine.size(), &audio_frame_for_mixing);
}
@@ -88,6 +114,35 @@ TEST(FrameCombiner, BasicApiCallsLimiter) {
}
}
+// The RtpPacketInfos field of the mixed frame should contain the union of the
+// RtpPacketInfos from the frames that were actually mixed.
+TEST(FrameCombiner, ContainsAllRtpPacketInfos) {
+ static constexpr int kSampleRateHz = 48000;
+ static constexpr int kNumChannels = 1;
+ FrameCombiner combiner(true);
+ const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
+ SetUpFrames(kSampleRateHz, kNumChannels);
+
+ for (const int number_of_frames : {0, 1, 2}) {
+ SCOPED_TRACE(
+ ProduceDebugText(kSampleRateHz, kNumChannels, number_of_frames));
+ const std::vector<AudioFrame*> frames_to_combine(
+ all_frames.begin(), all_frames.begin() + number_of_frames);
+
+ std::vector<RtpPacketInfo> packet_infos;
+ for (const auto& frame : frames_to_combine) {
+ packet_infos.insert(packet_infos.end(), frame->packet_infos_.begin(),
+ frame->packet_infos_.end());
+ }
+
+ AudioFrame audio_frame_for_mixing;
+ combiner.Combine(frames_to_combine, kNumChannels, kSampleRateHz,
+ frames_to_combine.size(), &audio_frame_for_mixing);
+ EXPECT_THAT(audio_frame_for_mixing.packet_infos_,
+ UnorderedElementsAreArray(packet_infos));
+ }
+}
+
// There are DCHECKs in place to check for invalid parameters.
TEST(FrameCombinerDeathTest, DebugBuildCrashesWithManyChannels) {
FrameCombiner combiner(true);
@@ -105,6 +160,7 @@ TEST(FrameCombinerDeathTest, DebugBuildCrashesWithManyChannels) {
ProduceDebugText(rate, number_of_channels, number_of_frames));
const std::vector<AudioFrame*> frames_to_combine(
all_frames.begin(), all_frames.begin() + number_of_frames);
+ AudioFrame audio_frame_for_mixing;
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
EXPECT_DEATH(
combiner.Combine(frames_to_combine, number_of_channels, rate,
@@ -134,6 +190,7 @@ TEST(FrameCombinerDeathTest, DebugBuildCrashesWithHighRate) {
ProduceDebugText(rate, number_of_channels, number_of_frames));
const std::vector<AudioFrame*> frames_to_combine(
all_frames.begin(), all_frames.begin() + number_of_frames);
+ AudioFrame audio_frame_for_mixing;
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
EXPECT_DEATH(
combiner.Combine(frames_to_combine, number_of_channels, rate,
@@ -161,6 +218,7 @@ TEST(FrameCombiner, BasicApiCallsNoLimiter) {
ProduceDebugText(rate, number_of_channels, number_of_frames));
const std::vector<AudioFrame*> frames_to_combine(
all_frames.begin(), all_frames.begin() + number_of_frames);
+ AudioFrame audio_frame_for_mixing;
combiner.Combine(frames_to_combine, number_of_channels, rate,
frames_to_combine.size(), &audio_frame_for_mixing);
}
@@ -174,10 +232,11 @@ TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) {
for (const int number_of_channels : {1, 2}) {
SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 0));
+ AudioFrame audio_frame_for_mixing;
+
const std::vector<AudioFrame*> frames_to_combine;
combiner.Combine(frames_to_combine, number_of_channels, rate,
frames_to_combine.size(), &audio_frame_for_mixing);
-
const int16_t* audio_frame_for_mixing_data =
audio_frame_for_mixing.data();
const std::vector<int16_t> mixed_data(
@@ -186,6 +245,7 @@ TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) {
const std::vector<int16_t> expected(number_of_channels * rate / 100, 0);
EXPECT_EQ(mixed_data, expected);
+ EXPECT_THAT(audio_frame_for_mixing.packet_infos_, IsEmpty());
}
}
}
@@ -196,6 +256,8 @@ TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) {
for (const int number_of_channels : {1, 2, 4, 8, 10}) {
SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1));
+ AudioFrame audio_frame_for_mixing;
+
SetUpFrames(rate, number_of_channels);
int16_t* frame1_data = frame1.mutable_data();
std::iota(frame1_data, frame1_data + number_of_channels * rate / 100, 0);
@@ -212,6 +274,8 @@ TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) {
std::vector<int16_t> expected(number_of_channels * rate / 100);
std::iota(expected.begin(), expected.end(), 0);
EXPECT_EQ(mixed_data, expected);
+ EXPECT_THAT(audio_frame_for_mixing.packet_infos_,
+ ElementsAreArray(frame1.packet_infos_));
}
}
}
@@ -255,6 +319,7 @@ TEST(FrameCombiner, GainCurveIsSmoothForAlternatingNumberOfStreams) {
// Ensures limiter is on if 'use_limiter'.
constexpr size_t number_of_streams = 2;
+ AudioFrame audio_frame_for_mixing;
combiner.Combine(frames_to_combine, config.number_of_channels,
config.sample_rate_hz, number_of_streams,
&audio_frame_for_mixing);
diff --git a/modules/audio_mixer/g3doc/index.md b/modules/audio_mixer/g3doc/index.md
new file mode 100644
index 0000000000..285530e95a
--- /dev/null
+++ b/modules/audio_mixer/g3doc/index.md
@@ -0,0 +1,54 @@
+<?% config.freshness.owner = 'alessiob' %?>
+<?% config.freshness.reviewed = '2021-04-21' %?>
+
+# The WebRTC Audio Mixer Module
+
+The WebRTC audio mixer module is responsible for mixing multiple incoming audio
+streams (sources) into a single audio stream (mix). It works with 10 ms frames,
+it supports sample rates up to 48 kHz and up to 8 audio channels. The API is
+defined in
+[`api/audio/audio_mixer.h`](https://source.chromium.org/chromium/chromium/src/+/master:third_party/webrtc/api/audio/audio_mixer.h)
+and it includes the definition of
+[`AudioMixer::Source`](https://source.chromium.org/search?q=symbol:AudioMixer::Source%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h),
+which describes an incoming audio stream, and the definition of
+[`AudioMixer`](https://source.chromium.org/search?q=symbol:AudioMixer%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h),
+which operates on a collection of
+[`AudioMixer::Source`](https://source.chromium.org/search?q=symbol:AudioMixer::Source%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h)
+objects to produce a mix.
+
+## AudioMixer::Source
+
+A source has different characteristics (e.g., sample rate, number of channels,
+muted state) and it is identified by an SSRC[^1].
+[`AudioMixer::Source::GetAudioFrameWithInfo()`](https://source.chromium.org/search?q=symbol:AudioMixer::Source::GetAudioFrameWithInfo%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h)
+is used to retrieve the next 10 ms chunk of audio to be mixed.
+
+[^1]: A synchronization source (SSRC) is the source of a stream of RTP packets,
+ identified by a 32-bit numeric SSRC identifier carried in the RTP header
+ so as not to be dependent upon the network address (see
+ [RFC 3550](https://tools.ietf.org/html/rfc3550#section-3)).
+
+## AudioMixer
+
+The interface allows adding and removing sources, and the
+[`AudioMixer::Mix()`](https://source.chromium.org/search?q=symbol:AudioMixer::Mix%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h)
+method generates a mix with the desired number of channels.
+
+## WebRTC implementation
+
+The interface is implemented in different parts of WebRTC:
+
+* [`AudioMixer::Source`](https://source.chromium.org/search?q=symbol:AudioMixer::Source%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h):
+ [`audio/audio_receive_stream.h`](https://source.chromium.org/chromium/chromium/src/+/master:third_party/webrtc/audio/audio_receive_stream.h)
+* [`AudioMixer`](https://source.chromium.org/search?q=symbol:AudioMixer%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h):
+ [`modules/audio_mixer/audio_mixer_impl.h`](https://source.chromium.org/chromium/chromium/src/+/master:third_party/webrtc/modules/audio_mixer/audio_mixer_impl.h)
+
+[`AudioMixer`](https://source.chromium.org/search?q=symbol:AudioMixer%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h)
+is thread-safe. The output sample rate of the generated mix is automatically
+assigned depending on the sample rate of the sources; whereas the number of
+output channels is defined by the caller[^2]. Samples from the non-muted sources
+are summed up and then a limiter is used to apply soft-clipping when needed.
+
+[^2]: [`audio/utility/channel_mixer.h`](https://source.chromium.org/chromium/chromium/src/+/master:third_party/webrtc/audio/utility/channel_mixer.h)
+ is used to mix channels in the non-trivial cases - i.e., if the number of
+ channels for a source or the mix is greater than 3.