diff options
author | Michael Graczyk <mgraczyk@chromium.org> | 2015-07-22 21:06:11 -0700 |
---|---|---|
committer | Michael Graczyk <mgraczyk@chromium.org> | 2015-07-23 04:06:16 +0000 |
commit | c204754b7a0cc801c70e8ce6c689f57f6ce00b3b (patch) | |
tree | a49e7be1c50c59a40daff46563db8a5e2b01b3d5 | |
parent | 0b6a204b21b1360950593dcc80dc49682d637109 (diff) | |
download | webrtc-c204754b7a0cc801c70e8ce6c689f57f6ce00b3b.tar.gz |
Allow more than 2 input channels in AudioProcessing.
The number of output channels is constrained to be equal to either 1 or the
number of input channels.
R=aluebs@webrtc.org, andrew@webrtc.org, pbos@webrtc.org
Review URL: https://codereview.webrtc.org/1226093007 .
Cr-Commit-Position: refs/heads/master@{#9619}
-rw-r--r-- | talk/media/webrtc/fakewebrtcvoiceengine.h | 10 | ||||
-rw-r--r-- | webrtc/common_audio/audio_util.cc | 9 | ||||
-rw-r--r-- | webrtc/common_audio/audio_util_unittest.cc | 148 | ||||
-rw-r--r-- | webrtc/common_audio/include/audio_util.h | 79 | ||||
-rw-r--r-- | webrtc/common_audio/wav_file.cc | 5 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/audio_buffer.cc | 88 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/audio_buffer.h | 8 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/audio_processing_impl.cc | 393 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/audio_processing_impl.h | 73 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/include/audio_processing.h | 140 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/include/mock_audio_processing.h | 9 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/test/audio_processing_unittest.cc | 100 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/test/audioproc_float.cc | 17 |
13 files changed, 708 insertions, 371 deletions
diff --git a/talk/media/webrtc/fakewebrtcvoiceengine.h b/talk/media/webrtc/fakewebrtcvoiceengine.h index 419170b24d..50cdd144ee 100644 --- a/talk/media/webrtc/fakewebrtcvoiceengine.h +++ b/talk/media/webrtc/fakewebrtcvoiceengine.h @@ -112,6 +112,8 @@ class FakeAudioProcessing : public webrtc::AudioProcessing { webrtc::AudioProcessing::ChannelLayout input_layout, webrtc::AudioProcessing::ChannelLayout output_layout, webrtc::AudioProcessing::ChannelLayout reverse_layout)); + WEBRTC_STUB(Initialize, ( + const webrtc::ProcessingConfig& processing_config)); WEBRTC_VOID_FUNC(SetExtraOptions, (const webrtc::Config& config)) { experimental_ns_enabled_ = config.Get<webrtc::ExperimentalNs>().enabled; @@ -136,12 +138,20 @@ class FakeAudioProcessing : public webrtc::AudioProcessing { int output_sample_rate_hz, webrtc::AudioProcessing::ChannelLayout output_layout, float* const* dest)); + WEBRTC_STUB(ProcessStream, + (const float* const* src, + const webrtc::StreamConfig& input_config, + const webrtc::StreamConfig& output_config, + float* const* dest)); WEBRTC_STUB(AnalyzeReverseStream, (webrtc::AudioFrame* frame)); WEBRTC_STUB(AnalyzeReverseStream, ( const float* const* data, int samples_per_channel, int sample_rate_hz, webrtc::AudioProcessing::ChannelLayout layout)); + WEBRTC_STUB(AnalyzeReverseStream, ( + const float* const* data, + const webrtc::StreamConfig& reverse_config)); WEBRTC_STUB(set_stream_delay_ms, (int delay)); WEBRTC_STUB_CONST(stream_delay_ms, ()); WEBRTC_BOOL_STUB_CONST(was_stream_delay_set, ()); diff --git a/webrtc/common_audio/audio_util.cc b/webrtc/common_audio/audio_util.cc index 2047295cb9..be67c48f60 100644 --- a/webrtc/common_audio/audio_util.cc +++ b/webrtc/common_audio/audio_util.cc @@ -39,4 +39,13 @@ void FloatS16ToFloat(const float* src, size_t size, float* dest) { dest[i] = FloatS16ToFloat(src[i]); } +template <> +void DownmixInterleavedToMono<int16_t>(const int16_t* interleaved, + int num_multichannel_frames, + int num_channels, + int16_t* 
deinterleaved) { + DownmixInterleavedToMonoImpl<int16_t, int32_t>( + interleaved, num_multichannel_frames, num_channels, deinterleaved); +} + } // namespace webrtc diff --git a/webrtc/common_audio/audio_util_unittest.cc b/webrtc/common_audio/audio_util_unittest.cc index 2cdf53813c..3ac3911445 100644 --- a/webrtc/common_audio/audio_util_unittest.cc +++ b/webrtc/common_audio/audio_util_unittest.cc @@ -8,11 +8,15 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "testing/gmock/include/gmock/gmock.h" #include "testing/gtest/include/gtest/gtest.h" #include "webrtc/common_audio/include/audio_util.h" #include "webrtc/typedefs.h" namespace webrtc { +namespace { + +using ::testing::ElementsAreArray; void ExpectArraysEq(const int16_t* ref, const int16_t* test, int length) { for (int i = 0; i < length; ++i) { @@ -28,11 +32,17 @@ void ExpectArraysEq(const float* ref, const float* test, int length) { TEST(AudioUtilTest, FloatToS16) { const int kSize = 9; - const float kInput[kSize] = { - 0.f, 0.4f / 32767.f, 0.6f / 32767.f, -0.4f / 32768.f, -0.6f / 32768.f, - 1.f, -1.f, 1.1f, -1.1f}; - const int16_t kReference[kSize] = { - 0, 0, 1, 0, -1, 32767, -32768, 32767, -32768}; + const float kInput[kSize] = {0.f, + 0.4f / 32767.f, + 0.6f / 32767.f, + -0.4f / 32768.f, + -0.6f / 32768.f, + 1.f, + -1.f, + 1.1f, + -1.1f}; + const int16_t kReference[kSize] = {0, 0, 1, 0, -1, + 32767, -32768, 32767, -32768}; int16_t output[kSize]; FloatToS16(kInput, kSize, output); ExpectArraysEq(kReference, output, kSize); @@ -50,8 +60,8 @@ TEST(AudioUtilTest, S16ToFloat) { TEST(AudioUtilTest, FloatS16ToS16) { const int kSize = 7; - const float kInput[kSize] = { - 0.f, 0.4f, 0.5f, -0.4f, -0.5f, 32768.f, -32769.f}; + const float kInput[kSize] = {0.f, 0.4f, 0.5f, -0.4f, + -0.5f, 32768.f, -32769.f}; const int16_t kReference[kSize] = {0, 0, 1, 0, -1, 32767, -32768}; int16_t output[kSize]; FloatS16ToS16(kInput, kSize, output); @@ -60,11 +70,17 @@ TEST(AudioUtilTest, FloatS16ToS16) { 
TEST(AudioUtilTest, FloatToFloatS16) { const int kSize = 9; - const float kInput[kSize] = { - 0.f, 0.4f / 32767.f, 0.6f / 32767.f, -0.4f / 32768.f, -0.6f / 32768.f, - 1.f, -1.f, 1.1f, -1.1f}; - const float kReference[kSize] = { - 0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32767.f, -32768.f, 36043.7f, -36044.8f}; + const float kInput[kSize] = {0.f, + 0.4f / 32767.f, + 0.6f / 32767.f, + -0.4f / 32768.f, + -0.6f / 32768.f, + 1.f, + -1.f, + 1.1f, + -1.1f}; + const float kReference[kSize] = {0.f, 0.4f, 0.6f, -0.4f, -0.6f, + 32767.f, -32768.f, 36043.7f, -36044.8f}; float output[kSize]; FloatToFloatS16(kInput, kSize, output); ExpectArraysEq(kReference, output, kSize); @@ -72,11 +88,17 @@ TEST(AudioUtilTest, FloatToFloatS16) { TEST(AudioUtilTest, FloatS16ToFloat) { const int kSize = 9; - const float kInput[kSize] = { - 0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32767.f, -32768.f, 36043.7f, -36044.8f}; - const float kReference[kSize] = { - 0.f, 0.4f / 32767.f, 0.6f / 32767.f, -0.4f / 32768.f, -0.6f / 32768.f, - 1.f, -1.f, 1.1f, -1.1f}; + const float kInput[kSize] = {0.f, 0.4f, 0.6f, -0.4f, -0.6f, + 32767.f, -32768.f, 36043.7f, -36044.8f}; + const float kReference[kSize] = {0.f, + 0.4f / 32767.f, + 0.6f / 32767.f, + -0.4f / 32768.f, + -0.6f / 32768.f, + 1.f, + -1.f, + 1.1f, + -1.1f}; float output[kSize]; FloatS16ToFloat(kInput, kSize, output); ExpectArraysEq(kReference, output, kSize); @@ -114,4 +136,96 @@ TEST(AudioUtilTest, InterleavingMonoIsIdentical) { ExpectArraysEq(mono, interleaved, kSamplesPerChannel); } +TEST(AudioUtilTest, DownmixInterleavedToMono) { + { + const int kNumFrames = 4; + const int kNumChannels = 1; + const int16_t interleaved[kNumChannels * kNumFrames] = {1, 2, -1, -3}; + int16_t deinterleaved[kNumFrames]; + + DownmixInterleavedToMono(interleaved, kNumFrames, kNumChannels, + deinterleaved); + + EXPECT_THAT(deinterleaved, ElementsAreArray(interleaved)); + } + { + const int kNumFrames = 2; + const int kNumChannels = 2; + const int16_t interleaved[kNumChannels * kNumFrames] = 
{10, 20, -10, -30}; + int16_t deinterleaved[kNumFrames]; + + DownmixInterleavedToMono(interleaved, kNumFrames, kNumChannels, + deinterleaved); + const int16_t expected[kNumFrames] = {15, -20}; + + EXPECT_THAT(deinterleaved, ElementsAreArray(expected)); + } + { + const int kNumFrames = 3; + const int kNumChannels = 3; + const int16_t interleaved[kNumChannels * kNumFrames] = { + 30000, 30000, 24001, -5, -10, -20, -30000, -30999, -30000}; + int16_t deinterleaved[kNumFrames]; + + DownmixInterleavedToMono(interleaved, kNumFrames, kNumChannels, + deinterleaved); + const int16_t expected[kNumFrames] = {28000, -11, -30333}; + + EXPECT_THAT(deinterleaved, ElementsAreArray(expected)); + } +} + +TEST(AudioUtilTest, DownmixToMonoTest) { + { + const int kNumFrames = 4; + const int kNumChannels = 1; + const float input_data[kNumChannels][kNumFrames] = {{1.f, 2.f, -1.f, -3.f}}; + const float* input[kNumChannels]; + for (int i = 0; i < kNumChannels; ++i) { + input[i] = input_data[i]; + } + + float downmixed[kNumFrames]; + + DownmixToMono<float, float>(input, kNumFrames, kNumChannels, downmixed); + + EXPECT_THAT(downmixed, ElementsAreArray(input_data[0])); + } + { + const int kNumFrames = 3; + const int kNumChannels = 2; + const float input_data[kNumChannels][kNumFrames] = {{1.f, 2.f, -1.f}, + {3.f, 0.f, 1.f}}; + const float* input[kNumChannels]; + for (int i = 0; i < kNumChannels; ++i) { + input[i] = input_data[i]; + } + + float downmixed[kNumFrames]; + const float expected[kNumFrames] = {2.f, 1.f, 0.f}; + + DownmixToMono<float, float>(input, kNumFrames, kNumChannels, downmixed); + + EXPECT_THAT(downmixed, ElementsAreArray(expected)); + } + { + const int kNumFrames = 3; + const int kNumChannels = 3; + const int16_t input_data[kNumChannels][kNumFrames] = { + {30000, -5, -30000}, {30000, -10, -30999}, {24001, -20, -30000}}; + const int16_t* input[kNumChannels]; + for (int i = 0; i < kNumChannels; ++i) { + input[i] = input_data[i]; + } + + int16_t downmixed[kNumFrames]; + const 
int16_t expected[kNumFrames] = {28000, -11, -30333}; + + DownmixToMono<int16_t, int32_t>(input, kNumFrames, kNumChannels, downmixed); + + EXPECT_THAT(downmixed, ElementsAreArray(expected)); + } +} + +} // namespace } // namespace webrtc diff --git a/webrtc/common_audio/include/audio_util.h b/webrtc/common_audio/include/audio_util.h index 8262649145..b217c683fd 100644 --- a/webrtc/common_audio/include/audio_util.h +++ b/webrtc/common_audio/include/audio_util.h @@ -12,7 +12,9 @@ #define WEBRTC_COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_ #include <limits> +#include <cstring> +#include "webrtc/base/checks.h" #include "webrtc/base/scoped_ptr.h" #include "webrtc/typedefs.h" @@ -26,10 +28,10 @@ typedef std::numeric_limits<int16_t> limits_int16; // FloatS16: float [-32768.0, 32767.0] static inline int16_t FloatToS16(float v) { if (v > 0) - return v >= 1 ? limits_int16::max() : - static_cast<int16_t>(v * limits_int16::max() + 0.5f); - return v <= -1 ? limits_int16::min() : - static_cast<int16_t>(-v * limits_int16::min() - 0.5f); + return v >= 1 ? limits_int16::max() + : static_cast<int16_t>(v * limits_int16::max() + 0.5f); + return v <= -1 ? limits_int16::min() + : static_cast<int16_t>(-v * limits_int16::min() - 0.5f); } static inline float S16ToFloat(int16_t v) { @@ -42,10 +44,9 @@ static inline int16_t FloatS16ToS16(float v) { static const float kMaxRound = limits_int16::max() - 0.5f; static const float kMinRound = limits_int16::min() + 0.5f; if (v > 0) - return v >= kMaxRound ? limits_int16::max() : - static_cast<int16_t>(v + 0.5f); - return v <= kMinRound ? limits_int16::min() : - static_cast<int16_t>(v - 0.5f); + return v >= kMaxRound ? limits_int16::max() + : static_cast<int16_t>(v + 0.5f); + return v <= kMinRound ? 
limits_int16::min() : static_cast<int16_t>(v - 0.5f); } static inline float FloatToFloatS16(float v) { @@ -69,8 +70,10 @@ void FloatS16ToFloat(const float* src, size_t size, float* dest); // |deinterleaved| buffers (|num_channel| buffers with |samples_per_channel| // per buffer). template <typename T> -void Deinterleave(const T* interleaved, int samples_per_channel, - int num_channels, T* const* deinterleaved) { +void Deinterleave(const T* interleaved, + int samples_per_channel, + int num_channels, + T* const* deinterleaved) { for (int i = 0; i < num_channels; ++i) { T* channel = deinterleaved[i]; int interleaved_idx = i; @@ -85,8 +88,10 @@ void Deinterleave(const T* interleaved, int samples_per_channel, // |interleaved|. There must be sufficient space allocated in |interleaved| // (|samples_per_channel| * |num_channels|). template <typename T> -void Interleave(const T* const* deinterleaved, int samples_per_channel, - int num_channels, T* interleaved) { +void Interleave(const T* const* deinterleaved, + int samples_per_channel, + int num_channels, + T* interleaved) { for (int i = 0; i < num_channels; ++i) { const T* channel = deinterleaved[i]; int interleaved_idx = i; @@ -97,6 +102,56 @@ void Interleave(const T* const* deinterleaved, int samples_per_channel, } } +template <typename T, typename Intermediate> +void DownmixToMono(const T* const* input_channels, + int num_frames, + int num_channels, + T* out) { + for (int i = 0; i < num_frames; ++i) { + Intermediate value = input_channels[0][i]; + for (int j = 1; j < num_channels; ++j) { + value += input_channels[j][i]; + } + out[i] = value / num_channels; + } +} + +// Downmixes an interleaved multichannel signal to a single channel by averaging +// all channels. 
+template <typename T, typename Intermediate> +void DownmixInterleavedToMonoImpl(const T* interleaved, + int num_frames, + int num_channels, + T* deinterleaved) { + DCHECK_GT(num_channels, 0); + DCHECK_GT(num_frames, 0); + + const T* const end = interleaved + num_frames * num_channels; + + while (interleaved < end) { + const T* const frame_end = interleaved + num_channels; + + Intermediate value = *interleaved++; + while (interleaved < frame_end) { + value += *interleaved++; + } + + *deinterleaved++ = value / num_channels; + } +} + +template <typename T> +void DownmixInterleavedToMono(const T* interleaved, + int num_frames, + int num_channels, + T* deinterleaved); + +template <> +void DownmixInterleavedToMono<int16_t>(const int16_t* interleaved, + int num_frames, + int num_channels, + int16_t* deinterleaved); + } // namespace webrtc #endif // WEBRTC_COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_ diff --git a/webrtc/common_audio/wav_file.cc b/webrtc/common_audio/wav_file.cc index 995043461a..a0c792c54a 100644 --- a/webrtc/common_audio/wav_file.cc +++ b/webrtc/common_audio/wav_file.cc @@ -123,11 +123,6 @@ void WavWriter::WriteSamples(const int16_t* samples, size_t num_samples) { num_samples_ += static_cast<uint32_t>(written); CHECK(written <= std::numeric_limits<uint32_t>::max() || num_samples_ >= written); // detect uint32_t overflow - CHECK(CheckWavParameters(num_channels_, - sample_rate_, - kWavFormat, - kBytesPerSample, - num_samples_)); } void WavWriter::WriteSamples(const float* samples, size_t num_samples) { diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc index 04dcaea799..9073ad7569 100644 --- a/webrtc/modules/audio_processing/audio_buffer.cc +++ b/webrtc/modules/audio_processing/audio_buffer.cc @@ -23,39 +23,13 @@ const int kSamplesPer16kHzChannel = 160; const int kSamplesPer32kHzChannel = 320; const int kSamplesPer48kHzChannel = 480; -bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) { - switch 
(layout) { - case AudioProcessing::kMono: - case AudioProcessing::kStereo: - return false; - case AudioProcessing::kMonoAndKeyboard: - case AudioProcessing::kStereoAndKeyboard: - return true; +int KeyboardChannelIndex(const StreamConfig& stream_config) { + if (!stream_config.has_keyboard()) { + assert(false); + return -1; } - assert(false); - return false; -} - -int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) { - switch (layout) { - case AudioProcessing::kMono: - case AudioProcessing::kStereo: - assert(false); - return -1; - case AudioProcessing::kMonoAndKeyboard: - return 1; - case AudioProcessing::kStereoAndKeyboard: - return 2; - } - assert(false); - return -1; -} -template <typename T> -void StereoToMono(const T* left, const T* right, T* out, - int num_frames) { - for (int i = 0; i < num_frames; ++i) - out[i] = (left[i] + right[i]) / 2; + return stream_config.num_channels(); } int NumBandsFromSamplesPerChannel(int num_frames) { @@ -91,7 +65,7 @@ AudioBuffer::AudioBuffer(int input_num_frames, assert(input_num_frames_ > 0); assert(proc_num_frames_ > 0); assert(output_num_frames_ > 0); - assert(num_input_channels_ > 0 && num_input_channels_ <= 2); + assert(num_input_channels_ > 0); assert(num_proc_channels_ > 0 && num_proc_channels_ <= num_input_channels_); if (input_num_frames_ != proc_num_frames_ || @@ -130,29 +104,28 @@ AudioBuffer::AudioBuffer(int input_num_frames, AudioBuffer::~AudioBuffer() {} void AudioBuffer::CopyFrom(const float* const* data, - int num_frames, - AudioProcessing::ChannelLayout layout) { - assert(num_frames == input_num_frames_); - assert(ChannelsFromLayout(layout) == num_input_channels_); + const StreamConfig& stream_config) { + assert(stream_config.num_frames() == input_num_frames_); + assert(stream_config.num_channels() == num_input_channels_); InitForNewData(); // Initialized lazily because there's a different condition in // DeinterleaveFrom. 
- if ((num_input_channels_ == 2 && num_proc_channels_ == 1) && !input_buffer_) { + const bool need_to_downmix = + num_input_channels_ > 1 && num_proc_channels_ == 1; + if (need_to_downmix && !input_buffer_) { input_buffer_.reset( new IFChannelBuffer(input_num_frames_, num_proc_channels_)); } - if (HasKeyboardChannel(layout)) { - keyboard_data_ = data[KeyboardChannelIndex(layout)]; + if (stream_config.has_keyboard()) { + keyboard_data_ = data[KeyboardChannelIndex(stream_config)]; } // Downmix. const float* const* data_ptr = data; - if (num_input_channels_ == 2 && num_proc_channels_ == 1) { - StereoToMono(data[0], - data[1], - input_buffer_->fbuf()->channels()[0], - input_num_frames_); + if (need_to_downmix) { + DownmixToMono<float, float>(data, input_num_frames_, num_input_channels_, + input_buffer_->fbuf()->channels()[0]); data_ptr = input_buffer_->fbuf_const()->channels(); } @@ -175,11 +148,10 @@ void AudioBuffer::CopyFrom(const float* const* data, } } -void AudioBuffer::CopyTo(int num_frames, - AudioProcessing::ChannelLayout layout, +void AudioBuffer::CopyTo(const StreamConfig& stream_config, float* const* data) { - assert(num_frames == output_num_frames_); - assert(ChannelsFromLayout(layout) == num_channels_); + assert(stream_config.num_frames() == output_num_frames_); + assert(stream_config.num_channels() == num_channels_); // Convert to the float range. float* const* data_ptr = data; @@ -327,9 +299,6 @@ const ChannelBuffer<float>* AudioBuffer::split_data_f() const { } const int16_t* AudioBuffer::mixed_low_pass_data() { - // Currently only mixing stereo to mono is supported. 
- assert(num_proc_channels_ == 1 || num_proc_channels_ == 2); - if (num_proc_channels_ == 1) { return split_bands_const(0)[kBand0To8kHz]; } @@ -339,10 +308,10 @@ const int16_t* AudioBuffer::mixed_low_pass_data() { mixed_low_pass_channels_.reset( new ChannelBuffer<int16_t>(num_split_frames_, 1)); } - StereoToMono(split_bands_const(0)[kBand0To8kHz], - split_bands_const(1)[kBand0To8kHz], - mixed_low_pass_channels_->channels()[0], - num_split_frames_); + + DownmixToMono<int16_t, int32_t>(split_channels_const(kBand0To8kHz), + num_split_frames_, num_channels_, + mixed_low_pass_channels_->channels()[0]); mixed_low_pass_valid_ = true; } return mixed_low_pass_channels_->channels()[0]; @@ -411,11 +380,10 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) { } else { deinterleaved = input_buffer_->ibuf()->channels(); } - if (num_input_channels_ == 2 && num_proc_channels_ == 1) { - // Downmix directly; no explicit deinterleaving needed. - for (int i = 0; i < input_num_frames_; ++i) { - deinterleaved[0][i] = (frame->data_[i * 2] + frame->data_[i * 2 + 1]) / 2; - } + if (num_proc_channels_ == 1) { + // Downmix and deinterleave simultaneously. + DownmixInterleavedToMono(frame->data_, input_num_frames_, + num_input_channels_, deinterleaved[0]); } else { assert(num_proc_channels_ == num_input_channels_); Deinterleave(frame->data_, diff --git a/webrtc/modules/audio_processing/audio_buffer.h b/webrtc/modules/audio_processing/audio_buffer.h index 4291fb3eb9..6750af0871 100644 --- a/webrtc/modules/audio_processing/audio_buffer.h +++ b/webrtc/modules/audio_processing/audio_buffer.h @@ -112,12 +112,8 @@ class AudioBuffer { void InterleaveTo(AudioFrame* frame, bool data_changed) const; // Use for float deinterleaved data. 
- void CopyFrom(const float* const* data, - int num_frames, - AudioProcessing::ChannelLayout layout); - void CopyTo(int num_frames, - AudioProcessing::ChannelLayout layout, - float* const* data); + void CopyFrom(const float* const* data, const StreamConfig& stream_config); + void CopyTo(const StreamConfig& stream_config, float* const* data); void CopyLowPassToReference(); // Splits the signal into different bands. diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc index 87b82a6a35..e28008a1e4 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc @@ -11,6 +11,7 @@ #include "webrtc/modules/audio_processing/audio_processing_impl.h" #include <assert.h> +#include <algorithm> #include "webrtc/base/checks.h" #include "webrtc/base/platform_file.h" @@ -48,15 +49,32 @@ extern "C" { #endif #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP -#define RETURN_ON_ERR(expr) \ - do { \ - int err = (expr); \ - if (err != kNoError) { \ - return err; \ - } \ +#define RETURN_ON_ERR(expr) \ + do { \ + int err = (expr); \ + if (err != kNoError) { \ + return err; \ + } \ } while (0) namespace webrtc { +namespace { + +static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) { + switch (layout) { + case AudioProcessing::kMono: + case AudioProcessing::kStereo: + return false; + case AudioProcessing::kMonoAndKeyboard: + case AudioProcessing::kStereoAndKeyboard: + return true; + } + + assert(false); + return false; +} + +} // namespace // Throughout webrtc, it's assumed that success is represented by zero. 
static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero"); @@ -75,9 +93,7 @@ static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero"); class GainControlForNewAgc : public GainControl, public VolumeCallbacks { public: explicit GainControlForNewAgc(GainControlImpl* gain_control) - : real_gain_control_(gain_control), - volume_(0) { - } + : real_gain_control_(gain_control), volume_(0) {} // GainControl implementation. int Enable(bool enable) override { @@ -166,10 +182,10 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config, debug_file_(FileWrapper::Create()), event_msg_(new audioproc::Event()), #endif - fwd_in_format_(kSampleRate16kHz, 1), + api_format_({{{kSampleRate16kHz, 1, false}, + {kSampleRate16kHz, 1, false}, + {kSampleRate16kHz, 1, false}}}), fwd_proc_format_(kSampleRate16kHz), - fwd_out_format_(kSampleRate16kHz, 1), - rev_in_format_(kSampleRate16kHz, 1), rev_proc_format_(kSampleRate16kHz, 1), split_rate_(kSampleRate16kHz), stream_delay_ms_(0), @@ -253,12 +269,11 @@ int AudioProcessingImpl::Initialize() { int AudioProcessingImpl::set_sample_rate_hz(int rate) { CriticalSectionScoped crit_scoped(crit_); - return InitializeLocked(rate, - rate, - rev_in_format_.rate(), - fwd_in_format_.num_channels(), - fwd_out_format_.num_channels(), - rev_in_format_.num_channels()); + + ProcessingConfig processing_config = api_format_; + processing_config.input_stream().set_sample_rate_hz(rate); + processing_config.output_stream().set_sample_rate_hz(rate); + return InitializeLocked(processing_config); } int AudioProcessingImpl::Initialize(int input_sample_rate_hz, @@ -267,29 +282,39 @@ int AudioProcessingImpl::Initialize(int input_sample_rate_hz, ChannelLayout input_layout, ChannelLayout output_layout, ChannelLayout reverse_layout) { + const ProcessingConfig processing_config = { + {{input_sample_rate_hz, ChannelsFromLayout(input_layout), + LayoutHasKeyboard(input_layout)}, + {output_sample_rate_hz, ChannelsFromLayout(output_layout), + 
LayoutHasKeyboard(output_layout)}, + {reverse_sample_rate_hz, ChannelsFromLayout(reverse_layout), + LayoutHasKeyboard(reverse_layout)}}}; + + return Initialize(processing_config); +} + +int AudioProcessingImpl::Initialize(const ProcessingConfig& processing_config) { CriticalSectionScoped crit_scoped(crit_); - return InitializeLocked(input_sample_rate_hz, - output_sample_rate_hz, - reverse_sample_rate_hz, - ChannelsFromLayout(input_layout), - ChannelsFromLayout(output_layout), - ChannelsFromLayout(reverse_layout)); + return InitializeLocked(processing_config); } int AudioProcessingImpl::InitializeLocked() { - const int fwd_audio_buffer_channels = beamformer_enabled_ ? - fwd_in_format_.num_channels() : - fwd_out_format_.num_channels(); - render_audio_.reset(new AudioBuffer(rev_in_format_.samples_per_channel(), - rev_in_format_.num_channels(), - rev_proc_format_.samples_per_channel(), - rev_proc_format_.num_channels(), - rev_proc_format_.samples_per_channel())); - capture_audio_.reset(new AudioBuffer(fwd_in_format_.samples_per_channel(), - fwd_in_format_.num_channels(), - fwd_proc_format_.samples_per_channel(), - fwd_audio_buffer_channels, - fwd_out_format_.samples_per_channel())); + const int fwd_audio_buffer_channels = + beamformer_enabled_ ? api_format_.input_stream().num_channels() + : api_format_.output_stream().num_channels(); + if (api_format_.reverse_stream().num_channels() > 0) { + render_audio_.reset(new AudioBuffer( + api_format_.reverse_stream().num_frames(), + api_format_.reverse_stream().num_channels(), + rev_proc_format_.num_frames(), rev_proc_format_.num_channels(), + rev_proc_format_.num_frames())); + } else { + render_audio_.reset(nullptr); + } + capture_audio_.reset(new AudioBuffer( + api_format_.input_stream().num_frames(), + api_format_.input_stream().num_channels(), fwd_proc_format_.num_frames(), + fwd_audio_buffer_channels, api_format_.output_stream().num_frames())); // Initialize all components. 
for (auto item : component_list_) { @@ -317,38 +342,38 @@ int AudioProcessingImpl::InitializeLocked() { return kNoError; } -int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz, - int output_sample_rate_hz, - int reverse_sample_rate_hz, - int num_input_channels, - int num_output_channels, - int num_reverse_channels) { - if (input_sample_rate_hz <= 0 || - output_sample_rate_hz <= 0 || - reverse_sample_rate_hz <= 0) { - return kBadSampleRateError; - } - if (num_output_channels > num_input_channels) { - return kBadNumberChannelsError; +int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { + for (const auto& stream : config.streams) { + if (stream.num_channels() < 0) { + return kBadNumberChannelsError; + } + if (stream.num_channels() > 0 && stream.sample_rate_hz() <= 0) { + return kBadSampleRateError; + } } - // Only mono and stereo supported currently. - if (num_input_channels > 2 || num_input_channels < 1 || - num_output_channels > 2 || num_output_channels < 1 || - num_reverse_channels > 2 || num_reverse_channels < 1) { + + const int num_in_channels = config.input_stream().num_channels(); + const int num_out_channels = config.output_stream().num_channels(); + + // Need at least one input channel. + // Need either one output channel or as many outputs as there are inputs. 
+ if (num_in_channels == 0 || + !(num_out_channels == 1 || num_out_channels == num_in_channels)) { return kBadNumberChannelsError; } + if (beamformer_enabled_ && - (static_cast<size_t>(num_input_channels) != array_geometry_.size() || - num_output_channels > 1)) { + (static_cast<size_t>(num_in_channels) != array_geometry_.size() || + num_out_channels > 1)) { return kBadNumberChannelsError; } - fwd_in_format_.set(input_sample_rate_hz, num_input_channels); - fwd_out_format_.set(output_sample_rate_hz, num_output_channels); - rev_in_format_.set(reverse_sample_rate_hz, num_reverse_channels); + api_format_ = config; // We process at the closest native rate >= min(input rate, output rate)... - int min_proc_rate = std::min(fwd_in_format_.rate(), fwd_out_format_.rate()); + const int min_proc_rate = + std::min(api_format_.input_stream().sample_rate_hz(), + api_format_.output_stream().sample_rate_hz()); int fwd_proc_rate; if (supports_48kHz_ && min_proc_rate > kSampleRate32kHz) { fwd_proc_rate = kSampleRate48kHz; @@ -364,15 +389,15 @@ int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz, fwd_proc_rate = kSampleRate16kHz; } - fwd_proc_format_.set(fwd_proc_rate); + fwd_proc_format_ = StreamConfig(fwd_proc_rate); // We normally process the reverse stream at 16 kHz. Unless... int rev_proc_rate = kSampleRate16kHz; - if (fwd_proc_format_.rate() == kSampleRate8kHz) { + if (fwd_proc_format_.sample_rate_hz() == kSampleRate8kHz) { // ...the forward stream is at 8 kHz. rev_proc_rate = kSampleRate8kHz; } else { - if (rev_in_format_.rate() == kSampleRate32kHz) { + if (api_format_.reverse_stream().sample_rate_hz() == kSampleRate32kHz) { // ...or the input is at 32 kHz, in which case we use the splitting // filter rather than the resampler. rev_proc_rate = kSampleRate32kHz; @@ -381,13 +406,13 @@ int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz, // Always downmix the reverse stream to mono for analysis. 
This has been // demonstrated to work well for AEC in most practical scenarios. - rev_proc_format_.set(rev_proc_rate, 1); + rev_proc_format_ = StreamConfig(rev_proc_rate, 1); - if (fwd_proc_format_.rate() == kSampleRate32kHz || - fwd_proc_format_.rate() == kSampleRate48kHz) { + if (fwd_proc_format_.sample_rate_hz() == kSampleRate32kHz || + fwd_proc_format_.sample_rate_hz() == kSampleRate48kHz) { split_rate_ = kSampleRate16kHz; } else { - split_rate_ = fwd_proc_format_.rate(); + split_rate_ = fwd_proc_format_.sample_rate_hz(); } return InitializeLocked(); @@ -395,26 +420,12 @@ int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz, // Calls InitializeLocked() if any of the audio parameters have changed from // their current values. -int AudioProcessingImpl::MaybeInitializeLocked(int input_sample_rate_hz, - int output_sample_rate_hz, - int reverse_sample_rate_hz, - int num_input_channels, - int num_output_channels, - int num_reverse_channels) { - if (input_sample_rate_hz == fwd_in_format_.rate() && - output_sample_rate_hz == fwd_out_format_.rate() && - reverse_sample_rate_hz == rev_in_format_.rate() && - num_input_channels == fwd_in_format_.num_channels() && - num_output_channels == fwd_out_format_.num_channels() && - num_reverse_channels == rev_in_format_.num_channels()) { +int AudioProcessingImpl::MaybeInitializeLocked( + const ProcessingConfig& processing_config) { + if (processing_config == api_format_) { return kNoError; } - return InitializeLocked(input_sample_rate_hz, - output_sample_rate_hz, - reverse_sample_rate_hz, - num_input_channels, - num_output_channels, - num_reverse_channels); + return InitializeLocked(processing_config); } void AudioProcessingImpl::SetExtraOptions(const Config& config) { @@ -431,16 +442,16 @@ void AudioProcessingImpl::SetExtraOptions(const Config& config) { int AudioProcessingImpl::input_sample_rate_hz() const { CriticalSectionScoped crit_scoped(crit_); - return fwd_in_format_.rate(); + return 
api_format_.input_stream().sample_rate_hz(); } int AudioProcessingImpl::sample_rate_hz() const { CriticalSectionScoped crit_scoped(crit_); - return fwd_in_format_.rate(); + return api_format_.input_stream().sample_rate_hz(); } int AudioProcessingImpl::proc_sample_rate_hz() const { - return fwd_proc_format_.rate(); + return fwd_proc_format_.sample_rate_hz(); } int AudioProcessingImpl::proc_split_sample_rate_hz() const { @@ -452,11 +463,11 @@ int AudioProcessingImpl::num_reverse_channels() const { } int AudioProcessingImpl::num_input_channels() const { - return fwd_in_format_.num_channels(); + return api_format_.input_stream().num_channels(); } int AudioProcessingImpl::num_output_channels() const { - return fwd_out_format_.num_channels(); + return api_format_.output_stream().num_channels(); } void AudioProcessingImpl::set_output_will_be_muted(bool muted) { @@ -479,44 +490,60 @@ int AudioProcessingImpl::ProcessStream(const float* const* src, int output_sample_rate_hz, ChannelLayout output_layout, float* const* dest) { + StreamConfig input_stream = api_format_.input_stream(); + input_stream.set_sample_rate_hz(input_sample_rate_hz); + input_stream.set_num_channels(ChannelsFromLayout(input_layout)); + input_stream.set_has_keyboard(LayoutHasKeyboard(input_layout)); + + StreamConfig output_stream = api_format_.output_stream(); + output_stream.set_sample_rate_hz(output_sample_rate_hz); + output_stream.set_num_channels(ChannelsFromLayout(output_layout)); + output_stream.set_has_keyboard(LayoutHasKeyboard(output_layout)); + + if (samples_per_channel != input_stream.num_frames()) { + return kBadDataLengthError; + } + return ProcessStream(src, input_stream, output_stream, dest); +} + +int AudioProcessingImpl::ProcessStream(const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config, + float* const* dest) { CriticalSectionScoped crit_scoped(crit_); if (!src || !dest) { return kNullPointerError; } - 
RETURN_ON_ERR(MaybeInitializeLocked(input_sample_rate_hz, - output_sample_rate_hz, - rev_in_format_.rate(), - ChannelsFromLayout(input_layout), - ChannelsFromLayout(output_layout), - rev_in_format_.num_channels())); - if (samples_per_channel != fwd_in_format_.samples_per_channel()) { - return kBadDataLengthError; - } + ProcessingConfig processing_config = api_format_; + processing_config.input_stream() = input_config; + processing_config.output_stream() = output_config; + + RETURN_ON_ERR(MaybeInitializeLocked(processing_config)); + assert(processing_config.input_stream().num_frames() == + api_format_.input_stream().num_frames()); #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP if (debug_file_->Open()) { event_msg_->set_type(audioproc::Event::STREAM); audioproc::Stream* msg = event_msg_->mutable_stream(); const size_t channel_size = - sizeof(float) * fwd_in_format_.samples_per_channel(); - for (int i = 0; i < fwd_in_format_.num_channels(); ++i) + sizeof(float) * api_format_.input_stream().num_frames(); + for (int i = 0; i < api_format_.input_stream().num_channels(); ++i) msg->add_input_channel(src[i], channel_size); } #endif - capture_audio_->CopyFrom(src, samples_per_channel, input_layout); + capture_audio_->CopyFrom(src, api_format_.input_stream()); RETURN_ON_ERR(ProcessStreamLocked()); - capture_audio_->CopyTo(fwd_out_format_.samples_per_channel(), - output_layout, - dest); + capture_audio_->CopyTo(api_format_.output_stream(), dest); #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP if (debug_file_->Open()) { audioproc::Stream* msg = event_msg_->mutable_stream(); const size_t channel_size = - sizeof(float) * fwd_out_format_.samples_per_channel(); - for (int i = 0; i < fwd_out_format_.num_channels(); ++i) + sizeof(float) * api_format_.input_stream().num_frames(); + for (int i = 0; i < api_format_.input_stream().num_channels(); ++i) msg->add_output_channel(dest[i], channel_size); RETURN_ON_ERR(WriteMessageToDebugFile()); } @@ -545,13 +572,14 @@ int 
AudioProcessingImpl::ProcessStream(AudioFrame* frame) { // TODO(ajm): The input and output rates and channels are currently // constrained to be identical in the int16 interface. - RETURN_ON_ERR(MaybeInitializeLocked(frame->sample_rate_hz_, - frame->sample_rate_hz_, - rev_in_format_.rate(), - frame->num_channels_, - frame->num_channels_, - rev_in_format_.num_channels())); - if (frame->samples_per_channel_ != fwd_in_format_.samples_per_channel()) { + ProcessingConfig processing_config = api_format_; + processing_config.input_stream().set_sample_rate_hz(frame->sample_rate_hz_); + processing_config.input_stream().set_num_channels(frame->num_channels_); + processing_config.output_stream().set_sample_rate_hz(frame->sample_rate_hz_); + processing_config.output_stream().set_num_channels(frame->num_channels_); + + RETURN_ON_ERR(MaybeInitializeLocked(processing_config)); + if (frame->samples_per_channel_ != api_format_.input_stream().num_frames()) { return kBadDataLengthError; } @@ -559,9 +587,8 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { if (debug_file_->Open()) { event_msg_->set_type(audioproc::Event::STREAM); audioproc::Stream* msg = event_msg_->mutable_stream(); - const size_t data_size = sizeof(int16_t) * - frame->samples_per_channel_ * - frame->num_channels_; + const size_t data_size = + sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_; msg->set_input_data(frame->data_, data_size); } #endif @@ -573,9 +600,8 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP if (debug_file_->Open()) { audioproc::Stream* msg = event_msg_->mutable_stream(); - const size_t data_size = sizeof(int16_t) * - frame->samples_per_channel_ * - frame->num_channels_; + const size_t data_size = + sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_; msg->set_output_data(frame->data_, data_size); RETURN_ON_ERR(WriteMessageToDebugFile()); } @@ -584,7 +610,6 @@ int 
AudioProcessingImpl::ProcessStream(AudioFrame* frame) { return kNoError; } - int AudioProcessingImpl::ProcessStreamLocked() { #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP if (debug_file_->Open()) { @@ -600,9 +625,8 @@ int AudioProcessingImpl::ProcessStreamLocked() { AudioBuffer* ca = capture_audio_.get(); // For brevity. if (use_new_agc_ && gain_control_->is_enabled()) { - agc_manager_->AnalyzePreProcess(ca->channels()[0], - ca->num_channels(), - fwd_proc_format_.samples_per_channel()); + agc_manager_->AnalyzePreProcess(ca->channels()[0], ca->num_channels(), + fwd_proc_format_.num_frames()); } bool data_processed = is_data_processed(); @@ -627,12 +651,10 @@ int AudioProcessingImpl::ProcessStreamLocked() { RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(ca)); RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(ca)); - if (use_new_agc_ && - gain_control_->is_enabled() && + if (use_new_agc_ && gain_control_->is_enabled() && (!beamformer_enabled_ || beamformer_->is_target_present())) { agc_manager_->Process(ca->split_bands_const(0)[kBand0To8kHz], - ca->num_frames_per_band(), - split_rate_); + ca->num_frames_per_band(), split_rate_); } RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(ca)); @@ -646,15 +668,11 @@ int AudioProcessingImpl::ProcessStreamLocked() { float voice_probability = agc_manager_.get() ? agc_manager_->voice_probability() : 1.f; - transient_suppressor_->Suppress(ca->channels_f()[0], - ca->num_frames(), - ca->num_channels(), - ca->split_bands_const_f(0)[kBand0To8kHz], - ca->num_frames_per_band(), - ca->keyboard_data(), - ca->num_keyboard_frames(), - voice_probability, - key_pressed_); + transient_suppressor_->Suppress( + ca->channels_f()[0], ca->num_frames(), ca->num_channels(), + ca->split_bands_const_f(0)[kBand0To8kHz], ca->num_frames_per_band(), + ca->keyboard_data(), ca->num_keyboard_frames(), voice_probability, + key_pressed_); } // The level estimator operates on the recombined data. 
@@ -668,35 +686,47 @@ int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data, int samples_per_channel, int sample_rate_hz, ChannelLayout layout) { + const StreamConfig reverse_config = { + sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout), + }; + if (samples_per_channel != reverse_config.num_frames()) { + return kBadDataLengthError; + } + return AnalyzeReverseStream(data, reverse_config); +} + +int AudioProcessingImpl::AnalyzeReverseStream( + const float* const* data, + const StreamConfig& reverse_config) { CriticalSectionScoped crit_scoped(crit_); if (data == NULL) { return kNullPointerError; } - const int num_channels = ChannelsFromLayout(layout); - RETURN_ON_ERR(MaybeInitializeLocked(fwd_in_format_.rate(), - fwd_out_format_.rate(), - sample_rate_hz, - fwd_in_format_.num_channels(), - fwd_out_format_.num_channels(), - num_channels)); - if (samples_per_channel != rev_in_format_.samples_per_channel()) { - return kBadDataLengthError; + if (reverse_config.num_channels() <= 0) { + return kBadNumberChannelsError; } + ProcessingConfig processing_config = api_format_; + processing_config.reverse_stream() = reverse_config; + + RETURN_ON_ERR(MaybeInitializeLocked(processing_config)); + assert(reverse_config.num_frames() == + api_format_.reverse_stream().num_frames()); + #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP if (debug_file_->Open()) { event_msg_->set_type(audioproc::Event::REVERSE_STREAM); audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream(); const size_t channel_size = - sizeof(float) * rev_in_format_.samples_per_channel(); - for (int i = 0; i < num_channels; ++i) + sizeof(float) * api_format_.reverse_stream().num_frames(); + for (int i = 0; i < api_format_.reverse_stream().num_channels(); ++i) msg->add_channel(data[i], channel_size); RETURN_ON_ERR(WriteMessageToDebugFile()); } #endif - render_audio_->CopyFrom(data, samples_per_channel, layout); + render_audio_->CopyFrom(data, api_format_.reverse_stream()); return 
AnalyzeReverseStreamLocked(); } @@ -713,17 +743,21 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { return kBadSampleRateError; } // This interface does not tolerate different forward and reverse rates. - if (frame->sample_rate_hz_ != fwd_in_format_.rate()) { + if (frame->sample_rate_hz_ != api_format_.input_stream().sample_rate_hz()) { return kBadSampleRateError; } - RETURN_ON_ERR(MaybeInitializeLocked(fwd_in_format_.rate(), - fwd_out_format_.rate(), - frame->sample_rate_hz_, - fwd_in_format_.num_channels(), - fwd_in_format_.num_channels(), - frame->num_channels_)); - if (frame->samples_per_channel_ != rev_in_format_.samples_per_channel()) { + if (frame->num_channels_ <= 0) { + return kBadNumberChannelsError; + } + + ProcessingConfig processing_config = api_format_; + processing_config.reverse_stream().set_sample_rate_hz(frame->sample_rate_hz_); + processing_config.reverse_stream().set_num_channels(frame->num_channels_); + + RETURN_ON_ERR(MaybeInitializeLocked(processing_config)); + if (frame->samples_per_channel_ != + api_format_.reverse_stream().num_frames()) { return kBadDataLengthError; } @@ -731,9 +765,8 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { if (debug_file_->Open()) { event_msg_->set_type(audioproc::Event::REVERSE_STREAM); audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream(); - const size_t data_size = sizeof(int16_t) * - frame->samples_per_channel_ * - frame->num_channels_; + const size_t data_size = + sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_; msg->set_data(frame->data_, data_size); RETURN_ON_ERR(WriteMessageToDebugFile()); } @@ -745,7 +778,7 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { int AudioProcessingImpl::AnalyzeReverseStreamLocked() { AudioBuffer* ra = render_audio_.get(); // For brevity. 
- if (rev_proc_format_.rate() == kSampleRate32kHz) { + if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz) { ra->SplitIntoFrequencyBands(); } @@ -947,13 +980,15 @@ bool AudioProcessingImpl::is_data_processed() const { bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const { // Check if we've upmixed or downmixed the audio. - return ((fwd_out_format_.num_channels() != fwd_in_format_.num_channels()) || + return ((api_format_.output_stream().num_channels() != + api_format_.input_stream().num_channels()) || is_data_processed || transient_suppressor_enabled_); } bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const { - return (is_data_processed && (fwd_proc_format_.rate() == kSampleRate32kHz || - fwd_proc_format_.rate() == kSampleRate48kHz)); + return (is_data_processed && + (fwd_proc_format_.sample_rate_hz() == kSampleRate32kHz || + fwd_proc_format_.sample_rate_hz() == kSampleRate48kHz)); } bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const { @@ -961,8 +996,8 @@ bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const { !transient_suppressor_enabled_) { // Only level_estimator_ is enabled. return false; - } else if (fwd_proc_format_.rate() == kSampleRate32kHz || - fwd_proc_format_.rate() == kSampleRate48kHz) { + } else if (fwd_proc_format_.sample_rate_hz() == kSampleRate32kHz || + fwd_proc_format_.sample_rate_hz() == kSampleRate48kHz) { // Something besides level_estimator_ is enabled, and we have super-wb. 
return true; } @@ -986,9 +1021,9 @@ void AudioProcessingImpl::InitializeTransient() { if (!transient_suppressor_.get()) { transient_suppressor_.reset(new TransientSuppressor()); } - transient_suppressor_->Initialize(fwd_proc_format_.rate(), - split_rate_, - fwd_out_format_.num_channels()); + transient_suppressor_->Initialize( + fwd_proc_format_.sample_rate_hz(), split_rate_, + api_format_.output_stream().num_channels()); } } @@ -1031,8 +1066,8 @@ void AudioProcessingImpl::MaybeUpdateHistograms() { const int frames_per_ms = rtc::CheckedDivExact(split_rate_, 1000); const int aec_system_delay_ms = WebRtcAec_system_delay(echo_cancellation()->aec_core()) / frames_per_ms; - const int diff_aec_system_delay_ms = aec_system_delay_ms - - last_aec_system_delay_ms_; + const int diff_aec_system_delay_ms = + aec_system_delay_ms - last_aec_system_delay_ms_; if (diff_aec_system_delay_ms > kMinDiffDelayMs && last_aec_system_delay_ms_ != 0) { RTC_HISTOGRAM_COUNTS("WebRTC.Audio.AecSystemDelayJump", @@ -1072,8 +1107,8 @@ int AudioProcessingImpl::WriteMessageToDebugFile() { return kUnspecifiedError; } #if defined(WEBRTC_ARCH_BIG_ENDIAN) - // TODO(ajm): Use little-endian "on the wire". For the moment, we can be - // pretty safe in assuming little-endian. +// TODO(ajm): Use little-endian "on the wire". For the moment, we can be +// pretty safe in assuming little-endian. 
#endif if (!event_msg_->SerializeToString(&event_str_)) { @@ -1096,12 +1131,12 @@ int AudioProcessingImpl::WriteMessageToDebugFile() { int AudioProcessingImpl::WriteInitMessage() { event_msg_->set_type(audioproc::Event::INIT); audioproc::Init* msg = event_msg_->mutable_init(); - msg->set_sample_rate(fwd_in_format_.rate()); - msg->set_num_input_channels(fwd_in_format_.num_channels()); - msg->set_num_output_channels(fwd_out_format_.num_channels()); - msg->set_num_reverse_channels(rev_in_format_.num_channels()); - msg->set_reverse_sample_rate(rev_in_format_.rate()); - msg->set_output_sample_rate(fwd_out_format_.rate()); + msg->set_sample_rate(api_format_.input_stream().sample_rate_hz()); + msg->set_num_input_channels(api_format_.input_stream().num_channels()); + msg->set_num_output_channels(api_format_.output_stream().num_channels()); + msg->set_num_reverse_channels(api_format_.reverse_stream().num_channels()); + msg->set_reverse_sample_rate(api_format_.reverse_stream().sample_rate_hz()); + msg->set_output_sample_rate(api_format_.output_stream().sample_rate_hz()); int err = WriteMessageToDebugFile(); if (err != kNoError) { diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h index bbd1719158..0597cd9518 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.h +++ b/webrtc/modules/audio_processing/audio_processing_impl.h @@ -13,6 +13,7 @@ #include <list> #include <string> +#include <vector> #include "webrtc/base/scoped_ptr.h" #include "webrtc/base/thread_annotations.h" @@ -47,42 +48,6 @@ class Event; } // namespace audioproc #endif -class AudioRate { - public: - explicit AudioRate(int sample_rate_hz) { set(sample_rate_hz); } - virtual ~AudioRate() {} - - void set(int rate) { - rate_ = rate; - samples_per_channel_ = AudioProcessing::kChunkSizeMs * rate_ / 1000; - } - - int rate() const { return rate_; } - int samples_per_channel() const { return samples_per_channel_; } - - private: - 
int rate_; - int samples_per_channel_; -}; - -class AudioFormat : public AudioRate { - public: - AudioFormat(int sample_rate_hz, int num_channels) - : AudioRate(sample_rate_hz), - num_channels_(num_channels) {} - virtual ~AudioFormat() {} - - void set(int rate, int num_channels) { - AudioRate::set(rate); - num_channels_ = num_channels; - } - - int num_channels() const { return num_channels_; } - - private: - int num_channels_; -}; - class AudioProcessingImpl : public AudioProcessing { public: explicit AudioProcessingImpl(const Config& config); @@ -99,6 +64,7 @@ class AudioProcessingImpl : public AudioProcessing { ChannelLayout input_layout, ChannelLayout output_layout, ChannelLayout reverse_layout) override; + int Initialize(const ProcessingConfig& processing_config) override; void SetExtraOptions(const Config& config) override; int set_sample_rate_hz(int rate) override; int input_sample_rate_hz() const override; @@ -118,11 +84,17 @@ class AudioProcessingImpl : public AudioProcessing { int output_sample_rate_hz, ChannelLayout output_layout, float* const* dest) override; + int ProcessStream(const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config, + float* const* dest) override; int AnalyzeReverseStream(AudioFrame* frame) override; int AnalyzeReverseStream(const float* const* data, int samples_per_channel, int sample_rate_hz, ChannelLayout layout) override; + int AnalyzeReverseStream(const float* const* data, + const StreamConfig& reverse_config) override; int set_stream_delay_ms(int delay) override; int stream_delay_ms() const override; bool was_stream_delay_set() const override; @@ -148,19 +120,9 @@ class AudioProcessingImpl : public AudioProcessing { virtual int InitializeLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_); private: - int InitializeLocked(int input_sample_rate_hz, - int output_sample_rate_hz, - int reverse_sample_rate_hz, - int num_input_channels, - int num_output_channels, - int num_reverse_channels) + int 
InitializeLocked(const ProcessingConfig& config) EXCLUSIVE_LOCKS_REQUIRED(crit_); - int MaybeInitializeLocked(int input_sample_rate_hz, - int output_sample_rate_hz, - int reverse_sample_rate_hz, - int num_input_channels, - int num_output_channels, - int num_reverse_channels) + int MaybeInitializeLocked(const ProcessingConfig& config) EXCLUSIVE_LOCKS_REQUIRED(crit_); int ProcessStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_); int AnalyzeReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_); @@ -197,13 +159,14 @@ class AudioProcessingImpl : public AudioProcessing { std::string event_str_; // Memory for protobuf serialization. #endif - AudioFormat fwd_in_format_; - // This one is an AudioRate, because the forward processing number of channels - // is mutable and is tracked by the capture_audio_. - AudioRate fwd_proc_format_; - AudioFormat fwd_out_format_; - AudioFormat rev_in_format_; - AudioFormat rev_proc_format_; + // Format of processing streams at input/output call sites. + ProcessingConfig api_format_; + + // Only the rate and samples fields of fwd_proc_format_ are used because the + // forward processing number of channels is mutable and is tracked by the + // capture_audio_. 
+ StreamConfig fwd_proc_format_; + StreamConfig rev_proc_format_; int split_rate_; int stream_delay_ms_; diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h index 6fa1c96c07..6a8ef1b0e5 100644 --- a/webrtc/modules/audio_processing/include/audio_processing.h +++ b/webrtc/modules/audio_processing/include/audio_processing.h @@ -29,6 +29,9 @@ class AudioFrame; template<typename T> class Beamformer; +class StreamConfig; +class ProcessingConfig; + class EchoCancellation; class EchoControlMobile; class GainControl; @@ -84,7 +87,7 @@ static const int kAgcStartupMinVolume = 0; #endif // defined(WEBRTC_CHROMIUM_BUILD) struct ExperimentalAgc { ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {} - ExperimentalAgc(bool enabled) + explicit ExperimentalAgc(bool enabled) : enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {} ExperimentalAgc(bool enabled, int startup_min_volume) : enabled(enabled), startup_min_volume(startup_min_volume) {} @@ -199,6 +202,7 @@ static const int kAudioProcMaxNativeSampleRateHz = 32000; // class AudioProcessing { public: + // TODO(mgraczyk): Remove once all methods that use ChannelLayout are gone. enum ChannelLayout { kMono, // Left, right. @@ -236,10 +240,17 @@ class AudioProcessing { // The int16 interfaces require: // - only |NativeRate|s be used // - that the input, output and reverse rates must match - // - that |output_layout| matches |input_layout| + // - that |processing_config.output_stream()| matches + // |processing_config.input_stream()|. + // + // The float interfaces accept arbitrary rates and support differing input and + // output layouts, but the output must have either one channel or the same + // number of channels as the input. + virtual int Initialize(const ProcessingConfig& processing_config) = 0; + + // Initialize with unpacked parameters. See Initialize() above for details. 
// - // The float interfaces accept arbitrary rates and support differing input - // and output layouts, but the output may only remove channels, not add. + // TODO(mgraczyk): Remove once clients are updated to use the new interface. virtual int Initialize(int input_sample_rate_hz, int output_sample_rate_hz, int reverse_sample_rate_hz, @@ -292,8 +303,10 @@ class AudioProcessing { // |input_layout|. At output, the channels will be arranged according to // |output_layout| at |output_sample_rate_hz| in |dest|. // - // The output layout may only remove channels, not add. |src| and |dest| - // may use the same memory, if desired. + // The output layout must have one channel or as many channels as the input. + // |src| and |dest| may use the same memory, if desired. + // + // TODO(mgraczyk): Remove once clients are updated to use the new interface. virtual int ProcessStream(const float* const* src, int samples_per_channel, int input_sample_rate_hz, @@ -302,6 +315,18 @@ class AudioProcessing { ChannelLayout output_layout, float* const* dest) = 0; + // Accepts deinterleaved float audio with the range [-1, 1]. Each element of + // |src| points to a channel buffer, arranged according to |input_stream|. At + // output, the channels will be arranged according to |output_stream| in + // |dest|. + // + // The output must have one channel or as many channels as the input. |src| + // and |dest| may use the same memory, if desired. + virtual int ProcessStream(const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config, + float* const* dest) = 0; + // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame // will not be modified. On the client-side, this is the far-end (or to be // rendered) audio. @@ -321,11 +346,18 @@ class AudioProcessing { // Accepts deinterleaved float audio with the range [-1, 1]. Each element // of |data| points to a channel buffer, arranged according to |layout|. 
+ // + // TODO(mgraczyk): Remove once clients are updated to use the new interface. virtual int AnalyzeReverseStream(const float* const* data, int samples_per_channel, int sample_rate_hz, ChannelLayout layout) = 0; + // Accepts deinterleaved float audio with the range [-1, 1]. Each element of + // |data| points to a channel buffer, arranged according to |reverse_config|. + virtual int AnalyzeReverseStream(const float* const* data, + const StreamConfig& reverse_config) = 0; + // This must be called if and only if echo processing is enabled. // // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end @@ -432,6 +464,102 @@ class AudioProcessing { static const int kChunkSizeMs = 10; }; +class StreamConfig { + public: + // sample_rate_hz: The sampling rate of the stream. + // + // num_channels: The number of audio channels in the stream, excluding the + // keyboard channel if it is present. When passing a + // StreamConfig with an array of arrays T*[N], + // + // N == {num_channels + 1 if has_keyboard + // {num_channels if !has_keyboard + // + // has_keyboard: True if the stream has a keyboard channel. When has_keyboard + // is true, the last channel in any corresponding list of + // channels is the keyboard channel. + StreamConfig(int sample_rate_hz = 0, + int num_channels = 0, + bool has_keyboard = false) + : sample_rate_hz_(sample_rate_hz), + num_channels_(num_channels), + has_keyboard_(has_keyboard), + num_frames_(calculate_frames(sample_rate_hz)) {} + + void set_sample_rate_hz(int value) { + sample_rate_hz_ = value; + num_frames_ = calculate_frames(value); + } + void set_num_channels(int value) { num_channels_ = value; } + void set_has_keyboard(bool value) { has_keyboard_ = value; } + + int sample_rate_hz() const { return sample_rate_hz_; } + + // The number of channels in the stream, not including the keyboard channel if + // present. 
+ int num_channels() const { return num_channels_; } + + bool has_keyboard() const { return has_keyboard_; } + int num_frames() const { return num_frames_; } + + bool operator==(const StreamConfig& other) const { + return sample_rate_hz_ == other.sample_rate_hz_ && + num_channels_ == other.num_channels_ && + has_keyboard_ == other.has_keyboard_; + } + + bool operator!=(const StreamConfig& other) const { return !(*this == other); } + + private: + static int calculate_frames(int sample_rate_hz) { + return AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000; + } + + int sample_rate_hz_; + int num_channels_; + bool has_keyboard_; + int num_frames_; +}; + +class ProcessingConfig { + public: + enum StreamName { + kInputStream, + kOutputStream, + kReverseStream, + kNumStreamNames, + }; + + const StreamConfig& input_stream() const { + return streams[StreamName::kInputStream]; + } + const StreamConfig& output_stream() const { + return streams[StreamName::kOutputStream]; + } + const StreamConfig& reverse_stream() const { + return streams[StreamName::kReverseStream]; + } + + StreamConfig& input_stream() { return streams[StreamName::kInputStream]; } + StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; } + StreamConfig& reverse_stream() { return streams[StreamName::kReverseStream]; } + + bool operator==(const ProcessingConfig& other) const { + for (int i = 0; i < StreamName::kNumStreamNames; ++i) { + if (this->streams[i] != other.streams[i]) { + return false; + } + } + return true; + } + + bool operator!=(const ProcessingConfig& other) const { + return !(*this == other); + } + + StreamConfig streams[StreamName::kNumStreamNames]; +}; + // The acoustic echo cancellation (AEC) component provides better performance // than AECM but also requires more processing power and is dependent on delay // stability and reporting accuracy. 
As such it is well-suited and recommended diff --git a/webrtc/modules/audio_processing/include/mock_audio_processing.h b/webrtc/modules/audio_processing/include/mock_audio_processing.h index 480d0e34b2..8007a86c7f 100644 --- a/webrtc/modules/audio_processing/include/mock_audio_processing.h +++ b/webrtc/modules/audio_processing/include/mock_audio_processing.h @@ -186,6 +186,8 @@ class MockAudioProcessing : public AudioProcessing { ChannelLayout input_layout, ChannelLayout output_layout, ChannelLayout reverse_layout)); + MOCK_METHOD1(Initialize, + int(const ProcessingConfig& processing_config)); MOCK_METHOD1(SetExtraOptions, void(const Config& config)); MOCK_METHOD1(set_sample_rate_hz, @@ -218,11 +220,18 @@ class MockAudioProcessing : public AudioProcessing { int output_sample_rate_hz, ChannelLayout output_layout, float* const* dest)); + MOCK_METHOD4(ProcessStream, + int(const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config, + float* const* dest)); MOCK_METHOD1(AnalyzeReverseStream, int(AudioFrame* frame)); MOCK_METHOD4(AnalyzeReverseStream, int(const float* const* data, int frames, int sample_rate_hz, ChannelLayout input_layout)); + MOCK_METHOD2(AnalyzeReverseStream, + int(const float* const* data, const StreamConfig& reverse_config)); MOCK_METHOD1(set_stream_delay_ms, int(int delay)); MOCK_CONST_METHOD0(stream_delay_ms, diff --git a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc index 291035a012..3030182676 100644 --- a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc +++ b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc @@ -354,8 +354,14 @@ class ApmTest : public ::testing::Test { void ProcessWithDefaultStreamParameters(AudioFrame* frame); void ProcessDelayVerificationTest(int delay_ms, int system_delay_ms, int delay_min, int delay_max); - void TestChangingChannels(int num_channels, - 
AudioProcessing::Error expected_return); + void TestChangingChannelsInt16Interface( + int num_channels, + AudioProcessing::Error expected_return); + void TestChangingForwardChannels(int num_in_channels, + int num_out_channels, + AudioProcessing::Error expected_return); + void TestChangingReverseChannels(int num_rev_channels, + AudioProcessing::Error expected_return); void RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate); void RunManualVolumeChangeIsPossibleTest(int sample_rate); void StreamParametersTest(Format format); @@ -449,12 +455,10 @@ void ApmTest::TearDown() { void ApmTest::Init(AudioProcessing* ap) { ASSERT_EQ(kNoErr, - ap->Initialize(frame_->sample_rate_hz_, - output_sample_rate_hz_, - revframe_->sample_rate_hz_, - LayoutFromChannels(frame_->num_channels_), - LayoutFromChannels(num_output_channels_), - LayoutFromChannels(revframe_->num_channels_))); + ap->Initialize( + {{{frame_->sample_rate_hz_, frame_->num_channels_}, + {output_sample_rate_hz_, num_output_channels_}, + {revframe_->sample_rate_hz_, revframe_->num_channels_}}})); } void ApmTest::Init(int sample_rate_hz, @@ -791,26 +795,79 @@ TEST_F(ApmTest, DelayOffsetWithLimitsIsSetProperly) { EXPECT_EQ(50, apm_->stream_delay_ms()); } -void ApmTest::TestChangingChannels(int num_channels, - AudioProcessing::Error expected_return) { +void ApmTest::TestChangingChannelsInt16Interface( + int num_channels, + AudioProcessing::Error expected_return) { frame_->num_channels_ = num_channels; EXPECT_EQ(expected_return, apm_->ProcessStream(frame_)); EXPECT_EQ(expected_return, apm_->AnalyzeReverseStream(frame_)); } -TEST_F(ApmTest, Channels) { - // Testing number of invalid channels. - TestChangingChannels(0, apm_->kBadNumberChannelsError); - TestChangingChannels(3, apm_->kBadNumberChannelsError); - // Testing number of valid channels. 
- for (int i = 1; i < 3; i++) { - TestChangingChannels(i, kNoErr); +void ApmTest::TestChangingForwardChannels( + int num_in_channels, + int num_out_channels, + AudioProcessing::Error expected_return) { + const StreamConfig input_stream = {frame_->sample_rate_hz_, num_in_channels}; + const StreamConfig output_stream = {output_sample_rate_hz_, num_out_channels}; + + EXPECT_EQ(expected_return, + apm_->ProcessStream(float_cb_->channels(), input_stream, + output_stream, float_cb_->channels())); +} + +void ApmTest::TestChangingReverseChannels( + int num_rev_channels, + AudioProcessing::Error expected_return) { + const ProcessingConfig processing_config = { + {{ frame_->sample_rate_hz_, apm_->num_input_channels() }, + { output_sample_rate_hz_, apm_->num_output_channels() }, + { frame_->sample_rate_hz_, num_rev_channels }}}; + + EXPECT_EQ(expected_return, + apm_->AnalyzeReverseStream(float_cb_->channels(), + processing_config.reverse_stream())); +} + +TEST_F(ApmTest, ChannelsInt16Interface) { + // Testing number of invalid and valid channels. + Init(16000, 16000, 16000, 4, 4, 4, false); + + TestChangingChannelsInt16Interface(0, apm_->kBadNumberChannelsError); + + for (int i = 1; i < 4; i++) { + TestChangingChannelsInt16Interface(i, kNoErr); EXPECT_EQ(i, apm_->num_input_channels()); // We always force the number of reverse channels used for processing to 1. EXPECT_EQ(1, apm_->num_reverse_channels()); } } +TEST_F(ApmTest, Channels) { + // Testing number of invalid and valid channels. + Init(16000, 16000, 16000, 4, 4, 4, false); + + TestChangingForwardChannels(0, 1, apm_->kBadNumberChannelsError); + TestChangingReverseChannels(0, apm_->kBadNumberChannelsError); + + for (int i = 1; i < 4; ++i) { + for (int j = 0; j < 1; ++j) { + // Output channels must be one or match input channels.
+ if (j == 1 || i == j) { + TestChangingForwardChannels(i, j, kNoErr); + TestChangingReverseChannels(i, kNoErr); + + EXPECT_EQ(i, apm_->num_input_channels()); + EXPECT_EQ(j, apm_->num_output_channels()); + // The number of reverse channels used for processing is always 1. + EXPECT_EQ(1, apm_->num_reverse_channels()); + } else { + TestChangingForwardChannels(i, j, + AudioProcessing::kBadNumberChannelsError); + } + } + } +} + TEST_F(ApmTest, SampleRatesInt) { // Testing invalid sample rates SetContainerFormat(10000, 2, frame_, &float_cb_); @@ -2294,12 +2351,9 @@ class AudioProcessingTest config.Set<ExperimentalAgc>(new ExperimentalAgc(false)); rtc::scoped_ptr<AudioProcessing> ap(AudioProcessing::Create(config)); EnableAllAPComponents(ap.get()); - ap->Initialize(input_rate, - output_rate, - reverse_rate, - LayoutFromChannels(num_input_channels), - LayoutFromChannels(num_output_channels), - LayoutFromChannels(num_reverse_channels)); + ap->Initialize({{{input_rate, num_input_channels}, + {output_rate, num_output_channels}, + {reverse_rate, num_reverse_channels}}}); FILE* far_file = fopen(ResourceFilePath("far", reverse_rate).c_str(), "rb"); FILE* near_file = fopen(ResourceFilePath("near", input_rate).c_str(), "rb"); diff --git a/webrtc/modules/audio_processing/test/audioproc_float.cc b/webrtc/modules/audio_processing/test/audioproc_float.cc index dac43629cf..d2983b2c56 100644 --- a/webrtc/modules/audio_processing/test/audioproc_float.cc +++ b/webrtc/modules/audio_processing/test/audioproc_float.cc @@ -127,6 +127,13 @@ int main(int argc, char* argv[]) { TickTime processing_start_time; TickInterval accumulated_time; int num_chunks = 0; + + const StreamConfig input_config = { + in_file.sample_rate(), in_buf.num_channels(), + }; + const StreamConfig output_config = { + out_file.sample_rate(), out_buf.num_channels(), + }; while (in_file.ReadSamples(in_interleaved.size(), &in_interleaved[0]) == in_interleaved.size()) { // Have logs display the file time rather than
wallclock time. @@ -139,14 +146,8 @@ int main(int argc, char* argv[]) { if (FLAGS_perf) { processing_start_time = TickTime::Now(); } - CHECK_EQ(kNoErr, - ap->ProcessStream(in_buf.channels(), - in_buf.num_frames(), - in_file.sample_rate(), - LayoutFromChannels(in_buf.num_channels()), - out_file.sample_rate(), - LayoutFromChannels(out_buf.num_channels()), - out_buf.channels())); + CHECK_EQ(kNoErr, ap->ProcessStream(in_buf.channels(), input_config, + output_config, out_buf.channels())); if (FLAGS_perf) { accumulated_time += TickTime::Now() - processing_start_time; } |