diff options
author | andrew@webrtc.org <andrew@webrtc.org> | 2014-10-30 03:40:10 +0000 |
---|---|---|
committer | andrew@webrtc.org <andrew@webrtc.org> | 2014-10-30 03:40:10 +0000 |
commit | bce1329490c4cc7c1313cfee1afa41c721daa699 (patch) | |
tree | 3fe11a275d4ddf39ac3816592a590461c3b238ff | |
parent | 7b5a8968dbc3040e1739f839522ef7d09f8815ea (diff) | |
download | webrtc-bce1329490c4cc7c1313cfee1afa41c721daa699.tar.gz |
Refactor audio conversion functions.
Use a consistent naming scheme that can be understood at the callsite
without having to refer to documentation.
Remove hacks in AudioBuffer intended to maintain bit-exactness with the
float path. The conversions are now all done in the natural way, and the tests
instead enforce close, but not bit-exact, output between the int16 and float paths.
Output of ApmTest.Process:
https://paste.googleplex.com/5931055831842816
R=aluebs@webrtc.org, bjornv@webrtc.org, kwiberg@webrtc.org
Review URL: https://webrtc-codereview.appspot.com/13049004
git-svn-id: http://webrtc.googlecode.com/svn/trunk/webrtc@7561 4adac7df-926f-26a2-2b94-8c16560cd09d
-rw-r--r-- | common_audio/audio_util.cc | 22 | ||||
-rw-r--r-- | common_audio/audio_util_unittest.cc | 48 | ||||
-rw-r--r-- | common_audio/include/audio_util.h | 54 | ||||
-rw-r--r-- | common_audio/resampler/push_sinc_resampler.cc | 2 | ||||
-rw-r--r-- | common_audio/resampler/push_sinc_resampler_unittest.cc | 9 | ||||
-rw-r--r-- | common_audio/wav_writer.cc | 2 | ||||
-rw-r--r-- | modules/audio_processing/audio_buffer.cc | 38 | ||||
-rw-r--r-- | modules/audio_processing/test/audio_processing_unittest.cc | 53 | ||||
-rw-r--r-- | modules/audio_processing/test/test_utils.h | 23 |
9 files changed, 155 insertions, 96 deletions
diff --git a/common_audio/audio_util.cc b/common_audio/audio_util.cc index f2936b07..2047295c 100644 --- a/common_audio/audio_util.cc +++ b/common_audio/audio_util.cc @@ -14,19 +14,29 @@ namespace webrtc { -void RoundToInt16(const float* src, size_t size, int16_t* dest) { +void FloatToS16(const float* src, size_t size, int16_t* dest) { for (size_t i = 0; i < size; ++i) - dest[i] = RoundToInt16(src[i]); + dest[i] = FloatToS16(src[i]); } -void ScaleAndRoundToInt16(const float* src, size_t size, int16_t* dest) { +void S16ToFloat(const int16_t* src, size_t size, float* dest) { for (size_t i = 0; i < size; ++i) - dest[i] = ScaleAndRoundToInt16(src[i]); + dest[i] = S16ToFloat(src[i]); } -void ScaleToFloat(const int16_t* src, size_t size, float* dest) { +void FloatS16ToS16(const float* src, size_t size, int16_t* dest) { for (size_t i = 0; i < size; ++i) - dest[i] = ScaleToFloat(src[i]); + dest[i] = FloatS16ToS16(src[i]); +} + +void FloatToFloatS16(const float* src, size_t size, float* dest) { + for (size_t i = 0; i < size; ++i) + dest[i] = FloatToFloatS16(src[i]); +} + +void FloatS16ToFloat(const float* src, size_t size, float* dest) { + for (size_t i = 0; i < size; ++i) + dest[i] = FloatS16ToFloat(src[i]); } } // namespace webrtc diff --git a/common_audio/audio_util_unittest.cc b/common_audio/audio_util_unittest.cc index bf9ad812..2cdf5381 100644 --- a/common_audio/audio_util_unittest.cc +++ b/common_audio/audio_util_unittest.cc @@ -26,35 +26,59 @@ void ExpectArraysEq(const float* ref, const float* test, int length) { } } -TEST(AudioUtilTest, RoundToInt16) { +TEST(AudioUtilTest, FloatToS16) { + const int kSize = 9; + const float kInput[kSize] = { + 0.f, 0.4f / 32767.f, 0.6f / 32767.f, -0.4f / 32768.f, -0.6f / 32768.f, + 1.f, -1.f, 1.1f, -1.1f}; + const int16_t kReference[kSize] = { + 0, 0, 1, 0, -1, 32767, -32768, 32767, -32768}; + int16_t output[kSize]; + FloatToS16(kInput, kSize, output); + ExpectArraysEq(kReference, output, kSize); +} + +TEST(AudioUtilTest, 
S16ToFloat) { + const int kSize = 7; + const int16_t kInput[kSize] = {0, 1, -1, 16384, -16384, 32767, -32768}; + const float kReference[kSize] = { + 0.f, 1.f / 32767.f, -1.f / 32768.f, 16384.f / 32767.f, -0.5f, 1.f, -1.f}; + float output[kSize]; + S16ToFloat(kInput, kSize, output); + ExpectArraysEq(kReference, output, kSize); +} + +TEST(AudioUtilTest, FloatS16ToS16) { const int kSize = 7; const float kInput[kSize] = { 0.f, 0.4f, 0.5f, -0.4f, -0.5f, 32768.f, -32769.f}; const int16_t kReference[kSize] = {0, 0, 1, 0, -1, 32767, -32768}; int16_t output[kSize]; - RoundToInt16(kInput, kSize, output); + FloatS16ToS16(kInput, kSize, output); ExpectArraysEq(kReference, output, kSize); } -TEST(AudioUtilTest, ScaleAndRoundToInt16) { +TEST(AudioUtilTest, FloatToFloatS16) { const int kSize = 9; const float kInput[kSize] = { 0.f, 0.4f / 32767.f, 0.6f / 32767.f, -0.4f / 32768.f, -0.6f / 32768.f, 1.f, -1.f, 1.1f, -1.1f}; - const int16_t kReference[kSize] = { - 0, 0, 1, 0, -1, 32767, -32768, 32767, -32768}; - int16_t output[kSize]; - ScaleAndRoundToInt16(kInput, kSize, output); + const float kReference[kSize] = { + 0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32767.f, -32768.f, 36043.7f, -36044.8f}; + float output[kSize]; + FloatToFloatS16(kInput, kSize, output); ExpectArraysEq(kReference, output, kSize); } -TEST(AudioUtilTest, ScaleToFloat) { - const int kSize = 7; - const int16_t kInput[kSize] = {0, 1, -1, 16384, -16384, 32767, -32768}; +TEST(AudioUtilTest, FloatS16ToFloat) { + const int kSize = 9; + const float kInput[kSize] = { + 0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32767.f, -32768.f, 36043.7f, -36044.8f}; const float kReference[kSize] = { - 0.f, 1.f / 32767.f, -1.f / 32768.f, 16384.f / 32767.f, -0.5f, 1.f, -1.f}; + 0.f, 0.4f / 32767.f, 0.6f / 32767.f, -0.4f / 32768.f, -0.6f / 32768.f, + 1.f, -1.f, 1.1f, -1.1f}; float output[kSize]; - ScaleToFloat(kInput, kSize, output); + FloatS16ToFloat(kInput, kSize, output); ExpectArraysEq(kReference, output, kSize); } diff --git 
a/common_audio/include/audio_util.h b/common_audio/include/audio_util.h index 0ce034be..5a4e8151 100644 --- a/common_audio/include/audio_util.h +++ b/common_audio/include/audio_util.h @@ -20,18 +20,11 @@ namespace webrtc { typedef std::numeric_limits<int16_t> limits_int16; -static inline int16_t RoundToInt16(float v) { - const float kMaxRound = limits_int16::max() - 0.5f; - const float kMinRound = limits_int16::min() + 0.5f; - if (v > 0) - return v >= kMaxRound ? limits_int16::max() : - static_cast<int16_t>(v + 0.5f); - return v <= kMinRound ? limits_int16::min() : - static_cast<int16_t>(v - 0.5f); -} - -// Scale (from [-1, 1]) and round to full-range int16 with clamping. -static inline int16_t ScaleAndRoundToInt16(float v) { +// The conversion functions use the following naming convention: +// S16: int16_t [-32768, 32767] +// Float: float [-1.0, 1.0] +// FloatS16: float [-32768.0, 32767.0] +static inline int16_t FloatToS16(float v) { if (v > 0) return v >= 1 ? limits_int16::max() : static_cast<int16_t>(v * limits_int16::max() + 0.5f); @@ -39,22 +32,37 @@ static inline int16_t ScaleAndRoundToInt16(float v) { static_cast<int16_t>(-v * limits_int16::min() - 0.5f); } -// Scale to float [-1, 1]. -static inline float ScaleToFloat(int16_t v) { - const float kMaxInt16Inverse = 1.f / limits_int16::max(); - const float kMinInt16Inverse = 1.f / limits_int16::min(); +static inline float S16ToFloat(int16_t v) { + static const float kMaxInt16Inverse = 1.f / limits_int16::max(); + static const float kMinInt16Inverse = 1.f / limits_int16::min(); return v * (v > 0 ? kMaxInt16Inverse : -kMinInt16Inverse); } -// Round |size| elements of |src| to int16 with clamping and write to |dest|. -void RoundToInt16(const float* src, size_t size, int16_t* dest); +static inline int16_t FloatS16ToS16(float v) { + static const float kMaxRound = limits_int16::max() - 0.5f; + static const float kMinRound = limits_int16::min() + 0.5f; + if (v > 0) + return v >= kMaxRound ? 
limits_int16::max() : + static_cast<int16_t>(v + 0.5f); + return v <= kMinRound ? limits_int16::min() : + static_cast<int16_t>(v - 0.5f); +} -// Scale (from [-1, 1]) and round |size| elements of |src| to full-range int16 -// with clamping and write to |dest|. -void ScaleAndRoundToInt16(const float* src, size_t size, int16_t* dest); +static inline float FloatToFloatS16(float v) { + return v > 0 ? v * limits_int16::max() : -v * limits_int16::min(); +} + +static inline float FloatS16ToFloat(float v) { + static const float kMaxInt16Inverse = 1.f / limits_int16::max(); + static const float kMinInt16Inverse = 1.f / limits_int16::min(); + return v * (v > 0 ? kMaxInt16Inverse : -kMinInt16Inverse); +} -// Scale |size| elements of |src| to float [-1, 1] and write to |dest|. -void ScaleToFloat(const int16_t* src, size_t size, float* dest); +void FloatToS16(const float* src, size_t size, int16_t* dest); +void S16ToFloat(const int16_t* src, size_t size, float* dest); +void FloatS16ToS16(const float* src, size_t size, int16_t* dest); +void FloatToFloatS16(const float* src, size_t size, float* dest); +void FloatS16ToFloat(const float* src, size_t size, float* dest); // Deinterleave audio from |interleaved| to the channel buffers pointed to // by |deinterleaved|. There must be sufficient space allocated in the diff --git a/common_audio/resampler/push_sinc_resampler.cc b/common_audio/resampler/push_sinc_resampler.cc index 02755590..49e2e12e 100644 --- a/common_audio/resampler/push_sinc_resampler.cc +++ b/common_audio/resampler/push_sinc_resampler.cc @@ -40,7 +40,7 @@ int PushSincResampler::Resample(const int16_t* source, source_ptr_int_ = source; // Pass NULL as the float source to have Run() read from the int16 source. 
Resample(NULL, source_length, float_buffer_.get(), destination_frames_); - RoundToInt16(float_buffer_.get(), destination_frames_, destination); + FloatS16ToS16(float_buffer_.get(), destination_frames_, destination); source_ptr_int_ = NULL; return destination_frames_; } diff --git a/common_audio/resampler/push_sinc_resampler_unittest.cc b/common_audio/resampler/push_sinc_resampler_unittest.cc index 1ca4fdf9..90ac0cf0 100644 --- a/common_audio/resampler/push_sinc_resampler_unittest.cc +++ b/common_audio/resampler/push_sinc_resampler_unittest.cc @@ -160,16 +160,15 @@ void PushSincResamplerTest::ResampleTest(bool int_format) { resampler_source.Run(input_samples, source.get()); if (int_format) { for (int i = 0; i < kNumBlocks; ++i) { - ScaleAndRoundToInt16( - &source[i * input_block_size], input_block_size, source_int.get()); + FloatToS16(&source[i * input_block_size], input_block_size, + source_int.get()); EXPECT_EQ(output_block_size, resampler.Resample(source_int.get(), input_block_size, destination_int.get(), output_block_size)); - ScaleToFloat(destination_int.get(), - output_block_size, - &resampled_destination[i * output_block_size]); + S16ToFloat(destination_int.get(), output_block_size, + &resampled_destination[i * output_block_size]); } } else { for (int i = 0; i < kNumBlocks; ++i) { diff --git a/common_audio/wav_writer.cc b/common_audio/wav_writer.cc index 30a220c2..52449789 100644 --- a/common_audio/wav_writer.cc +++ b/common_audio/wav_writer.cc @@ -68,7 +68,7 @@ void WavFile::WriteSamples(const float* samples, size_t num_samples) { for (size_t i = 0; i < num_samples; i += kChunksize) { int16_t isamples[kChunksize]; const size_t chunk = std::min(kChunksize, num_samples - i); - RoundToInt16(samples + i, chunk, isamples); + FloatS16ToS16(samples + i, chunk, isamples); WriteSamples(isamples, chunk); } } diff --git a/modules/audio_processing/audio_buffer.cc b/modules/audio_processing/audio_buffer.cc index 8aff61cc..99470601 100644 --- 
a/modules/audio_processing/audio_buffer.cc +++ b/modules/audio_processing/audio_buffer.cc @@ -51,18 +51,11 @@ int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) { return -1; } -void StereoToMono(const float* left, const float* right, float* out, +template <typename T> +void StereoToMono(const T* left, const T* right, T* out, int samples_per_channel) { - for (int i = 0; i < samples_per_channel; ++i) { + for (int i = 0; i < samples_per_channel; ++i) out[i] = (left[i] + right[i]) / 2; - } -} - -void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out, - int samples_per_channel) { - for (int i = 0; i < samples_per_channel; ++i) { - out[i] = (left[i] + right[i]) >> 1; - } } } // namespace @@ -114,13 +107,7 @@ class IFChannelBuffer { void RefreshI() { if (!ivalid_) { assert(fvalid_); - const float* const float_data = fbuf_.data(); - int16_t* const int_data = ibuf_.data(); - const int length = ibuf_.length(); - for (int i = 0; i < length; ++i) - int_data[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(), - float_data[i], - std::numeric_limits<int16_t>::min()); + FloatS16ToS16(fbuf_.data(), ibuf_.length(), ibuf_.data()); ivalid_ = true; } } @@ -230,8 +217,8 @@ void AudioBuffer::CopyFrom(const float* const* data, // Convert to int16. for (int i = 0; i < num_proc_channels_; ++i) { - ScaleAndRoundToInt16(data_ptr[i], proc_samples_per_channel_, - channels_->ibuf()->channel(i)); + FloatToFloatS16(data_ptr[i], proc_samples_per_channel_, + channels_->fbuf()->channel(i)); } } @@ -248,9 +235,9 @@ void AudioBuffer::CopyTo(int samples_per_channel, data_ptr = process_buffer_->channels(); } for (int i = 0; i < num_proc_channels_; ++i) { - ScaleToFloat(channels_->ibuf()->channel(i), - proc_samples_per_channel_, - data_ptr[i]); + FloatS16ToFloat(channels_->fbuf()->channel(i), + proc_samples_per_channel_, + data_ptr[i]); } // Resample. 
@@ -449,12 +436,7 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) { // Downmix directly; no explicit deinterleaving needed. int16_t* downmixed = channels_->ibuf()->channel(0); for (int i = 0; i < input_samples_per_channel_; ++i) { - // HACK(ajm): The downmixing in the int16_t path is in practice never - // called from production code. We do this weird scaling to and from float - // to satisfy tests checking for bit-exactness with the float path. - float downmix_float = (ScaleToFloat(frame->data_[i * 2]) + - ScaleToFloat(frame->data_[i * 2 + 1])) / 2; - downmixed[i] = ScaleAndRoundToInt16(downmix_float); + downmixed[i] = (frame->data_[i * 2] + frame->data_[i * 2 + 1]) / 2; } } else { assert(num_proc_channels_ == num_input_channels_); diff --git a/modules/audio_processing/test/audio_processing_unittest.cc b/modules/audio_processing/test/audio_processing_unittest.cc index a0fb303b..af31a636 100644 --- a/modules/audio_processing/test/audio_processing_unittest.cc +++ b/modules/audio_processing/test/audio_processing_unittest.cc @@ -66,9 +66,9 @@ void ConvertToFloat(const int16_t* int_data, ChannelBuffer<float>* cb) { cb->samples_per_channel(), cb->num_channels(), cb_int.channels()); - ScaleToFloat(cb_int.data(), - cb->samples_per_channel() * cb->num_channels(), - cb->data()); + S16ToFloat(cb_int.data(), + cb->samples_per_channel() * cb->num_channels(), + cb->data()); } void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) { @@ -135,7 +135,7 @@ void SetFrameTo(AudioFrame* frame, int16_t left, int16_t right) { void ScaleFrame(AudioFrame* frame, float scale) { for (int i = 0; i < frame->samples_per_channel_ * frame->num_channels_; ++i) { - frame->data_[i] = RoundToInt16(frame->data_[i] * scale); + frame->data_[i] = FloatS16ToS16(frame->data_[i] * scale); } } @@ -1650,7 +1650,7 @@ TEST_F(ApmTest, DebugDumpFromFileHandle) { #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP } -TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) { +TEST_F(ApmTest, 
FloatAndIntInterfacesGiveSimilarResults) { audioproc::OutputData ref_data; OpenFileAndReadMessage(ref_filename_, &ref_data); @@ -1679,7 +1679,8 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) { Init(fapm.get()); ChannelBuffer<int16_t> output_cb(samples_per_channel, num_input_channels); - scoped_ptr<int16_t[]> output_int16(new int16_t[output_length]); + ChannelBuffer<int16_t> output_int16(samples_per_channel, + num_input_channels); int analog_level = 127; while (ReadFrame(far_file_, revframe_, revfloat_cb_.get()) && @@ -1701,7 +1702,9 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) { EXPECT_NOERR(fapm->gain_control()->set_stream_analog_level(analog_level)); EXPECT_NOERR(apm_->ProcessStream(frame_)); - // TODO(ajm): Update to support different output rates. + Deinterleave(frame_->data_, samples_per_channel, num_output_channels, + output_int16.channels()); + EXPECT_NOERR(fapm->ProcessStream( float_cb_->channels(), samples_per_channel, @@ -1711,24 +1714,34 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) { LayoutFromChannels(num_output_channels), float_cb_->channels())); - // Convert to interleaved int16. - ScaleAndRoundToInt16(float_cb_->data(), output_length, output_cb.data()); - Interleave(output_cb.channels(), - samples_per_channel, - num_output_channels, - output_int16.get()); - // Verify float and int16 paths produce identical output. - EXPECT_EQ(0, memcmp(frame_->data_, output_int16.get(), output_length)); + FloatToS16(float_cb_->data(), output_length, output_cb.data()); + for (int j = 0; j < num_output_channels; ++j) { + float variance = 0; + float snr = ComputeSNR(output_int16.channel(j), output_cb.channel(j), + samples_per_channel, &variance); + #if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) + // There are a few chunks in the fixed-point profile that give low SNR. + // Listening confirmed the difference is acceptable. 
+ const float kVarianceThreshold = 150; + const float kSNRThreshold = 10; + #else + const float kVarianceThreshold = 20; + const float kSNRThreshold = 20; + #endif + // Skip frames with low energy. + if (sqrt(variance) > kVarianceThreshold) { + EXPECT_LT(kSNRThreshold, snr); + } + } analog_level = fapm->gain_control()->stream_analog_level(); EXPECT_EQ(apm_->gain_control()->stream_analog_level(), fapm->gain_control()->stream_analog_level()); EXPECT_EQ(apm_->echo_cancellation()->stream_has_echo(), fapm->echo_cancellation()->stream_has_echo()); - EXPECT_EQ(apm_->voice_detection()->stream_has_voice(), - fapm->voice_detection()->stream_has_voice()); - EXPECT_EQ(apm_->noise_suppression()->speech_probability(), - fapm->noise_suppression()->speech_probability()); + EXPECT_NEAR(apm_->noise_suppression()->speech_probability(), + fapm->noise_suppression()->speech_probability(), + 0.0005); // Reset in case of downmixing. frame_->num_channels_ = test->num_input_channels(); @@ -2002,7 +2015,7 @@ bool ReadChunk(FILE* file, int16_t* int_data, float* float_data, return false; // This is expected. } - ScaleToFloat(int_data, frame_size, float_data); + S16ToFloat(int_data, frame_size, float_data); if (cb->num_channels() == 1) { MixStereoToMono(float_data, cb->data(), cb->samples_per_channel()); } else { diff --git a/modules/audio_processing/test/test_utils.h b/modules/audio_processing/test/test_utils.h index 61edd8f3..a99f3427 100644 --- a/modules/audio_processing/test/test_utils.h +++ b/modules/audio_processing/test/test_utils.h @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include <math.h> #include <limits> #include "webrtc/audio_processing/debug.pb.h" @@ -153,4 +154,26 @@ static inline bool ReadMessageFromFile(FILE* file, return msg->ParseFromArray(bytes.get(), size); } +template <typename T> +float ComputeSNR(const T* ref, const T* test, int length, float* variance) { + float mse = 0; + float mean = 0; + *variance = 0; + for (int i = 0; i < length; ++i) { + T error = ref[i] - test[i]; + mse += error * error; + *variance += ref[i] * ref[i]; + mean += ref[i]; + } + mse /= length; + *variance /= length; + mean /= length; + *variance -= mean * mean; + + float snr = 100; // We assign 100 dB to the zero-error case. + if (mse > 0) + snr = 10 * log10(*variance / mse); + return snr; +} + } // namespace webrtc |