Refactor audio conversion functions.

Use a consistent naming scheme that can be understood at the call site without having to refer to documentation.

Remove hacks in AudioBuffer intended to maintain bit-exactness with the float path. The conversions etc. are now all natural, and instead we enforce close but not bit-exact output between the two paths.

Output of ApmTest.Process: https://paste.googleplex.com/5931055831842816

R=aluebs@webrtc.org, bjornv@webrtc.org, kwiberg@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/13049004

git-svn-id: http://webrtc.googlecode.com/svn/trunk/webrtc@7561 4adac7df-926f-26a2-2b94-8c16560cd09d
author: andrew@webrtc.org <andrew@webrtc.org> 2014-10-30 03:40:10 +0000
committer: andrew@webrtc.org <andrew@webrtc.org> 2014-10-30 03:40:10 +0000
commit: bce1329490c4cc7c1313cfee1afa41c721daa699 (patch)
tree: 3fe11a275d4ddf39ac3816592a590461c3b238ff
parent: 7b5a8968dbc3040e1739f839522ef7d09f8815ea (diff)
download: webrtc-bce1329490c4cc7c1313cfee1afa41c721daa699.tar.gz
9 files changed, 155 insertions, 96 deletions
diff --git a/common_audio/audio_util.cc b/common_audio/audio_util.cc
index f2936b07..2047295c 100644
--- a/common_audio/audio_util.cc
+++ b/common_audio/audio_util.cc
@@ -14,19 +14,29 @@
 
 namespace webrtc {
 
-void RoundToInt16(const float* src, size_t size, int16_t* dest) {
+void FloatToS16(const float* src, size_t size, int16_t* dest) {
   for (size_t i = 0; i < size; ++i)
-    dest[i] = RoundToInt16(src[i]);
+    dest[i] = FloatToS16(src[i]);
 }
 
-void ScaleAndRoundToInt16(const float* src, size_t size, int16_t* dest) {
+void S16ToFloat(const int16_t* src, size_t size, float* dest) {
   for (size_t i = 0; i < size; ++i)
-    dest[i] = ScaleAndRoundToInt16(src[i]);
+    dest[i] = S16ToFloat(src[i]);
 }
 
-void ScaleToFloat(const int16_t* src, size_t size, float* dest) {
+void FloatS16ToS16(const float* src, size_t size, int16_t* dest) {
   for (size_t i = 0; i < size; ++i)
-    dest[i] = ScaleToFloat(src[i]);
+    dest[i] = FloatS16ToS16(src[i]);
+}
+
+void FloatToFloatS16(const float* src, size_t size, float* dest) {
+  for (size_t i = 0; i < size; ++i)
+    dest[i] = FloatToFloatS16(src[i]);
+}
+
+void FloatS16ToFloat(const float* src, size_t size, float* dest) {
+  for (size_t i = 0; i < size; ++i)
+    dest[i] = FloatS16ToFloat(src[i]);
 }
 
 }  // namespace webrtc
diff --git a/common_audio/audio_util_unittest.cc b/common_audio/audio_util_unittest.cc
index bf9ad812..2cdf5381 100644
--- a/common_audio/audio_util_unittest.cc
+++ b/common_audio/audio_util_unittest.cc
@@ -26,35 +26,59 @@ void ExpectArraysEq(const float* ref, const float* test, int length) {
   }
 }
 
-TEST(AudioUtilTest, RoundToInt16) {
+TEST(AudioUtilTest, FloatToS16) {
+  const int kSize = 9;
+  const float kInput[kSize] = {
+      0.f, 0.4f / 32767.f, 0.6f / 32767.f, -0.4f / 32768.f, -0.6f / 32768.f,
+      1.f, -1.f, 1.1f, -1.1f};
+  const int16_t kReference[kSize] = {
+    0, 0, 1, 0, -1, 32767, -32768, 32767, -32768};
+  int16_t output[kSize];
+  FloatToS16(kInput, kSize, output);
+  ExpectArraysEq(kReference, output, kSize);
+}
+
+TEST(AudioUtilTest, S16ToFloat) {
+  const int kSize = 7;
+  const int16_t kInput[kSize] = {0, 1, -1, 16384, -16384, 32767, -32768};
+  const float kReference[kSize] = {
+      0.f, 1.f / 32767.f, -1.f / 32768.f, 16384.f / 32767.f, -0.5f, 1.f, -1.f};
+  float output[kSize];
+  S16ToFloat(kInput, kSize, output);
+  ExpectArraysEq(kReference, output, kSize);
+}
+
+TEST(AudioUtilTest, FloatS16ToS16) {
   const int kSize = 7;
   const float kInput[kSize] = {
       0.f, 0.4f, 0.5f, -0.4f, -0.5f, 32768.f, -32769.f};
   const int16_t kReference[kSize] = {0, 0, 1, 0, -1, 32767, -32768};
   int16_t output[kSize];
-  RoundToInt16(kInput, kSize, output);
+  FloatS16ToS16(kInput, kSize, output);
   ExpectArraysEq(kReference, output, kSize);
 }
 
-TEST(AudioUtilTest, ScaleAndRoundToInt16) {
+TEST(AudioUtilTest, FloatToFloatS16) {
   const int kSize = 9;
   const float kInput[kSize] = {
       0.f, 0.4f / 32767.f, 0.6f / 32767.f, -0.4f / 32768.f, -0.6f / 32768.f,
       1.f, -1.f, 1.1f, -1.1f};
-  const int16_t kReference[kSize] = {
-    0, 0, 1, 0, -1, 32767, -32768, 32767, -32768};
-  int16_t output[kSize];
-  ScaleAndRoundToInt16(kInput, kSize, output);
+  const float kReference[kSize] = {
+    0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32767.f, -32768.f, 36043.7f, -36044.8f};
+  float output[kSize];
+  FloatToFloatS16(kInput, kSize, output);
   ExpectArraysEq(kReference, output, kSize);
 }
 
-TEST(AudioUtilTest, ScaleToFloat) {
-  const int kSize = 7;
-  const int16_t kInput[kSize] = {0, 1, -1, 16384, -16384, 32767, -32768};
+TEST(AudioUtilTest, FloatS16ToFloat) {
+  const int kSize = 9;
+  const float kInput[kSize] = {
+    0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32767.f, -32768.f, 36043.7f, -36044.8f};
   const float kReference[kSize] = {
-      0.f, 1.f / 32767.f, -1.f / 32768.f, 16384.f / 32767.f, -0.5f, 1.f, -1.f};
+      0.f, 0.4f / 32767.f, 0.6f / 32767.f, -0.4f / 32768.f, -0.6f / 32768.f,
+      1.f, -1.f, 1.1f, -1.1f};
   float output[kSize];
-  ScaleToFloat(kInput, kSize, output);
+  FloatS16ToFloat(kInput, kSize, output);
   ExpectArraysEq(kReference, output, kSize);
 }
 
diff --git a/common_audio/include/audio_util.h b/common_audio/include/audio_util.h
index 0ce034be..5a4e8151 100644
--- a/common_audio/include/audio_util.h
+++ b/common_audio/include/audio_util.h
@@ -20,18 +20,11 @@ namespace webrtc {
 
 typedef std::numeric_limits<int16_t> limits_int16;
 
-static inline int16_t RoundToInt16(float v) {
-  const float kMaxRound = limits_int16::max() - 0.5f;
-  const float kMinRound = limits_int16::min() + 0.5f;
-  if (v > 0)
-    return v >= kMaxRound ? limits_int16::max() :
-                            static_cast<int16_t>(v + 0.5f);
-  return v <= kMinRound ? limits_int16::min() :
-                          static_cast<int16_t>(v - 0.5f);
-}
-
-// Scale (from [-1, 1]) and round to full-range int16 with clamping.
-static inline int16_t ScaleAndRoundToInt16(float v) {
+// The conversion functions use the following naming convention:
+// S16:      int16_t [-32768, 32767]
+// Float:    float   [-1.0, 1.0]
+// FloatS16: float   [-32768.0, 32767.0]
+static inline int16_t FloatToS16(float v) {
   if (v > 0)
     return v >= 1 ? limits_int16::max() :
                     static_cast<int16_t>(v * limits_int16::max() + 0.5f);
@@ -39,22 +32,37 @@ static inline int16_t ScaleAndRoundToInt16(float v) {
                    static_cast<int16_t>(-v * limits_int16::min() - 0.5f);
 }
 
-// Scale to float [-1, 1].
-static inline float ScaleToFloat(int16_t v) {
-  const float kMaxInt16Inverse = 1.f / limits_int16::max();
-  const float kMinInt16Inverse = 1.f / limits_int16::min();
+static inline float S16ToFloat(int16_t v) {
+  static const float kMaxInt16Inverse = 1.f / limits_int16::max();
+  static const float kMinInt16Inverse = 1.f / limits_int16::min();
   return v * (v > 0 ? kMaxInt16Inverse : -kMinInt16Inverse);
 }
 
-// Round |size| elements of |src| to int16 with clamping and write to |dest|.
-void RoundToInt16(const float* src, size_t size, int16_t* dest);
+static inline int16_t FloatS16ToS16(float v) {
+  static const float kMaxRound = limits_int16::max() - 0.5f;
+  static const float kMinRound = limits_int16::min() + 0.5f;
+  if (v > 0)
+    return v >= kMaxRound ? limits_int16::max() :
+                            static_cast<int16_t>(v + 0.5f);
+  return v <= kMinRound ? limits_int16::min() :
+                          static_cast<int16_t>(v - 0.5f);
+}
 
-// Scale (from [-1, 1]) and round |size| elements of |src| to full-range int16
-// with clamping and write to |dest|.
-void ScaleAndRoundToInt16(const float* src, size_t size, int16_t* dest);
+static inline float FloatToFloatS16(float v) {
+  return v > 0 ? v * limits_int16::max() : -v * limits_int16::min();
+}
+
+static inline float FloatS16ToFloat(float v) {
+  static const float kMaxInt16Inverse = 1.f / limits_int16::max();
+  static const float kMinInt16Inverse = 1.f / limits_int16::min();
+  return v * (v > 0 ? kMaxInt16Inverse : -kMinInt16Inverse);
+}
 
-// Scale |size| elements of |src| to float [-1, 1] and write to |dest|.
-void ScaleToFloat(const int16_t* src, size_t size, float* dest);
+void FloatToS16(const float* src, size_t size, int16_t* dest);
+void S16ToFloat(const int16_t* src, size_t size, float* dest);
+void FloatS16ToS16(const float* src, size_t size, int16_t* dest);
+void FloatToFloatS16(const float* src, size_t size, float* dest);
+void FloatS16ToFloat(const float* src, size_t size, float* dest);
 
 // Deinterleave audio from |interleaved| to the channel buffers pointed to
 // by |deinterleaved|. There must be sufficient space allocated in the
diff --git a/common_audio/resampler/push_sinc_resampler.cc b/common_audio/resampler/push_sinc_resampler.cc
index 02755590..49e2e12e 100644
--- a/common_audio/resampler/push_sinc_resampler.cc
+++ b/common_audio/resampler/push_sinc_resampler.cc
@@ -40,7 +40,7 @@ int PushSincResampler::Resample(const int16_t* source,
   source_ptr_int_ = source;
   // Pass NULL as the float source to have Run() read from the int16 source.
   Resample(NULL, source_length, float_buffer_.get(), destination_frames_);
-  RoundToInt16(float_buffer_.get(), destination_frames_, destination);
+  FloatS16ToS16(float_buffer_.get(), destination_frames_, destination);
   source_ptr_int_ = NULL;
   return destination_frames_;
 }
diff --git a/common_audio/resampler/push_sinc_resampler_unittest.cc b/common_audio/resampler/push_sinc_resampler_unittest.cc
index 1ca4fdf9..90ac0cf0 100644
--- a/common_audio/resampler/push_sinc_resampler_unittest.cc
+++ b/common_audio/resampler/push_sinc_resampler_unittest.cc
@@ -160,16 +160,15 @@ void PushSincResamplerTest::ResampleTest(bool int_format) {
   resampler_source.Run(input_samples, source.get());
   if (int_format) {
     for (int i = 0; i < kNumBlocks; ++i) {
-      ScaleAndRoundToInt16(
-          &source[i * input_block_size], input_block_size, source_int.get());
+      FloatToS16(&source[i * input_block_size], input_block_size,
+               source_int.get());
       EXPECT_EQ(output_block_size,
                 resampler.Resample(source_int.get(),
                                    input_block_size,
                                    destination_int.get(),
                                    output_block_size));
-      ScaleToFloat(destination_int.get(),
-                   output_block_size,
-                   &resampled_destination[i * output_block_size]);
+      S16ToFloat(destination_int.get(), output_block_size,
+               &resampled_destination[i * output_block_size]);
     }
   } else {
     for (int i = 0; i < kNumBlocks; ++i) {
diff --git a/common_audio/wav_writer.cc b/common_audio/wav_writer.cc
index 30a220c2..52449789 100644
--- a/common_audio/wav_writer.cc
+++ b/common_audio/wav_writer.cc
@@ -68,7 +68,7 @@ void WavFile::WriteSamples(const float* samples, size_t num_samples) {
   for (size_t i = 0; i < num_samples; i += kChunksize) {
     int16_t isamples[kChunksize];
     const size_t chunk = std::min(kChunksize, num_samples - i);
-    RoundToInt16(samples + i, chunk, isamples);
+    FloatS16ToS16(samples + i, chunk, isamples);
     WriteSamples(isamples, chunk);
   }
 }
diff --git a/modules/audio_processing/audio_buffer.cc b/modules/audio_processing/audio_buffer.cc
index 8aff61cc..99470601 100644
--- a/modules/audio_processing/audio_buffer.cc
+++ b/modules/audio_processing/audio_buffer.cc
@@ -51,18 +51,11 @@ int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
   return -1;
 }
 
-void StereoToMono(const float* left, const float* right, float* out,
+template <typename T>
+void StereoToMono(const T* left, const T* right, T* out,
                   int samples_per_channel) {
-  for (int i = 0; i < samples_per_channel; ++i) {
+  for (int i = 0; i < samples_per_channel; ++i)
     out[i] = (left[i] + right[i]) / 2;
-  }
-}
-
-void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
-                  int samples_per_channel) {
-  for (int i = 0; i < samples_per_channel; ++i) {
-    out[i] = (left[i] + right[i]) >> 1;
-  }
 }
 
 }  // namespace
@@ -114,13 +107,7 @@ class IFChannelBuffer {
   void RefreshI() {
     if (!ivalid_) {
       assert(fvalid_);
-      const float* const float_data = fbuf_.data();
-      int16_t* const int_data = ibuf_.data();
-      const int length = ibuf_.length();
-      for (int i = 0; i < length; ++i)
-        int_data[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(),
-                                     float_data[i],
-                                     std::numeric_limits<int16_t>::min());
+      FloatS16ToS16(fbuf_.data(), ibuf_.length(), ibuf_.data());
       ivalid_ = true;
     }
   }
@@ -230,8 +217,8 @@ void AudioBuffer::CopyFrom(const float* const* data,
 
   // Convert to int16.
   for (int i = 0; i < num_proc_channels_; ++i) {
-    ScaleAndRoundToInt16(data_ptr[i], proc_samples_per_channel_,
-                         channels_->ibuf()->channel(i));
+    FloatToFloatS16(data_ptr[i], proc_samples_per_channel_,
+                channels_->fbuf()->channel(i));
   }
 }
 
@@ -248,9 +235,9 @@ void AudioBuffer::CopyTo(int samples_per_channel,
     data_ptr = process_buffer_->channels();
   }
   for (int i = 0; i < num_proc_channels_; ++i) {
-    ScaleToFloat(channels_->ibuf()->channel(i),
-                 proc_samples_per_channel_,
-                 data_ptr[i]);
+    FloatS16ToFloat(channels_->fbuf()->channel(i),
+                proc_samples_per_channel_,
+                data_ptr[i]);
   }
 
   // Resample.
@@ -449,12 +436,7 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
     // Downmix directly; no explicit deinterleaving needed.
     int16_t* downmixed = channels_->ibuf()->channel(0);
     for (int i = 0; i < input_samples_per_channel_; ++i) {
-      // HACK(ajm): The downmixing in the int16_t path is in practice never
-      // called from production code. We do this weird scaling to and from float
-      // to satisfy tests checking for bit-exactness with the float path.
-      float downmix_float = (ScaleToFloat(frame->data_[i * 2]) +
-                             ScaleToFloat(frame->data_[i * 2 + 1])) / 2;
-      downmixed[i] = ScaleAndRoundToInt16(downmix_float);
+      downmixed[i] = (frame->data_[i * 2] + frame->data_[i * 2 + 1]) / 2;
     }
   } else {
     assert(num_proc_channels_ == num_input_channels_);
diff --git a/modules/audio_processing/test/audio_processing_unittest.cc b/modules/audio_processing/test/audio_processing_unittest.cc
index a0fb303b..af31a636 100644
--- a/modules/audio_processing/test/audio_processing_unittest.cc
+++ b/modules/audio_processing/test/audio_processing_unittest.cc
@@ -66,9 +66,9 @@ void ConvertToFloat(const int16_t* int_data, ChannelBuffer<float>* cb) {
                cb->samples_per_channel(),
                cb->num_channels(),
                cb_int.channels());
-  ScaleToFloat(cb_int.data(),
-               cb->samples_per_channel() * cb->num_channels(),
-               cb->data());
+  S16ToFloat(cb_int.data(),
+           cb->samples_per_channel() * cb->num_channels(),
+           cb->data());
 }
 
 void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) {
@@ -135,7 +135,7 @@ void SetFrameTo(AudioFrame* frame, int16_t left, int16_t right) {
 
 void ScaleFrame(AudioFrame* frame, float scale) {
   for (int i = 0; i < frame->samples_per_channel_ * frame->num_channels_; ++i) {
-    frame->data_[i] = RoundToInt16(frame->data_[i] * scale);
+    frame->data_[i] = FloatS16ToS16(frame->data_[i] * scale);
   }
 }
 
@@ -1650,7 +1650,7 @@ TEST_F(ApmTest, DebugDumpFromFileHandle) {
 #endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
 }
 
-TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
+TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) {
   audioproc::OutputData ref_data;
   OpenFileAndReadMessage(ref_filename_, &ref_data);
 
@@ -1679,7 +1679,8 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
     Init(fapm.get());
 
     ChannelBuffer<int16_t> output_cb(samples_per_channel, num_input_channels);
-    scoped_ptr<int16_t[]> output_int16(new int16_t[output_length]);
+    ChannelBuffer<int16_t> output_int16(samples_per_channel,
+                                        num_input_channels);
 
     int analog_level = 127;
     while (ReadFrame(far_file_, revframe_, revfloat_cb_.get()) &&
@@ -1701,7 +1702,9 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
       EXPECT_NOERR(fapm->gain_control()->set_stream_analog_level(analog_level));
 
       EXPECT_NOERR(apm_->ProcessStream(frame_));
-      // TODO(ajm): Update to support different output rates.
+      Deinterleave(frame_->data_, samples_per_channel, num_output_channels,
+                   output_int16.channels());
+
       EXPECT_NOERR(fapm->ProcessStream(
           float_cb_->channels(),
           samples_per_channel,
@@ -1711,24 +1714,34 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
           LayoutFromChannels(num_output_channels),
           float_cb_->channels()));
 
-      // Convert to interleaved int16.
-      ScaleAndRoundToInt16(float_cb_->data(), output_length, output_cb.data());
-      Interleave(output_cb.channels(),
-                 samples_per_channel,
-                 num_output_channels,
-                 output_int16.get());
-      // Verify float and int16 paths produce identical output.
-      EXPECT_EQ(0, memcmp(frame_->data_, output_int16.get(), output_length));
+      FloatToS16(float_cb_->data(), output_length, output_cb.data());
+      for (int j = 0; j < num_output_channels; ++j) {
+        float variance = 0;
+        float snr = ComputeSNR(output_int16.channel(j), output_cb.channel(j),
+                               samples_per_channel, &variance);
+  #if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
+        // There are a few chunks in the fixed-point profile that give low SNR.
+        // Listening confirmed the difference is acceptable.
+        const float kVarianceThreshold = 150;
+        const float kSNRThreshold = 10;
+  #else
+        const float kVarianceThreshold = 20;
+        const float kSNRThreshold = 20;
+  #endif
+        // Skip frames with low energy.
+        if (sqrt(variance) > kVarianceThreshold) {
+          EXPECT_LT(kSNRThreshold, snr);
+        }
+      }
 
       analog_level = fapm->gain_control()->stream_analog_level();
       EXPECT_EQ(apm_->gain_control()->stream_analog_level(),
                 fapm->gain_control()->stream_analog_level());
       EXPECT_EQ(apm_->echo_cancellation()->stream_has_echo(),
                 fapm->echo_cancellation()->stream_has_echo());
-      EXPECT_EQ(apm_->voice_detection()->stream_has_voice(),
-                fapm->voice_detection()->stream_has_voice());
-      EXPECT_EQ(apm_->noise_suppression()->speech_probability(),
-                fapm->noise_suppression()->speech_probability());
+      EXPECT_NEAR(apm_->noise_suppression()->speech_probability(),
+                  fapm->noise_suppression()->speech_probability(),
+                  0.0005);
 
       // Reset in case of downmixing.
       frame_->num_channels_ = test->num_input_channels();
@@ -2002,7 +2015,7 @@ bool ReadChunk(FILE* file, int16_t* int_data, float* float_data,
     return false;  // This is expected.
   }
 
-  ScaleToFloat(int_data, frame_size, float_data);
+  S16ToFloat(int_data, frame_size, float_data);
   if (cb->num_channels() == 1) {
     MixStereoToMono(float_data, cb->data(), cb->samples_per_channel());
   } else {
diff --git a/modules/audio_processing/test/test_utils.h b/modules/audio_processing/test/test_utils.h
index 61edd8f3..a99f3427 100644
--- a/modules/audio_processing/test/test_utils.h
+++ b/modules/audio_processing/test/test_utils.h
@@ -8,6 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <math.h>
 #include <limits>
 
 #include "webrtc/audio_processing/debug.pb.h"
@@ -153,4 +154,26 @@ static inline bool ReadMessageFromFile(FILE* file,
   return msg->ParseFromArray(bytes.get(), size);
 }
 
+template <typename T>
+float ComputeSNR(const T* ref, const T* test, int length, float* variance) {
+  float mse = 0;
+  float mean = 0;
+  *variance = 0;
+  for (int i = 0; i < length; ++i) {
+    T error = ref[i] - test[i];
+    mse += error * error;
+    *variance += ref[i] * ref[i];
+    mean += ref[i];
+  }
+  mse /= length;
+  *variance /= length;
+  mean /= length;
+  *variance -= mean * mean;
+
+  float snr = 100;  // We assign 100 dB to the zero-error case.
+  if (mse > 0)
+    snr = 10 * log10(*variance / mse);
+  return snr;
+}
+
 }  // namespace webrtc
author	andrew@webrtc.org <andrew@webrtc.org>	2014-10-30 03:40:10 +0000
committer	andrew@webrtc.org <andrew@webrtc.org>	2014-10-30 03:40:10 +0000
commit	bce1329490c4cc7c1313cfee1afa41c721daa699 (patch)
tree	3fe11a275d4ddf39ac3816592a590461c3b238ff
parent	7b5a8968dbc3040e1739f839522ef7d09f8815ea (diff)
download	webrtc-bce1329490c4cc7c1313cfee1afa41c721daa699.tar.gz