summary refs log tree commit diff
path: root/media/base/sinc_resampler.cc
diff options
context:
space:
mode:
author Torne (Richard Coles) <torne@google.com> 2013-03-28 15:31:22 +0000
committer Torne (Richard Coles) <torne@google.com> 2013-03-28 15:31:22 +0000
commit 2a99a7e74a7f215066514fe81d2bfa6639d9eddd (patch)
tree 7c2d04841fcd599fd83b0f0bb1100e1c89a35bae /media/base/sinc_resampler.cc
parent 61c449bbbb53310a8c041d8cefdd6b01a126cc7e (diff)
download chromium_org-2a99a7e74a7f215066514fe81d2bfa6639d9eddd.tar.gz
Merge from Chromium at DEPS revision r190564
This commit was generated by merge_to_master.py. Change-Id: Icadecbce29854b8fa25fd335b2c1949b5ca5d170
Diffstat (limited to 'media/base/sinc_resampler.cc')
-rw-r--r-- media/base/sinc_resampler.cc 126
1 files changed, 28 insertions, 98 deletions
diff --git a/media/base/sinc_resampler.cc b/media/base/sinc_resampler.cc
index d836fc7cbc..00f9314c61 100644
--- a/media/base/sinc_resampler.cc
+++ b/media/base/sinc_resampler.cc
@@ -40,11 +40,6 @@
#include "base/cpu.h"
#include "base/logging.h"
-#include "build/build_config.h"
-
-#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
-#include <xmmintrin.h>
-#endif
#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
#include <arm_neon.h>
@@ -52,33 +47,6 @@
namespace media {
-namespace {
-
-enum {
- // The kernel size can be adjusted for quality (higher is better) at the
- // expense of performance. Must be a multiple of 32.
- // TODO(dalecurtis): Test performance to see if we can jack this up to 64+.
- kKernelSize = 32,
-
- // The number of destination frames generated per processing pass. Affects
- // how often and for how much SincResampler calls back for input. Must be
- // greater than kKernelSize.
- kBlockSize = 512,
-
- // The kernel offset count is used for interpolation and is the number of
- // sub-sample kernel shifts. Can be adjusted for quality (higher is better)
- // at the expense of allocating more memory.
- kKernelOffsetCount = 32,
- kKernelStorageSize = kKernelSize * (kKernelOffsetCount + 1),
-
- // The size (in samples) of the internal buffer used by the resampler.
- kBufferSize = kBlockSize + kKernelSize
-};
-
-} // namespace
-
-const int SincResampler::kMaximumLookAheadSize = kBufferSize;
-
SincResampler::SincResampler(double io_sample_rate_ratio, const ReadCB& read_cb)
: io_sample_rate_ratio_(io_sample_rate_ratio),
virtual_source_idx_(0),
@@ -89,6 +57,9 @@ SincResampler::SincResampler(double io_sample_rate_ratio, const ReadCB& read_cb)
base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))),
input_buffer_(static_cast<float*>(
base::AlignedAlloc(sizeof(float) * kBufferSize, 16))),
+#if defined(ARCH_CPU_X86_FAMILY) && !defined(__SSE__)
+ convolve_proc_(base::CPU().has_sse() ? Convolve_SSE : Convolve_C),
+#endif
// Setup various region pointers in the buffer (see diagram above).
r0_(input_buffer_.get() + kKernelSize / 2),
r1_(input_buffer_.get()),
@@ -168,6 +139,22 @@ void SincResampler::InitializeKernel() {
}
}
+// If we know the minimum architecture avoid function hopping for CPU detection.
+#if defined(ARCH_CPU_X86_FAMILY)
+#if defined(__SSE__)
+#define CONVOLVE_FUNC Convolve_SSE
+#else
+// X86 CPU detection required. |convolve_proc_| will be set upon construction.
+// TODO(dalecurtis): Once Chrome moves to a SSE baseline this can be removed.
+#define CONVOLVE_FUNC convolve_proc_
+#endif
+#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
+#define CONVOLVE_FUNC Convolve_NEON
+#else
+// Unknown architecture.
+#define CONVOLVE_FUNC Convolve_C
+#endif
+
void SincResampler::Resample(float* destination, int frames) {
int remaining_frames = frames;
@@ -193,12 +180,17 @@ void SincResampler::Resample(float* destination, int frames) {
float* k1 = kernel_storage_.get() + offset_idx * kKernelSize;
float* k2 = k1 + kKernelSize;
+ // Ensure |k1|, |k2| are 16-byte aligned for SIMD usage. Should always be
+ // true so long as kKernelSize is a multiple of 16.
+ DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F);
+ DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F);
+
// Initialize input pointer based on quantized |virtual_source_idx_|.
float* input_ptr = r1_ + source_idx;
// Figure out how much to weight each kernel's "convolution".
double kernel_interpolation_factor = virtual_offset_idx - offset_idx;
- *destination++ = Convolve(
+ *destination++ = CONVOLVE_FUNC(
input_ptr, k1, k2, kernel_interpolation_factor);
// Advance the virtual index.
@@ -222,7 +214,9 @@ void SincResampler::Resample(float* destination, int frames) {
}
}
-int SincResampler::ChunkSize() {
+#undef CONVOLVE_FUNC
+
+int SincResampler::ChunkSize() const {
return kBlockSize / io_sample_rate_ratio_;
}
@@ -232,26 +226,6 @@ void SincResampler::Flush() {
memset(input_buffer_.get(), 0, sizeof(*input_buffer_.get()) * kBufferSize);
}
-float SincResampler::Convolve(const float* input_ptr, const float* k1,
- const float* k2,
- double kernel_interpolation_factor) {
- // Rely on function level static initialization to keep ConvolveProc selection
- // thread safe.
- typedef float (*ConvolveProc)(const float* src, const float* k1,
- const float* k2,
- double kernel_interpolation_factor);
-#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
- static const ConvolveProc kConvolveProc =
- base::CPU().has_sse() ? Convolve_SSE : Convolve_C;
-#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
- static const ConvolveProc kConvolveProc = Convolve_NEON;
-#else
- static const ConvolveProc kConvolveProc = Convolve_C;
-#endif
-
- return kConvolveProc(input_ptr, k1, k2, kernel_interpolation_factor);
-}
-
float SincResampler::Convolve_C(const float* input_ptr, const float* k1,
const float* k2,
double kernel_interpolation_factor) {
@@ -271,50 +245,6 @@ float SincResampler::Convolve_C(const float* input_ptr, const float* k1,
+ kernel_interpolation_factor * sum2;
}
-#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
-float SincResampler::Convolve_SSE(const float* input_ptr, const float* k1,
- const float* k2,
- double kernel_interpolation_factor) {
- // Ensure |k1|, |k2| are 16-byte aligned for SSE usage. Should always be true
- // so long as kKernelSize is a multiple of 16.
- DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F);
- DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F);
-
- __m128 m_input;
- __m128 m_sums1 = _mm_setzero_ps();
- __m128 m_sums2 = _mm_setzero_ps();
-
- // Based on |input_ptr| alignment, we need to use loadu or load. Unrolling
- // these loops hurt performance in local testing.
- if (reinterpret_cast<uintptr_t>(input_ptr) & 0x0F) {
- for (int i = 0; i < kKernelSize; i += 4) {
- m_input = _mm_loadu_ps(input_ptr + i);
- m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));
- m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));
- }
- } else {
- for (int i = 0; i < kKernelSize; i += 4) {
- m_input = _mm_load_ps(input_ptr + i);
- m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));
- m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));
- }
- }
-
- // Linearly interpolate the two "convolutions".
- m_sums1 = _mm_mul_ps(m_sums1, _mm_set_ps1(1.0 - kernel_interpolation_factor));
- m_sums2 = _mm_mul_ps(m_sums2, _mm_set_ps1(kernel_interpolation_factor));
- m_sums1 = _mm_add_ps(m_sums1, m_sums2);
-
- // Sum components together.
- float result;
- m_sums2 = _mm_add_ps(_mm_movehl_ps(m_sums1, m_sums1), m_sums1);
- _mm_store_ss(&result, _mm_add_ss(m_sums2, _mm_shuffle_ps(
- m_sums2, m_sums2, 1)));
-
- return result;
-}
-#endif
-
#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
float SincResampler::Convolve_NEON(const float* input_ptr, const float* k1,
const float* k2,