summaryrefslogtreecommitdiff
path: root/cpu_ref/rsCpuCore.cpp
diff options
context:
space:
mode:
authorRose, James <james.rose@intel.com>2014-04-22 12:08:06 +0800
committerXiaofei Wan <xiaofei.wan@intel.com>2014-04-22 12:08:27 +0800
commit7b7060c61e4182b29186849c5a857ea5f0898e56 (patch)
tree329c1d4403c3542757db63fb1fb230e74f78b0c1 /cpu_ref/rsCpuCore.cpp
parent33c565f4766f961f4302c3e007a5ceaee312cc8c (diff)
downloadrs-7b7060c61e4182b29186849c5a857ea5f0898e56.tar.gz
Improve RS intrinsics performance.
Renderscript CPU performance for intrinsics cases is not good on x86 platforms. In many cases it is significantly slower, even with SIMD intrinsics. The current x86 implementation uses full 32-bit multiplies, which aren't well supported on current Atom platforms. This patch uses the pmaddwd instruction (16-bit multiply with 32-bit add) where appropriate. It also adds Atom-specific optimizations to improve RS intrinsics performance. Change-Id: Ifc01b5a6d6f7430d2dc218f1618b9df3fb7937fe Signed-off-by: Xiaofei Wan <xiaofei.wan@intel.com>
Diffstat (limited to 'cpu_ref/rsCpuCore.cpp')
-rw-r--r--cpu_ref/rsCpuCore.cpp10
1 files changed, 7 insertions, 3 deletions
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index c38d631b..277836a6 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -200,7 +200,7 @@ void RsdCpuReferenceImpl::unlockMutex() {
pthread_mutex_unlock(&gInitMutex);
}
-#if defined(ARCH_ARM_HAVE_VFP)
+#if defined(ARCH_ARM_HAVE_VFP) || defined(ARCH_X86_HAVE_SSSE3)
static int
read_file(const char* pathname, char* buffer, size_t buffsize)
{
@@ -228,9 +228,13 @@ static void GetCpuInfo() {
return;
}
+#if defined(ARCH_ARM_HAVE_VFP)
gArchUseSIMD = !!strstr(cpuinfo, " neon");
+#elif defined(ARCH_X86_HAVE_SSSE3)
+ gArchUseSIMD = !!strstr(cpuinfo, " ssse3");
+#endif
}
-#endif // ARCH_ARM_HAVE_VFP
+#endif // ARCH_ARM_HAVE_VFP || ARCH_X86_HAVE_SSSE3
bool RsdCpuReferenceImpl::init(uint32_t version_major, uint32_t version_minor,
sym_lookup_t lfn, script_lookup_t slfn) {
@@ -257,7 +261,7 @@ bool RsdCpuReferenceImpl::init(uint32_t version_major, uint32_t version_minor,
ALOGE("pthread_setspecific %i", status);
}
-#if defined(ARCH_ARM_HAVE_VFP)
+#if defined(ARCH_ARM_HAVE_VFP) || defined(ARCH_X86_HAVE_SSSE3)
GetCpuInfo();
#endif