about | summary | refs | log | tree | commit | diff
path: root/src/dsp/cpu.c
diff options
context:
space:
mode:
author Vikas Arora <vikasa@google.com> 2014-07-25 13:53:32 -0700
committer Vikas Arora <vikasa@google.com> 2014-07-25 13:53:32 -0700
commit 33f74dabbc7920a65ed435d7417987589febdc16 (patch)
tree b4a6d3345d23e0c12ea82ab354b75c83c7309516 /src/dsp/cpu.c
parent 40d327484446eb6adea41a817b2fb218d00a7649 (diff)
download webp-33f74dabbc7920a65ed435d7417987589febdc16.tar.gz
Sync-patch with libwebp ver 0.4.1-rc1.
Sync-patch with libwebp ver 0.4.1-rc1 (change#I5346984d2).
- NEON assembly optimizations:
  - ~25% faster lossy decode / encode (-m 4)
  - ~10% faster lossless decode
  - ~5-10% faster lossless encode (-m 3/4)
- AArch64 (arm64) & MIPS support/optimizations.

Change-Id: I855b65cec8fad5ec567c276b698e7714dc4bffd2
Diffstat (limited to 'src/dsp/cpu.c')
-rw-r--r-- src/dsp/cpu.c 56
1 file changed, 53 insertions, 3 deletions
diff --git a/src/dsp/cpu.c b/src/dsp/cpu.c
index 0988df27..a2954981 100644
--- a/src/dsp/cpu.c
+++ b/src/dsp/cpu.c
@@ -14,7 +14,7 @@
#include "./dsp.h"
#if defined(__ANDROID__)
-#include "./cpu-features.h"
+#include "cpu-features.h"
#endif
//------------------------------------------------------------------------------
@@ -38,10 +38,41 @@ static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type));
}
+#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1
+#define GetCPUInfo(info, type) __cpuidex(info, type, 0) // set ecx=0
#elif defined(WEBP_MSC_SSE2)
#define GetCPUInfo __cpuid
#endif
+// NaCl has no support for xgetbv or the raw opcode.
+#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
+static WEBP_INLINE uint64_t xgetbv(void) {
+ const uint32_t ecx = 0;
+ uint32_t eax, edx;
+ // Use the raw opcode for xgetbv for compatibility with older toolchains.
+ __asm__ volatile (
+ ".byte 0x0f, 0x01, 0xd0\n"
+ : "=a"(eax), "=d"(edx) : "c" (ecx));
+ return ((uint64_t)edx << 32) | eax;
+}
+#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1
+#define xgetbv() _xgetbv(0)
+#elif defined(_M_IX86)
+static WEBP_INLINE uint64_t xgetbv(void) {
+ uint32_t eax_, edx_;
+ __asm {
+ xor ecx, ecx // ecx = 0
+ // Use the raw opcode for xgetbv for compatibility with older toolchains.
+ __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
+ mov eax_, eax
+ mov edx_, edx
+ }
+ return ((uint64_t)edx_ << 32) | eax_;
+}
+#else
+#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
+#endif
+
#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
static int x86CPUInfo(CPUFeature feature) {
int cpu_info[4];
@@ -52,10 +83,23 @@ static int x86CPUInfo(CPUFeature feature) {
if (feature == kSSE3) {
return 0 != (cpu_info[2] & 0x00000001);
}
+ if (feature == kAVX) {
+ // bits 27 (OSXSAVE) & 28 (256-bit AVX)
+ if ((cpu_info[2] & 0x18000000) == 0x18000000) {
+ // XMM state and YMM state enabled by the OS.
+ return (xgetbv() & 0x6) == 0x6;
+ }
+ }
+ if (feature == kAVX2) {
+ if (x86CPUInfo(kAVX)) {
+ GetCPUInfo(cpu_info, 7);
+ return ((cpu_info[1] & 0x00000020) == 0x00000020);
+ }
+ }
return 0;
}
VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
-#elif defined(WEBP_ANDROID_NEON)
+#elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test.
static int AndroidCPUInfo(CPUFeature feature) {
const AndroidCpuFamily cpu_family = android_getCpuFamily();
const uint64_t cpu_features = android_getCpuFeatures();
@@ -66,7 +110,7 @@ static int AndroidCPUInfo(CPUFeature feature) {
return 0;
}
VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
-#elif defined(__ARM_NEON__)
+#elif defined(WEBP_USE_NEON)
// define a dummy function to enable turning off NEON at runtime by setting
// VP8DecGetCPUInfo = NULL
static int armCPUInfo(CPUFeature feature) {
@@ -74,6 +118,12 @@ static int armCPUInfo(CPUFeature feature) {
return 1;
}
VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
+#elif defined(__mips__)
+static int mipsCPUInfo(CPUFeature feature) {
+ (void)feature;
+ return 1;
+}
+VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
#else
VP8CPUInfo VP8GetCPUInfo = NULL;
#endif