summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: zhongwei.yao@arm.com <zhongwei.yao@arm.com> 2014-10-09 02:00:40 +0000
committer: zhongwei.yao@arm.com <zhongwei.yao@arm.com> 2014-10-09 02:00:40 +0000
commit: 0eb196f8db5b53d8593857b47e5a9701d1fc0f29 (patch)
tree: 8dccd4f73f6275a7d40182f84e89e445e8a62319
parent: 205c1440cf822b7203934eb818a6ea278fd93cba (diff)
download: libyuv-0eb196f8db5b53d8593857b47e5a9701d1fc0f29.tar.gz
clear aarch64 related macro and fix bugs
fix 2 bugs:
- build bug in libyuv.gyp
- runtime bug in ScaleRowDown38_2_Box_NEON

BUG=
TESTED=libyuv_unittest
R=fbarchard@google.com, fbarchard@chromium.org

Review URL: https://webrtc-codereview.appspot.com/23939004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1117 16f28f9a-4ce2-e073-06de-1de4eb20be90
-rw-r--r-- include/libyuv/scale_row.h 10
-rw-r--r-- libyuv.gyp 22
-rw-r--r-- source/rotate.cc 24
-rw-r--r-- source/scale_neon64.cc 31
4 files changed, 16 insertions, 71 deletions
diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h
index 402d859..27aa04b 100644
--- a/include/libyuv/scale_row.h
+++ b/include/libyuv/scale_row.h
@@ -44,21 +44,13 @@ extern "C" {
// The following are available on Neon platforms:
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
+ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SCALEROWDOWN2_NEON
#define HAS_SCALEROWDOWN4_NEON
#define HAS_SCALEROWDOWN34_NEON
#define HAS_SCALEROWDOWN38_NEON
#define HAS_SCALEARGBROWDOWNEVEN_NEON
#define HAS_SCALEARGBROWDOWN2_NEON
-#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
- (defined(__aarch64__) || defined(LIBYUV_NEON))
-#define HAS_SCALEROWDOWN2_NEON
-#define HAS_SCALEROWDOWN4_NEON
-#define HAS_SCALEROWDOWN34_NEON
-#define HAS_SCALEROWDOWN38_NEON
-#define HAS_SCALEARGBROWDOWN2_NEON
-#define HAS_SCALEARGBROWDOWNEVEN_NEON
#endif
// The following are available on Mips platforms:
diff --git a/libyuv.gyp b/libyuv.gyp
index fdb7455..3f06d41 100644
--- a/libyuv.gyp
+++ b/libyuv.gyp
@@ -130,16 +130,6 @@
'LIBYUV_DISABLE_X86',
],
}],
- ['OS == "android" and target_arch == "arm64"', {
- 'ldflags': [
- '-Wl,--dynamic-linker,/system/bin/linker64',
- ],
- }],
- ['OS == "android" and target_arch != "arm64"', {
- 'ldflags': [
- '-Wl,--dynamic-linker,/system/bin/linker',
- ],
- }],
], #conditions
'defines': [
# Enable the following 3 macros to turn off assembly for specified CPU.
@@ -159,6 +149,18 @@
'include',
'.',
],
+ 'conditions': [
+ ['OS == "android" and target_arch == "arm64"', {
+ 'ldflags': [
+ '-Wl,--dynamic-linker,/system/bin/linker64',
+ ],
+ }],
+ ['OS == "android" and target_arch != "arm64"', {
+ 'ldflags': [
+ '-Wl,--dynamic-linker,/system/bin/linker',
+ ],
+ }],
+ ], #conditions
},
'sources': [
'<@(libyuv_sources)',
diff --git a/source/rotate.cc b/source/rotate.cc
index 34b6666..48e4806 100644
--- a/source/rotate.cc
+++ b/source/rotate.cc
@@ -42,11 +42,7 @@ extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define HAS_MIRRORROW_NEON
-void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
-#define HAS_MIRRORROW_UV_NEON
-void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
+ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_TRANSPOSE_WX8_NEON
void TransposeWx8_NEON(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
@@ -55,23 +51,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width);
-//following symbol is temporally enable for aarch64, until all neon optimized
-//functions have been ported to aarch64
-#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
- (defined(__aarch64__) || defined(LIBYUV_NEON))
-// #define HAS_MIRRORROW_NEON
-// void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
-// #define HAS_MIRRORROW_UV_NEON
-// void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
-#define HAS_TRANSPOSE_WX8_NEON
-void TransposeWx8_NEON(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width);
-#define HAS_TRANSPOSE_UVWX8_NEON
-void TransposeUVWx8_NEON(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width);
-#endif // defined(__ARM_NEON__)
+#endif
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
defined(__mips__) && \
diff --git a/source/scale_neon64.cc b/source/scale_neon64.cc
index e31a6c9..933abd4 100644
--- a/source/scale_neon64.cc
+++ b/source/scale_neon64.cc
@@ -20,7 +20,6 @@ extern "C" {
// This module is for GCC Neon armv8 64 bit.
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-#ifdef HAS_SCALEROWDOWN2_NEON
// Read 32x1 throw away even pixels, and write 16x1.
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
@@ -40,9 +39,7 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "v0", "v1" // Clobber List
);
}
-#endif //HAS_SCALEROWDOWN2_NEON
-#ifdef HAS_SCALEROWDOWN2_NEON
// Read 32x2 average down and write 16x1.
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
@@ -72,9 +69,7 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "v0", "v1", "v2", "v3" // Clobber List
);
}
-#endif //HAS_SCALEROWDOWN2_NEON
-#ifdef HAS_SCALEROWDOWN4_NEON
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
@@ -92,9 +87,7 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "v0", "v1", "v2", "v3", "memory", "cc"
);
}
-#endif //HAS_SCALEROWDOWN4_NEON
-#ifdef HAS_SCALEROWDOWN4_NEON
void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
const uint8* src_ptr1 = src_ptr + src_stride;
@@ -130,9 +123,7 @@ asm volatile (
: "v0", "v1", "v2", "v3", "memory", "cc"
);
}
-#endif //HAS_SCALEROWDOWN4_NEON
-#ifdef HAS_SCALEROWDOWN34_NEON
// Down scale from 4 to 3 pixels. Use the neon multilane read/write
// to load up the every 4th pixel into a 4 different registers.
// Point samples 32 pixels to 24 pixels.
@@ -155,9 +146,7 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
: "v0", "v1", "v2", "v3", "memory", "cc"
);
}
-#endif //HAS_SCALEROWDOWN34_NEON
-#ifdef HAS_SCALEROWDOWN34_NEON
void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
@@ -217,9 +206,7 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
"v20", "memory", "cc"
);
}
-#endif //ScaleRowDown34_0_Box_NEON
-#ifdef HAS_SCALEROWDOWN34_NEON
void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
@@ -262,9 +249,7 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc"
);
}
-#endif //HAS_SCALEROWDOWN34_NEON
-#ifdef HAS_SCALEROWDOWN38_NEON
static uvec8 kShuf38 =
{ 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
static uvec8 kShuf38_2 =
@@ -301,9 +286,6 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
);
}
-#endif //HAS_SCALEROWDOWN38_NEON
-
-#ifdef HAS_SCALEROWDOWN38_NEON
// 32x3 -> 12x1
void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
@@ -432,9 +414,7 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
"v30", "v31", "memory", "cc"
);
}
-#endif //HAS_SCALEROWDOWN38_NEON
-#ifdef HAS_SCALEROWDOWN38_NEON
// 32x2 -> 12x1
void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
@@ -456,7 +436,7 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
MEMACCESS(0)
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"
MEMACCESS(3)
- "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n"
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n"
"subs %3, %3, #12 \n"
// Shuffle the input data around to get align the data
@@ -541,7 +521,6 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
"v18", "v19", "v30", "v31", "memory", "cc"
);
}
-#endif //HAS_SCALEROWDOWN38_NEON
// 16x2 -> 16x1
void ScaleFilterRows_NEON(uint8* dst_ptr,
@@ -643,7 +622,6 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
);
}
-#ifdef HAS_SCALEARGBROWDOWN2_NEON
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
@@ -666,9 +644,7 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
);
}
-#endif //HAS_SCALEARGBROWDOWN2_NEON
-#ifdef HAS_SCALEARGBROWDOWN2_NEON
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
@@ -703,9 +679,7 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"
);
}
-#endif //HAS_SCALEARGBROWDOWN2_NEON
-#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
// Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned.
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
@@ -731,9 +705,7 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
: "memory", "cc", "v0"
);
}
-#endif //HAS_SCALEARGBROWDOWNEVEN_NEON
-#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
// Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned.
// TODO, might be worth another optimization pass in future.
@@ -786,7 +758,6 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
);
}
-#endif // HAS_SCALEARGBROWDOWNEVEN_NEON
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef __cplusplus