diff options
author | zhongwei.yao@arm.com <zhongwei.yao@arm.com> | 2014-10-09 02:00:40 +0000 |
---|---|---|
committer | zhongwei.yao@arm.com <zhongwei.yao@arm.com> | 2014-10-09 02:00:40 +0000 |
commit | 0eb196f8db5b53d8593857b47e5a9701d1fc0f29 (patch) | |
tree | 8dccd4f73f6275a7d40182f84e89e445e8a62319 | |
parent | 205c1440cf822b7203934eb818a6ea278fd93cba (diff) | |
download | libyuv-0eb196f8db5b53d8593857b47e5a9701d1fc0f29.tar.gz |
Clean up aarch64-related macros and fix bugs
Fix 2 bugs:
- build bug in libyuv.gyp
- runtime bug in ScaleRowDown38_2_Box_NEON
BUG=
TESTED=libyuv_unittest
R=fbarchard@google.com, fbarchard@chromium.org
Review URL: https://webrtc-codereview.appspot.com/23939004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@1117 16f28f9a-4ce2-e073-06de-1de4eb20be90
-rw-r--r-- | include/libyuv/scale_row.h | 10 | ||||
-rw-r--r-- | libyuv.gyp | 22 | ||||
-rw-r--r-- | source/rotate.cc | 24 | ||||
-rw-r--r-- | source/scale_neon64.cc | 31 |
4 files changed, 16 insertions, 71 deletions
diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index 402d859..27aa04b 100644 --- a/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -44,21 +44,13 @@ extern "C" { // The following are available on Neon platforms: #if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ - (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) + (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) #define HAS_SCALEROWDOWN2_NEON #define HAS_SCALEROWDOWN4_NEON #define HAS_SCALEROWDOWN34_NEON #define HAS_SCALEROWDOWN38_NEON #define HAS_SCALEARGBROWDOWNEVEN_NEON #define HAS_SCALEARGBROWDOWN2_NEON -#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ - (defined(__aarch64__) || defined(LIBYUV_NEON)) -#define HAS_SCALEROWDOWN2_NEON -#define HAS_SCALEROWDOWN4_NEON -#define HAS_SCALEROWDOWN34_NEON -#define HAS_SCALEROWDOWN38_NEON -#define HAS_SCALEARGBROWDOWN2_NEON -#define HAS_SCALEARGBROWDOWNEVEN_NEON #endif // The following are available on Mips platforms: @@ -130,16 +130,6 @@ 'LIBYUV_DISABLE_X86', ], }], - ['OS == "android" and target_arch == "arm64"', { - 'ldflags': [ - '-Wl,--dynamic-linker,/system/bin/linker64', - ], - }], - ['OS == "android" and target_arch != "arm64"', { - 'ldflags': [ - '-Wl,--dynamic-linker,/system/bin/linker', - ], - }], ], #conditions 'defines': [ # Enable the following 3 macros to turn off assembly for specified CPU. 
@@ -159,6 +149,18 @@ 'include', '.', ], + 'conditions': [ + ['OS == "android" and target_arch == "arm64"', { + 'ldflags': [ + '-Wl,--dynamic-linker,/system/bin/linker64', + ], + }], + ['OS == "android" and target_arch != "arm64"', { + 'ldflags': [ + '-Wl,--dynamic-linker,/system/bin/linker', + ], + }], + ], #conditions }, 'sources': [ '<@(libyuv_sources)', diff --git a/source/rotate.cc b/source/rotate.cc index 34b6666..48e4806 100644 --- a/source/rotate.cc +++ b/source/rotate.cc @@ -42,11 +42,7 @@ extern "C" { #endif #if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ - (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) -#define HAS_MIRRORROW_NEON -void MirrorRow_NEON(const uint8* src, uint8* dst, int width); -#define HAS_MIRRORROW_UV_NEON -void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width); + (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) #define HAS_TRANSPOSE_WX8_NEON void TransposeWx8_NEON(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width); @@ -55,23 +51,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride, uint8* dst_a, int dst_stride_a, uint8* dst_b, int dst_stride_b, int width); -//following symbol is temporally enable for aarch64, until all neon optimized -//functions have been ported to aarch64 -#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ - (defined(__aarch64__) || defined(LIBYUV_NEON)) -// #define HAS_MIRRORROW_NEON -// void MirrorRow_NEON(const uint8* src, uint8* dst, int width); -// #define HAS_MIRRORROW_UV_NEON -// void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width); -#define HAS_TRANSPOSE_WX8_NEON -void TransposeWx8_NEON(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -#define HAS_TRANSPOSE_UVWX8_NEON -void TransposeUVWx8_NEON(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width); -#endif // defined(__ARM_NEON__) 
+#endif #if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \ defined(__mips__) && \ diff --git a/source/scale_neon64.cc b/source/scale_neon64.cc index e31a6c9..933abd4 100644 --- a/source/scale_neon64.cc +++ b/source/scale_neon64.cc @@ -20,7 +20,6 @@ extern "C" { // This module is for GCC Neon armv8 64 bit. #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) -#ifdef HAS_SCALEROWDOWN2_NEON // Read 32x1 throw away even pixels, and write 16x1. void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width) { @@ -40,9 +39,7 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, : "v0", "v1" // Clobber List ); } -#endif //HAS_SCALEROWDOWN2_NEON -#ifdef HAS_SCALEROWDOWN2_NEON // Read 32x2 average down and write 16x1. void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width) { @@ -72,9 +69,7 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, : "v0", "v1", "v2", "v3" // Clobber List ); } -#endif //HAS_SCALEROWDOWN2_NEON -#ifdef HAS_SCALEROWDOWN4_NEON void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { asm volatile ( @@ -92,9 +87,7 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, : "v0", "v1", "v2", "v3", "memory", "cc" ); } -#endif //HAS_SCALEROWDOWN4_NEON -#ifdef HAS_SCALEROWDOWN4_NEON void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { const uint8* src_ptr1 = src_ptr + src_stride; @@ -130,9 +123,7 @@ asm volatile ( : "v0", "v1", "v2", "v3", "memory", "cc" ); } -#endif //HAS_SCALEROWDOWN4_NEON -#ifdef HAS_SCALEROWDOWN34_NEON // Down scale from 4 to 3 pixels. Use the neon multilane read/write // to load up the every 4th pixel into a 4 different registers. // Point samples 32 pixels to 24 pixels. 
@@ -155,9 +146,7 @@ void ScaleRowDown34_NEON(const uint8* src_ptr, : "v0", "v1", "v2", "v3", "memory", "cc" ); } -#endif //HAS_SCALEROWDOWN34_NEON -#ifdef HAS_SCALEROWDOWN34_NEON void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { @@ -217,9 +206,7 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, "v20", "memory", "cc" ); } -#endif //ScaleRowDown34_0_Box_NEON -#ifdef HAS_SCALEROWDOWN34_NEON void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { @@ -262,9 +249,7 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc" ); } -#endif //HAS_SCALEROWDOWN34_NEON -#ifdef HAS_SCALEROWDOWN38_NEON static uvec8 kShuf38 = { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 }; static uvec8 kShuf38_2 = @@ -301,9 +286,6 @@ void ScaleRowDown38_NEON(const uint8* src_ptr, ); } -#endif //HAS_SCALEROWDOWN38_NEON - -#ifdef HAS_SCALEROWDOWN38_NEON // 32x3 -> 12x1 void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, @@ -432,9 +414,7 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, "v30", "v31", "memory", "cc" ); } -#endif //HAS_SCALEROWDOWN38_NEON -#ifdef HAS_SCALEROWDOWN38_NEON // 32x2 -> 12x1 void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, @@ -456,7 +436,7 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, MEMACCESS(0) "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" MEMACCESS(3) - "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" + "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n" "subs %3, %3, #12 \n" // Shuffle the input data around to get align the data @@ -541,7 +521,6 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, "v18", "v19", "v30", "v31", "memory", "cc" ); } -#endif //HAS_SCALEROWDOWN38_NEON // 16x2 -> 16x1 void ScaleFilterRows_NEON(uint8* dst_ptr, @@ -643,7 +622,6 @@ void ScaleFilterRows_NEON(uint8* dst_ptr, ); } 
-#ifdef HAS_SCALEARGBROWDOWN2_NEON void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width) { asm volatile ( @@ -666,9 +644,7 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, : "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List ); } -#endif //HAS_SCALEARGBROWDOWN2_NEON -#ifdef HAS_SCALEARGBROWDOWN2_NEON void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width) { asm volatile ( @@ -703,9 +679,7 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, : "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19" ); } -#endif //HAS_SCALEARGBROWDOWN2_NEON -#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON // Reads 4 pixels at a time. // Alignment requirement: src_argb 4 byte aligned. void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride, @@ -731,9 +705,7 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride, : "memory", "cc", "v0" ); } -#endif //HAS_SCALEARGBROWDOWNEVEN_NEON -#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON // Reads 4 pixels at a time. // Alignment requirement: src_argb 4 byte aligned. // TODO, might be worth another optimization pass in future. @@ -786,7 +758,6 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride, : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" ); } -#endif // HAS_SCALEARGBROWDOWNEVEN_NEON #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #ifdef __cplusplus |