diff options
author | Android Chromium Automerger <chromium-automerger@android> | 2014-09-24 13:13:25 +0000 |
---|---|---|
committer | Android Chromium Automerger <chromium-automerger@android> | 2014-09-24 13:13:25 +0000 |
commit | a36810802fbdce0c92595b93eb78b17cd05153d6 (patch) | |
tree | acc4c805a3fb0c23531777e10a145481492c415f | |
parent | 8029e2981ce41392072548f343f99a44736aae2d (diff) | |
parent | efe9712d52c2d216fb3d1ceb508b8148847a7e4b (diff) | |
download | libvpx-a36810802fbdce0c92595b93eb78b17cd05153d6.tar.gz |
Merge third_party/libvpx from https://chromium.googlesource.com/chromium/deps/libvpx.git at efe9712d52c2d216fb3d1ceb508b8148847a7e4b
This commit was generated by merge_from_chromium.py.
Change-Id: I001fde9953b66e6dbdb5e0c751ef66aecc5b3f08
-rw-r--r-- | source/libvpx/vp8/common/arm/neon/loopfilter_neon.c | 80 | ||||
-rw-r--r-- | source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c | 13 |
2 files changed, 44 insertions, 49 deletions
diff --git a/source/libvpx/vp8/common/arm/neon/loopfilter_neon.c b/source/libvpx/vp8/common/arm/neon/loopfilter_neon.c index 0bec7fb..e103476 100644 --- a/source/libvpx/vp8/common/arm/neon/loopfilter_neon.c +++ b/source/libvpx/vp8/common/arm/neon/loopfilter_neon.c @@ -251,9 +251,38 @@ void vp8_loop_filter_horizontal_edge_uv_neon( return; } +#if (__GNUC__ == 4 && (__GNUC_MINOR__ == 6)) +#warning Using GCC 4.6 is not recommended +// Some versions of gcc4.6 do not correctly process vst4_lane_u8. When built +// with any gcc4.6, use the C code. +extern void vp8_loop_filter_vertical_edge_c(unsigned char *s, int p, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh, + int count); + +void vp8_loop_filter_vertical_edge_y_neon( + unsigned char *src, + int pitch, + unsigned char blimit, + unsigned char limit, + unsigned char thresh) { + vp8_loop_filter_vertical_edge_c(src, pitch, &blimit, &limit, &thresh, 2); +} + +void vp8_loop_filter_vertical_edge_uv_neon( + unsigned char *u, + int pitch, + unsigned char blimit, + unsigned char limit, + unsigned char thresh, + unsigned char *v) { + vp8_loop_filter_vertical_edge_c(u, pitch, &blimit, &limit, &thresh, 1); + vp8_loop_filter_vertical_edge_c(v, pitch, &blimit, &limit, &thresh, 1); +} +#else static INLINE void write_4x8(unsigned char *dst, int pitch, const uint8x8x4_t result) { -#if (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) vst4_lane_u8(dst, result, 0); dst += pitch; vst4_lane_u8(dst, result, 1); @@ -269,54 +298,6 @@ static INLINE void write_4x8(unsigned char *dst, int pitch, vst4_lane_u8(dst, result, 6); dst += pitch; vst4_lane_u8(dst, result, 7); -#else - /* - * uint8x8x4_t result - 00 01 02 03 | 04 05 06 07 - 10 11 12 13 | 14 15 16 17 - 20 21 22 23 | 24 25 26 27 - 30 31 32 33 | 34 35 36 37 - --- - * after vtrn_u16 - 00 01 20 21 | 04 05 24 25 - 02 03 22 23 | 06 07 26 27 - 10 11 30 31 | 14 15 34 35 - 12 13 32 33 | 16 17 36 37 - --- - * after vtrn_u8 - 00 10 20 30 | 04 14 24 34 - 01 11 21 31 | 05 15 25 35 - 02 12 22 32 | 06 16 26 36 - 03 13 23 33 | 07 17 27 37 - */ - const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]), - vreinterpret_u16_u8(result.val[2])); - const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]), - vreinterpret_u16_u8(result.val[3])); - const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]), - vreinterpret_u8_u16(r13_u16.val[0])); - const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]), - vreinterpret_u8_u16(r13_u16.val[1])); - const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]); - const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]); - const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]); - const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]); - vst1_lane_u32((uint32_t *)dst, x_0_4, 0); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_1_5, 0); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_2_6, 0); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_3_7, 0); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_0_4, 1); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_1_5, 1); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_2_6, 1); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_3_7, 1); -#endif } void vp8_loop_filter_vertical_edge_y_neon( @@ -547,3 +528,4 @@ void vp8_loop_filter_vertical_edge_uv_neon( vd = v - 2; write_4x8(vd, pitch, q4ResultH); } +#endif // (__GNUC__ == 4 && (__GNUC_MINOR__ == 6)) diff --git a/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c b/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c index d6b67f8..ffa3d91 100644 --- a/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c +++ b/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c @@ -10,6 +10,18 @@ #include <arm_neon.h> +#if (__GNUC__ == 4 && (__GNUC_MINOR__ == 6)) +#warning Using GCC 4.6 is not recommended +// Some versions of gcc4.6 do not correctly process this function. When built +// with any gcc4.6, use the C code. +#include "./vp8_rtcd.h" +void vp8_short_walsh4x4_neon( + int16_t *input, + int16_t *output, + int pitch) { + vp8_short_walsh4x4_c(input, output, pitch); +} +#else void vp8_short_walsh4x4_neon( int16_t *input, int16_t *output, @@ -116,3 +128,4 @@ void vp8_short_walsh4x4_neon( vst1q_s16(output + 8, q1s16); return; } +#endif // (__GNUC__ == 4 && (__GNUC_MINOR__ == 6)) |