summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Android Chromium Automerger <chromium-automerger@android> 2014-09-24 13:13:25 +0000
committer Android Chromium Automerger <chromium-automerger@android> 2014-09-24 13:13:25 +0000
commit a36810802fbdce0c92595b93eb78b17cd05153d6 (patch)
tree acc4c805a3fb0c23531777e10a145481492c415f
parent 8029e2981ce41392072548f343f99a44736aae2d (diff)
parent efe9712d52c2d216fb3d1ceb508b8148847a7e4b (diff)
download libvpx-a36810802fbdce0c92595b93eb78b17cd05153d6.tar.gz
Merge third_party/libvpx from https://chromium.googlesource.com/chromium/deps/libvpx.git at efe9712d52c2d216fb3d1ceb508b8148847a7e4b
This commit was generated by merge_from_chromium.py. Change-Id: I001fde9953b66e6dbdb5e0c751ef66aecc5b3f08
-rw-r--r-- source/libvpx/vp8/common/arm/neon/loopfilter_neon.c 80
-rw-r--r-- source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c 13
2 files changed, 44 insertions, 49 deletions
diff --git a/source/libvpx/vp8/common/arm/neon/loopfilter_neon.c b/source/libvpx/vp8/common/arm/neon/loopfilter_neon.c
index 0bec7fb..e103476 100644
--- a/source/libvpx/vp8/common/arm/neon/loopfilter_neon.c
+++ b/source/libvpx/vp8/common/arm/neon/loopfilter_neon.c
@@ -251,9 +251,38 @@ void vp8_loop_filter_horizontal_edge_uv_neon(
return;
}
+#if (__GNUC__ == 4 && (__GNUC_MINOR__ == 6))
+#warning Using GCC 4.6 is not recommended
+// Some versions of gcc4.6 do not correctly process vst4_lane_u8. When built
+// with any gcc4.6, use the C code.
+extern void vp8_loop_filter_vertical_edge_c(unsigned char *s, int p,
+ const unsigned char *blimit,
+ const unsigned char *limit,
+ const unsigned char *thresh,
+ int count);
+
+void vp8_loop_filter_vertical_edge_y_neon(
+ unsigned char *src,
+ int pitch,
+ unsigned char blimit,
+ unsigned char limit,
+ unsigned char thresh) {
+ vp8_loop_filter_vertical_edge_c(src, pitch, &blimit, &limit, &thresh, 2);
+}
+
+void vp8_loop_filter_vertical_edge_uv_neon(
+ unsigned char *u,
+ int pitch,
+ unsigned char blimit,
+ unsigned char limit,
+ unsigned char thresh,
+ unsigned char *v) {
+ vp8_loop_filter_vertical_edge_c(u, pitch, &blimit, &limit, &thresh, 1);
+ vp8_loop_filter_vertical_edge_c(v, pitch, &blimit, &limit, &thresh, 1);
+}
+#else
static INLINE void write_4x8(unsigned char *dst, int pitch,
const uint8x8x4_t result) {
-#if (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
vst4_lane_u8(dst, result, 0);
dst += pitch;
vst4_lane_u8(dst, result, 1);
@@ -269,54 +298,6 @@ static INLINE void write_4x8(unsigned char *dst, int pitch,
vst4_lane_u8(dst, result, 6);
dst += pitch;
vst4_lane_u8(dst, result, 7);
-#else
- /*
- * uint8x8x4_t result
- 00 01 02 03 | 04 05 06 07
- 10 11 12 13 | 14 15 16 17
- 20 21 22 23 | 24 25 26 27
- 30 31 32 33 | 34 35 36 37
- ---
- * after vtrn_u16
- 00 01 20 21 | 04 05 24 25
- 02 03 22 23 | 06 07 26 27
- 10 11 30 31 | 14 15 34 35
- 12 13 32 33 | 16 17 36 37
- ---
- * after vtrn_u8
- 00 10 20 30 | 04 14 24 34
- 01 11 21 31 | 05 15 25 35
- 02 12 22 32 | 06 16 26 36
- 03 13 23 33 | 07 17 27 37
- */
- const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]),
- vreinterpret_u16_u8(result.val[2]));
- const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]),
- vreinterpret_u16_u8(result.val[3]));
- const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
- vreinterpret_u8_u16(r13_u16.val[0]));
- const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
- vreinterpret_u8_u16(r13_u16.val[1]));
- const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]);
- const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]);
- const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]);
- const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]);
- vst1_lane_u32((uint32_t *)dst, x_0_4, 0);
- dst += pitch;
- vst1_lane_u32((uint32_t *)dst, x_1_5, 0);
- dst += pitch;
- vst1_lane_u32((uint32_t *)dst, x_2_6, 0);
- dst += pitch;
- vst1_lane_u32((uint32_t *)dst, x_3_7, 0);
- dst += pitch;
- vst1_lane_u32((uint32_t *)dst, x_0_4, 1);
- dst += pitch;
- vst1_lane_u32((uint32_t *)dst, x_1_5, 1);
- dst += pitch;
- vst1_lane_u32((uint32_t *)dst, x_2_6, 1);
- dst += pitch;
- vst1_lane_u32((uint32_t *)dst, x_3_7, 1);
-#endif
}
void vp8_loop_filter_vertical_edge_y_neon(
@@ -547,3 +528,4 @@ void vp8_loop_filter_vertical_edge_uv_neon(
vd = v - 2;
write_4x8(vd, pitch, q4ResultH);
}
+#endif // (__GNUC__ == 4 && (__GNUC_MINOR__ == 6))
diff --git a/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c b/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c
index d6b67f8..ffa3d91 100644
--- a/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c
+++ b/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c
@@ -10,6 +10,18 @@
#include <arm_neon.h>
+#if (__GNUC__ == 4 && (__GNUC_MINOR__ == 6))
+#warning Using GCC 4.6 is not recommended
+// Some versions of gcc4.6 do not correctly process this function. When built
+// with any gcc4.6, use the C code.
+#include "./vp8_rtcd.h"
+void vp8_short_walsh4x4_neon(
+ int16_t *input,
+ int16_t *output,
+ int pitch) {
+ vp8_short_walsh4x4_c(input, output, pitch);
+}
+#else
void vp8_short_walsh4x4_neon(
int16_t *input,
int16_t *output,
@@ -116,3 +128,4 @@ void vp8_short_walsh4x4_neon(
vst1q_s16(output + 8, q1s16);
return;
}
+#endif // (__GNUC__ == 4 && (__GNUC_MINOR__ == 6))