summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorfbarchard@google.com <fbarchard@google.com>2014-10-07 01:57:34 +0000
committerfbarchard@google.com <fbarchard@google.com>2014-10-07 01:57:34 +0000
commit008ce53ac4dbbecee0d788f494fd6e87762674f2 (patch)
tree6a7999b9adf74f05bf6b1b14ca9e4dfaab3b4b67
parentca308327d234668d15ff85a1798e1d0c12a3b39c (diff)
downloadlibyuv-008ce53ac4dbbecee0d788f494fd6e87762674f2.tar.gz
pavgb with memory op requires alignment. This CL disables conversions that use pavgb, and resolves scale by 3/8 unittest for checking alignment works. The 3/8 code used a pavgb with a memory operand. tests are added for scaling and allow unaligning on purpose.
BUG=365 TESTED=local change to force unaligned memory fails on some conversions and scaling code. R=tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/29699004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1114 16f28f9a-4ce2-e073-06de-1de4eb20be90
-rw-r--r--README.chromium2
-rw-r--r--include/libyuv/row.h10
-rw-r--r--include/libyuv/version.h2
-rw-r--r--source/convert.cc6
-rw-r--r--source/row_any.cc9
-rw-r--r--source/row_posix.cc139
-rw-r--r--source/scale_posix.cc3
-rw-r--r--source/scale_win.cc3
-rw-r--r--unit_test/scale_argb_test.cc1
-rw-r--r--unit_test/scale_test.cc1
-rw-r--r--unit_test/unit_test.h7
11 files changed, 27 insertions, 156 deletions
diff --git a/README.chromium b/README.chromium
index c4d4a4f..c1647af 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1112
+Version: 1114
License: BSD
License File: LICENSE
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index f5abf1e..186dcbd 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -87,7 +87,6 @@ extern "C" {
#define HAS_ARGBSEPIAROW_SSSE3
#define HAS_ARGBSHADEROW_SSE2
#define HAS_ARGBSUBTRACTROW_SSE2
-#define HAS_ARGBTOUVROW_SSSE3
#define HAS_ARGBUNATTENUATEROW_SSE2
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
@@ -101,7 +100,8 @@ extern "C" {
#define HAS_SOBELYROW_SSE2
// Conversions:
-#define HAS_ABGRTOUVROW_SSSE3
+//#define HAS_ARGBTOUVROW_SSSE3
+//#define HAS_ABGRTOUVROW_SSSE3
#define HAS_ABGRTOYROW_SSSE3
#define HAS_ARGB1555TOARGBROW_SSE2
#define HAS_ARGB4444TOARGBROW_SSE2
@@ -116,10 +116,10 @@ extern "C" {
#define HAS_ARGBTORGB565ROW_SSE2
#define HAS_ARGBTOUV422ROW_SSSE3
#define HAS_ARGBTOUV444ROW_SSSE3
-#define HAS_ARGBTOUVJROW_SSSE3
+//#define HAS_ARGBTOUVJROW_SSSE3
#define HAS_ARGBTOYJROW_SSSE3
#define HAS_ARGBTOYROW_SSSE3
-#define HAS_BGRATOUVROW_SSSE3
+//#define HAS_BGRATOUVROW_SSSE3
#define HAS_BGRATOYROW_SSSE3
#define HAS_COPYROW_ERMS
#define HAS_COPYROW_SSE2
@@ -153,7 +153,7 @@ extern "C" {
#define HAS_RGB24TOARGBROW_SSSE3
#define HAS_RGB24TOYROW_SSSE3
#define HAS_RGB565TOARGBROW_SSE2
-#define HAS_RGBATOUVROW_SSSE3
+//#define HAS_RGBATOUVROW_SSSE3
#define HAS_RGBATOYROW_SSSE3
#define HAS_SETROW_X86
#define HAS_SPLITUVROW_SSE2
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 59aed1a..fb0e396 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1112
+#define LIBYUV_VERSION 1114
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
diff --git a/source/convert.cc b/source/convert.cc
index 19ad659..da3d4c6 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -735,7 +735,7 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
src_bgra = src_bgra + (height - 1) * src_stride_bgra;
src_stride_bgra = -src_stride_bgra;
}
-#if defined(HAS_BGRATOYROW_SSSE3)
+#if defined(HAS_BGRATOYROW_SSSE3) && defined(HAS_BGRATOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
BGRAToUVRow = BGRAToUVRow_Any_SSSE3;
BGRAToYRow = BGRAToYRow_Any_SSSE3;
@@ -800,7 +800,7 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
src_abgr = src_abgr + (height - 1) * src_stride_abgr;
src_stride_abgr = -src_stride_abgr;
}
-#if defined(HAS_ABGRTOYROW_SSSE3)
+#if defined(HAS_ABGRTOYROW_SSSE3) && defined(HAS_ABGRTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
ABGRToYRow = ABGRToYRow_Any_SSSE3;
@@ -865,7 +865,7 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba,
src_rgba = src_rgba + (height - 1) * src_stride_rgba;
src_stride_rgba = -src_stride_rgba;
}
-#if defined(HAS_RGBATOYROW_SSSE3)
+#if defined(HAS_RGBATOYROW_SSSE3) && defined(HAS_RGBATOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
RGBAToUVRow = RGBAToUVRow_Any_SSSE3;
RGBAToYRow = RGBAToYRow_Any_SSSE3;
diff --git a/source/row_any.cc b/source/row_any.cc
index 0d58f5d..9d8a5e5 100644
--- a/source/row_any.cc
+++ b/source/row_any.cc
@@ -350,11 +350,12 @@ UVANY(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, UYVYToUVRow_C, 2, 31)
#endif
#ifdef HAS_ARGBTOUVROW_SSSE3
UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, ARGBToUVRow_C, 4, 15)
-UVANY(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, ARGBToUVJRow_C,
- 4, 15)
+UVANY(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, ARGBToUVJRow_C, 4, 15)
UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, BGRAToUVRow_C, 4, 15)
UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, ABGRToUVRow_C, 4, 15)
UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, RGBAToUVRow_C, 4, 15)
+#endif
+#ifdef HAS_YUY2TOUVROW_SSE2
UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, YUY2ToUVRow_C, 2, 15)
UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, UYVYToUVRow_C, 2, 15)
#endif
@@ -417,9 +418,11 @@ UV422ANY(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2,
UV422ANY(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2,
UYVYToUV422Row_C, 2, 31, 1)
#endif
-#ifdef HAS_ARGBTOUVROW_SSSE3
+#ifdef HAS_ARGBTOUV422ROW_SSSE3
UV422ANY(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_SSSE3,
ARGBToUV422Row_C, 4, 15, 1)
+#endif
+#ifdef HAS_YUY2TOUV422ROW_SSE2
UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2,
YUY2ToUV422Row_C, 2, 15, 1)
UV422ANY(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2,
diff --git a/source/row_posix.cc b/source/row_posix.cc
index 94b24a6..15a8321 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -743,43 +743,6 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
#endif
);
}
-
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- asm volatile (
- "movdqa %4,%%xmm5 \n"
- "movdqa %3,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kARGBToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
#endif // HAS_ARGBTOYROW_SSSE3
#ifdef HAS_ARGBTOYJROW_SSSE3
@@ -1027,71 +990,6 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
);
}
-void ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u,
- uint8* dst_v, int width) {
- asm volatile (
- "movdqa %0,%%xmm4 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm5 \n"
- :
- : "m"(kARGBToU), // %0
- "m"(kARGBToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm6 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm2 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm2 \n"
- "packsswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- "pmaddubsw %%xmm3,%%xmm0 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm2 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm2 \n"
- "packsswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqu,xmm0,0x00,1,2,1) // movdqu %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6"
-#endif
- );
-}
-
void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
@@ -1330,43 +1228,6 @@ void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix) {
);
}
-void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix) {
- asm volatile (
- "movdqa %4,%%xmm5 \n"
- "movdqa %3,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_rgba), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kRGBAToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
diff --git a/source/scale_posix.cc b/source/scale_posix.cc
index 6320e50..92e3354 100644
--- a/source/scale_posix.cc
+++ b/source/scale_posix.cc
@@ -526,8 +526,9 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
LABELALIGN
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(pavgb,0x00,0,3,1,xmm0) // pavgb (%0,%3,1),%%xmm0
+ MEMOPREG(movdqu,0x00,0,3,1,xmm1) // movdqu (%0,%3,1),%%xmm1
"lea " MEMLEA(0x10,0) ",%0 \n"
+ "pavgb %%xmm1,%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"pshufb %%xmm2,%%xmm1 \n"
"movdqa %%xmm0,%%xmm6 \n"
diff --git a/source/scale_win.cc b/source/scale_win.cc
index 091587e..8370ef4 100644
--- a/source/scale_win.cc
+++ b/source/scale_win.cc
@@ -589,8 +589,9 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
align 4
xloop:
movdqu xmm0, [eax] // average 2 rows into xmm0
- pavgb xmm0, [eax + esi]
+ movdqu xmm1, [eax + esi]
lea eax, [eax + 16]
+ pavgb xmm0, xmm1
movdqa xmm1, xmm0 // 16 pixels -> 0,1,2,3,4,5 of xmm1
pshufb xmm1, xmm2
diff --git a/unit_test/scale_argb_test.cc b/unit_test/scale_argb_test.cc
index 6a2bc79..bbeb4f8 100644
--- a/unit_test/scale_argb_test.cc
+++ b/unit_test/scale_argb_test.cc
@@ -223,6 +223,7 @@ TEST_FACTOR(2, 1 / 2, 1 / 2)
TEST_FACTOR(4, 1 / 4, 1 / 4)
TEST_FACTOR(8, 1 / 8, 1 / 8)
TEST_FACTOR(3by4, 3 / 4, 3 / 4)
+TEST_FACTOR(3by8, 3 / 8, 3 / 8)
#undef TEST_FACTOR1
#undef TEST_FACTOR
diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc
index 00f0707..5d08365 100644
--- a/unit_test/scale_test.cc
+++ b/unit_test/scale_test.cc
@@ -288,6 +288,7 @@ TEST_FACTOR(2, 1 / 2, 1 / 2)
TEST_FACTOR(4, 1 / 4, 1 / 4)
TEST_FACTOR(8, 1 / 8, 1 / 8)
TEST_FACTOR(3by4, 3 / 4, 3 / 4)
+TEST_FACTOR(3by8, 3 / 8, 3 / 8)
#undef TEST_FACTOR1
#undef TEST_FACTOR
diff --git a/unit_test/unit_test.h b/unit_test/unit_test.h
index cfce548..0151796 100644
--- a/unit_test/unit_test.h
+++ b/unit_test/unit_test.h
@@ -26,11 +26,14 @@ static __inline int Abs(int v) {
return v >= 0 ? v : -v;
}
+#define OFFBY 0
+
#define align_buffer_page_end(var, size) \
uint8* var; \
uint8* var##_mem; \
- var##_mem = reinterpret_cast<uint8*>(malloc(((size) + 4095) & ~4095)); \
- var = var##_mem + (-(size) & 4095);
+ var##_mem = reinterpret_cast<uint8*>(malloc((((size) + 4095) & ~4095) + \
+ OFFBY)); \
+ var = var##_mem + (-(size) & 4095) + OFFBY;
#define free_aligned_buffer_page_end(var) \
free(var##_mem); \