diff options
author | fbarchard@google.com <fbarchard@google.com> | 2014-10-03 17:37:11 +0000 |
---|---|---|
committer | fbarchard@google.com <fbarchard@google.com> | 2014-10-03 17:37:11 +0000 |
commit | d83f63a3b4c364b1b1916f80e23abb84ad15c66d (patch) | |
tree | 7a4f677917e50993cd626ed30e08f3b602a59aa0 | |
parent | 0c603fbca341b1260fa4636a2d102419aab5ac41 (diff) | |
download | libyuv-d83f63a3b4c364b1b1916f80e23abb84ad15c66d.tar.gz |
Make InterpolateRow (used for scaling) handle unaligned memory. Remove HalfRow, which is not used.
BUG=367
TESTED=unittest on I422ToI420
R=harryjin@google.com
Review URL: https://webrtc-codereview.appspot.com/28639004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@1107 16f28f9a-4ce2-e073-06de-1de4eb20be90
-rw-r--r-- | README.chromium | 2 | ||||
-rw-r--r-- | include/libyuv/row.h | 16 | ||||
-rw-r--r-- | include/libyuv/version.h | 2 | ||||
-rw-r--r-- | source/row_common.cc | 10 | ||||
-rw-r--r-- | source/row_neon.cc | 24 | ||||
-rw-r--r-- | source/row_neon64.cc | 26 | ||||
-rw-r--r-- | source/row_posix.cc | 109 | ||||
-rw-r--r-- | source/row_win.cc | 60 |
8 files changed, 53 insertions, 196 deletions
diff --git a/README.chromium b/README.chromium index cff56a3..baa50e9 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1105 +Version: 1106 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index dd83afa..e9f62b8 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -124,7 +124,6 @@ extern "C" { #define HAS_COPYROW_ERMS #define HAS_COPYROW_SSE2 #define HAS_COPYROW_X86 -#define HAS_HALFROW_SSE2 #define HAS_I400TOARGBROW_SSE2 #define HAS_I411TOARGBROW_SSSE3 #define HAS_I422TOARGB1555ROW_SSSE3 @@ -212,7 +211,6 @@ extern "C" { #define HAS_ARGBTOUVROW_AVX2 #define HAS_ARGBTOYJROW_AVX2 #define HAS_ARGBTOYROW_AVX2 -#define HAS_HALFROW_AVX2 #define HAS_I422TOARGBROW_AVX2 #define HAS_INTERPOLATEROW_AVX2 #define HAS_MERGEUVROW_AVX2 @@ -300,7 +298,6 @@ extern "C" { #define HAS_UYVYTOUV422ROW_NEON #define HAS_YUY2TOUVROW_NEON #define HAS_UYVYTOUVROW_NEON -#define HAS_HALFROW_NEON #define HAS_ARGBTOBAYERROW_NEON #define HAS_ARGBTOBAYERGGROW_NEON #define HAS_ARGBSHUFFLEROW_NEON @@ -378,7 +375,6 @@ extern "C" { #define HAS_BGRATOUVROW_NEON #define HAS_BGRATOYROW_NEON #define HAS_COPYROW_NEON -#define HAS_HALFROW_NEON #define HAS_I400TOARGBROW_NEON #define HAS_I411TOARGBROW_NEON #define HAS_I422TOABGRROW_NEON @@ -1577,18 +1573,6 @@ void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy, void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, int pix); -void HalfRow_C(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); -void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); -void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); -void HalfRow_NEON(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); - -void HalfRow_16_C(const uint16* src_uv, int src_uv_stride, - uint16* dst_uv, int pix); - void ARGBToBayerRow_C(const uint8* src_argb, 
uint8* dst_bayer, uint32 selector, int pix); void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 8a89025..24b888f 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1105 +#define LIBYUV_VERSION 1106 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_common.cc b/source/row_common.cc index b998819..40a8261 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1885,17 +1885,17 @@ void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, } } -// Blend 2 rows into 1 for conversions such as I422ToI420. -void HalfRow_C(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix) { +// Blend 2 rows into 1. +static void HalfRow_C(const uint8* src_uv, int src_uv_stride, + uint8* dst_uv, int pix) { int x; for (x = 0; x < pix; ++x) { dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1; } } -void HalfRow_16_C(const uint16* src_uv, int src_uv_stride, - uint16* dst_uv, int pix) { +static void HalfRow_16_C(const uint16* src_uv, int src_uv_stride, + uint16* dst_uv, int pix) { int x; for (x = 0; x < pix; ++x) { dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1; diff --git a/source/row_neon.cc b/source/row_neon.cc index 12c294e..ac1c5e5 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -1274,30 +1274,6 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, ); } -void HalfRow_NEON(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix) { - asm volatile ( - // change the stride to row 2 pointer - "add %1, %0 \n" - "1: \n" - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" // load row 1 16 pixels. - "subs %3, %3, #16 \n" // 16 processed per loop - MEMACCESS(1) - "vld1.8 {q1}, [%1]! \n" // load row 2 16 pixels. 
- "vrhadd.u8 q0, q1 \n" // average row 1 and 2 - MEMACCESS(2) - "vst1.8 {q0}, [%2]! \n" - "bgt 1b \n" - : "+r"(src_uv), // %0 - "+r"(src_uv_stride), // %1 - "+r"(dst_uv), // %2 - "+r"(pix) // %3 - : - : "cc", "memory", "q0", "q1" // Clobber List - ); -} - // Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer, uint32 selector, int pix) { diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 164786d..1829af8 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -1260,32 +1260,6 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, } #endif // HAS_UYVYTOUVROW_NEON -#ifdef HAS_HALFROW_NEON -void HalfRow_NEON(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix) { - const uint8* src_uvb = src_uv + src_uv_stride; - asm volatile ( - // change the stride to row 2 pointer - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" // load row 1 16 pixels. - "subs %3, %3, #16 \n" // 16 processed per loop - MEMACCESS(1) - "ld1 {v1.16b}, [%1], #16 \n" // load row 2 16 pixels. - "urhadd v0.16b, v0.16b, v1.16b \n" // average row 1 and 2 - MEMACCESS(2) - "st1 {v0.16b}, [%2], #16 \n" - "b.gt 1b \n" - : "+r"(src_uv), // %0 - "+r"(src_uvb), // %1 - "+r"(dst_uv), // %2 - "+r"(pix) // %3 - : - : "cc", "memory", "v0", "v1" // Clobber List - ); -} -#endif // HAS_HALFROW_NEON - // Select 2 channels from ARGB on alternating pixels. e.g. 
BGBGBGBG #ifdef HAS_ARGBTOBAYERROW_NEON void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer, diff --git a/source/row_posix.cc b/source/row_posix.cc index 5eefec4..36111c1 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -3182,14 +3182,14 @@ void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, "sub %0,%1 \n" LABELALIGN "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,0,1,1,xmm1) // movdqa (%0,%1,1),%%xmm1 + "movdqu " MEMACCESS(0) ",%%xmm0 \n" + MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqa (%0,%1,1),%%xmm1 "lea " MEMLEA(0x10,0) ",%0 \n" "movdqa %%xmm0,%%xmm2 \n" "punpcklbw %%xmm1,%%xmm0 \n" "punpckhbw %%xmm1,%%xmm2 \n" - "movdqa %%xmm0," MEMACCESS(2) " \n" - "movdqa %%xmm2," MEMACCESS2(0x10,2) " \n" + "movdqu %%xmm0," MEMACCESS(2) " \n" + "movdqu %%xmm2," MEMACCESS2(0x10,2) " \n" "lea " MEMLEA(0x20,2) ",%2 \n" "sub $0x10,%3 \n" "jg 1b \n" @@ -3246,11 +3246,11 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { asm volatile ( LABELALIGN "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" + "movdqu " MEMACCESS(0) ",%%xmm0 \n" + "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n" + "movdqu %%xmm0," MEMACCESS(1) " \n" + "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" "lea " MEMLEA(0x20,1) ",%1 \n" "sub $0x20,%2 \n" "jg 1b \n" @@ -3331,19 +3331,19 @@ void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { "psrld $0x8,%%xmm1 \n" LABELALIGN "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm3 \n" + "movdqu " MEMACCESS(0) ",%%xmm2 \n" + "movdqu " MEMACCESS2(0x10,0) ",%%xmm3 \n" "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa " MEMACCESS(1) ",%%xmm4 \n" - "movdqa " MEMACCESS2(0x10,1) ",%%xmm5 \n" + "movdqu " MEMACCESS(1) ",%%xmm4 \n" + "movdqu " MEMACCESS2(0x10,1) ",%%xmm5 \n" "pand %%xmm0,%%xmm2 \n" "pand %%xmm0,%%xmm3 \n" 
"pand %%xmm1,%%xmm4 \n" "pand %%xmm1,%%xmm5 \n" "por %%xmm4,%%xmm2 \n" "por %%xmm5,%%xmm3 \n" - "movdqa %%xmm2," MEMACCESS(1) " \n" - "movdqa %%xmm3," MEMACCESS2(0x10,1) " \n" + "movdqu %%xmm2," MEMACCESS(1) " \n" + "movdqu %%xmm3," MEMACCESS2(0x10,1) " \n" "lea " MEMLEA(0x20,1) ",%1 \n" "sub $0x8,%2 \n" "jg 1b \n" @@ -5377,8 +5377,8 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, // General purpose row blend. LABELALIGN "1: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,1,4,1,xmm2) + "movdqu " MEMACCESS(1) ",%%xmm0 \n" + MEMOPREG(movdqu,0x00,1,4,1,xmm2) "movdqa %%xmm0,%%xmm1 \n" "punpcklbw %%xmm2,%%xmm0 \n" "punpckhbw %%xmm2,%%xmm1 \n" @@ -5389,7 +5389,7 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, "packuswb %%xmm1,%%xmm0 \n" "sub $0x10,%2 \n" BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) + MEMOPMEM(movdqu,xmm0,0x00,1,0,1) "lea " MEMLEA(0x10,1) ",%1 \n" "jg 1b \n" "jmp 99f \n" @@ -5397,13 +5397,13 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, // Blend 25 / 75. LABELALIGN "25: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,1,4,1,xmm1) + "movdqu " MEMACCESS(1) ",%%xmm0 \n" + MEMOPREG(movdqu,0x00,1,4,1,xmm1) "pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n" "sub $0x10,%2 \n" BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) + MEMOPMEM(movdqu,xmm0,0x00,1,0,1) "lea " MEMLEA(0x10,1) ",%1 \n" "jg 25b \n" "jmp 99f \n" @@ -5411,12 +5411,12 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, // Blend 50 / 50. LABELALIGN "50: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,1,4,1,xmm1) + "movdqu " MEMACCESS(1) ",%%xmm0 \n" + MEMOPREG(movdqu,0x00,1,4,1,xmm1) "pavgb %%xmm1,%%xmm0 \n" "sub $0x10,%2 \n" BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) + MEMOPMEM(movdqu,xmm0,0x00,1,0,1) "lea " MEMLEA(0x10,1) ",%1 \n" "jg 50b \n" "jmp 99f \n" @@ -5424,13 +5424,13 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, // Blend 75 / 25. 
LABELALIGN "75: \n" - "movdqa " MEMACCESS(1) ",%%xmm1 \n" - MEMOPREG(movdqa,0x00,1,4,1,xmm0) + "movdqu " MEMACCESS(1) ",%%xmm1 \n" + MEMOPREG(movdqu,0x00,1,4,1,xmm0) "pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n" "sub $0x10,%2 \n" BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) + MEMOPMEM(movdqu,xmm0,0x00,1,0,1) "lea " MEMLEA(0x10,1) ",%1 \n" "jg 75b \n" "jmp 99f \n" @@ -5438,9 +5438,9 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, // Blend 100 / 0 - Copy row unchanged. LABELALIGN "100: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" + "movdqu " MEMACCESS(1) ",%%xmm0 \n" "sub $0x10,%2 \n" - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) + MEMOPMEM(movdqu,xmm0,0x00,1,0,1) "lea " MEMLEA(0x10,1) ",%1 \n" "jg 100b \n" @@ -5490,8 +5490,8 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, // General purpose row blend. LABELALIGN "1: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,1,4,1,xmm2) // movdqa (%1,%4,1),%%xmm2 + "movdqu " MEMACCESS(1) ",%%xmm0 \n" + MEMOPREG(movdqu,0x00,1,4,1,xmm2) // movdqu (%1,%4,1),%%xmm2 "movdqa %%xmm0,%%xmm1 \n" "movdqa %%xmm2,%%xmm3 \n" "punpcklbw %%xmm4,%%xmm2 \n" @@ -5509,7 +5509,7 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, "packuswb %%xmm1,%%xmm0 \n" "sub $0x10,%2 \n" BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1) + MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) "lea " MEMLEA(0x10,1) ",%1 \n" "jg 1b \n" "jmp 99f \n" @@ -5517,13 +5517,13 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, // Blend 25 / 75. 
LABELALIGN "25: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,1,4,1,xmm1) // movdqa (%1,%4,1),%%xmm1 + "movdqu " MEMACCESS(1) ",%%xmm0 \n" + MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1 "pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n" "sub $0x10,%2 \n" BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1) + MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) "lea " MEMLEA(0x10,1) ",%1 \n" "jg 25b \n" "jmp 99f \n" @@ -5531,12 +5531,12 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, // Blend 50 / 50. LABELALIGN "50: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,1,4,1,xmm1) // movdqa (%1,%4,1),%%xmm1 + "movdqu " MEMACCESS(1) ",%%xmm0 \n" + MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1 "pavgb %%xmm1,%%xmm0 \n" "sub $0x10,%2 \n" BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1) + MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) "lea " MEMLEA(0x10,1) ",%1 \n" "jg 50b \n" "jmp 99f \n" @@ -5544,13 +5544,13 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, // Blend 75 / 25. LABELALIGN "75: \n" - "movdqa " MEMACCESS(1) ",%%xmm1 \n" - MEMOPREG(movdqa,0x00,1,4,1,xmm0) // movdqa (%1,%4,1),%%xmm0 + "movdqu " MEMACCESS(1) ",%%xmm1 \n" + MEMOPREG(movdqu,0x00,1,4,1,xmm0) // movdqu (%1,%4,1),%%xmm0 "pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n" "sub $0x10,%2 \n" BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1) + MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) "lea " MEMLEA(0x10,1) ",%1 \n" "jg 75b \n" "jmp 99f \n" @@ -5558,9 +5558,9 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, // Blend 100 / 0 - Copy row unchanged. 
LABELALIGN "100: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" + "movdqu " MEMACCESS(1) ",%%xmm0 \n" "sub $0x10,%2 \n" - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1) + MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) "lea " MEMLEA(0x10,1) ",%1 \n" "jg 100b \n" @@ -5813,31 +5813,6 @@ void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr, } #endif // HAS_INTERPOLATEROW_SSE2 -#ifdef HAS_HALFROW_SSE2 -void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix) { - asm volatile ( - "sub %0,%1 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(pavgb,0x00,0,3,1,xmm0) // pavgb (%0,%3),%%xmm0 - "sub $0x10,%2 \n" - MEMOPMEM(movdqa,xmm0,0x00,0,1,1) // movdqa %%xmm0,(%0,%1) - "lea " MEMLEA(0x10,0) ",%0 \n" - "jg 1b \n" - : "+r"(src_uv), // %0 - "+r"(dst_uv), // %1 - "+r"(pix) // %2 - : "r"((intptr_t)(src_uv_stride)) // %3 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0" -#endif - ); -} -#endif // HAS_HALFROW_SSE2 - #ifdef HAS_ARGBTOBAYERROW_SSSE3 void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, uint32 selector, int pix) { diff --git a/source/row_win.cc b/source/row_win.cc index 61602d8..1cf0b9a 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -3674,11 +3674,11 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { align 4 convertloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] + movdqu xmm0, [eax] + movdqu xmm1, [eax + 16] lea eax, [eax + 32] - movdqa [edx], xmm0 - movdqa [edx + 16], xmm1 + movdqu [edx], xmm0 + movdqu [edx + 16], xmm1 lea edx, [edx + 32] sub ecx, 32 jg convertloop @@ -6541,58 +6541,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, #endif // HAS_INTERPOLATEROW_SSE2 __declspec(naked) __declspec(align(16)) -void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_uv - mov edx, [esp + 4 + 8] // src_uv_stride - mov edi, [esp + 4 + 12] 
// dst_v - mov ecx, [esp + 4 + 16] // pix - sub edi, eax - - align 4 - convertloop: - movdqa xmm0, [eax] - pavgb xmm0, [eax + edx] - sub ecx, 16 - movdqa [eax + edi], xmm0 - lea eax, [eax + 16] - jg convertloop - pop edi - ret - } -} - -#ifdef HAS_HALFROW_AVX2 -__declspec(naked) __declspec(align(16)) -void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_uv - mov edx, [esp + 4 + 8] // src_uv_stride - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - sub edi, eax - - align 4 - convertloop: - vmovdqu ymm0, [eax] - vpavgb ymm0, ymm0, [eax + edx] - sub ecx, 32 - vmovdqu [eax + edi], ymm0 - lea eax, [eax + 32] - jg convertloop - - pop edi - vzeroupper - ret - } -} -#endif // HAS_HALFROW_AVX2 - -__declspec(naked) __declspec(align(16)) void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, uint32 selector, int pix) { __asm { |