diff options
Diffstat (limited to 'files/source/rotate_argb.cc')
-rw-r--r-- | files/source/rotate_argb.cc | 128 |
1 files changed, 79 insertions, 49 deletions
diff --git a/files/source/rotate_argb.cc b/files/source/rotate_argb.cc index 9c994467..787c0ad1 100644 --- a/files/source/rotate_argb.cc +++ b/files/source/rotate_argb.cc @@ -4,7 +4,7 @@ * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ @@ -22,32 +22,41 @@ extern "C" { // ARGBScale has a function to copy pixels to a row, striding each source // pixel by a constant. -#if !defined(YUV_DISABLE_ASM) && (defined(_M_IX86) || \ - defined(__x86_64__) || defined(__i386__)) +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(_M_IX86) || \ + (defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__)) #define HAS_SCALEARGBROWDOWNEVEN_SSE2 void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride, - int src_stepx, - uint8* dst_ptr, int dst_width); + int src_stepx, uint8* dst_ptr, int dst_width); #endif +#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ + (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) +#define HAS_SCALEARGBROWDOWNEVEN_NEON +void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride, + int src_stepx, uint8* dst_ptr, int dst_width); +#endif + void ScaleARGBRowDownEven_C(const uint8* src_ptr, int, - int src_stepx, - uint8* dst_ptr, int dst_width); + int src_stepx, uint8* dst_ptr, int dst_width); static void ARGBTranspose(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { + uint8* dst, int dst_stride, int width, int height) { + int i; + int src_pixel_step = src_stride >> 2; void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride, int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C; #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(height, 4) && // width of dest. - IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4)) { // Width of dest. ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2; } #endif +#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON) + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4)) { // Width of dest. + ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON; + } +#endif - int src_pixel_step = src_stride / 4; - for (int i = 0; i < width; ++i) { // column of source to row of dest. + for (i = 0; i < width; ++i) { // column of source to row of dest. ScaleARGBRowDownEven(src, 0, src_pixel_step, dst, height); dst += dst_stride; src += 4; @@ -55,8 +64,7 @@ static void ARGBTranspose(const uint8* src, int src_stride, } void ARGBRotate90(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { + uint8* dst, int dst_stride, int width, int height) { // Rotate by 90 is a ARGBTranspose with the source read // from bottom to top. So set the source pointer to the end // of the buffer and flip the sign of the source stride. @@ -66,8 +74,7 @@ void ARGBRotate90(const uint8* src, int src_stride, } void ARGBRotate270(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { + uint8* dst, int dst_stride, int width, int height) { // Rotate by 270 is a ARGBTranspose with the destination written // from bottom to top. So set the destination pointer to the end // of the buffer and flip the sign of the destination stride. @@ -77,60 +84,83 @@ void ARGBRotate270(const uint8* src, int src_stride, } void ARGBRotate180(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { + uint8* dst, int dst_stride, int width, int height) { + // Swap first and last row and mirror the content. Uses a temporary row. + align_buffer_64(row, width * 4); + const uint8* src_bot = src + src_stride * (height - 1); + uint8* dst_bot = dst + dst_stride * (height - 1); + int half_height = (height + 1) >> 1; + int y; void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) = ARGBMirrorRow_C; -#if defined(HAS_ARGBMIRRORROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && - IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { - ARGBMirrorRow = ARGBMirrorRow_SSSE3; + void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; +#if defined(HAS_ARGBMIRRORROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBMirrorRow = ARGBMirrorRow_Any_NEON; + if (IS_ALIGNED(width, 4)) { + ARGBMirrorRow = ARGBMirrorRow_NEON; + } } #endif - void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; -#if defined(HAS_COPYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width * 4, 64)) { - CopyRow = CopyRow_NEON; +#if defined(HAS_ARGBMIRRORROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ARGBMirrorRow = ARGBMirrorRow_Any_SSE2; + if (IS_ALIGNED(width, 4)) { + ARGBMirrorRow = ARGBMirrorRow_SSE2; + } } #endif -#if defined(HAS_COPYROW_X86) - if (TestCpuFlag(kCpuHasX86)) { - CopyRow = CopyRow_X86; +#if defined(HAS_ARGBMIRRORROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBMirrorRow = ARGBMirrorRow_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBMirrorRow = ARGBMirrorRow_AVX2; + } } #endif #if defined(HAS_COPYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width * 4, 32) && - IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && - IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { - CopyRow = CopyRow_SSE2; + if (TestCpuFlag(kCpuHasSSE2)) { + CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2; } #endif - if (width * 4 > kMaxStride) { - return; +#if defined(HAS_COPYROW_AVX) + if (TestCpuFlag(kCpuHasAVX)) { + CopyRow = IS_ALIGNED(width * 4, 64) ? CopyRow_AVX : CopyRow_Any_AVX; } - // Swap first and last row and mirror the content. Uses a temporary row. - SIMD_ALIGNED(uint8 row[kMaxStride]); - const uint8* src_bot = src + src_stride * (height - 1); - uint8* dst_bot = dst + dst_stride * (height - 1); - int half_height = (height + 1) >> 1; +#endif +#if defined(HAS_COPYROW_ERMS) + if (TestCpuFlag(kCpuHasERMS)) { + CopyRow = CopyRow_ERMS; + } +#endif +#if defined(HAS_COPYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON; + } +#endif +#if defined(HAS_COPYROW_MIPS) + if (TestCpuFlag(kCpuHasMIPS)) { + CopyRow = CopyRow_MIPS; + } +#endif + // Odd height will harmlessly mirror the middle row twice. - for (int y = 0; y < half_height; ++y) { + for (y = 0; y < half_height; ++y) { ARGBMirrorRow(src, row, width); // Mirror first row into a buffer - src += src_stride; ARGBMirrorRow(src_bot, dst, width); // Mirror last row into first row - dst += dst_stride; CopyRow(row, dst_bot, width * 4); // Copy first mirrored row into last + src += src_stride; + dst += dst_stride; src_bot -= src_stride; dst_bot -= dst_stride; } + free_aligned_buffer_64(row); } LIBYUV_API int ARGBRotate(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height, - RotationMode mode) { + uint8* dst_argb, int dst_stride_argb, int width, int height, + enum RotationMode mode) { if (!src_argb || width <= 0 || height == 0 || !dst_argb) { return -1; } |