diff options
author | Frank Barchard <fbarchard@google.com> | 2020-06-03 16:43:55 -0700 |
---|---|---|
committer | Commit Bot <commit-bot@chromium.org> | 2020-06-04 18:24:45 +0000 |
commit | c5e45dcae58f5cb3eb893f8000c1de88a8fe3c4e (patch) | |
tree | 6f9ef752d9caae866e000f6eff1aac24e93ef7da | |
parent | ce5b333853c719a7d868fe08fc8fe7a9e6c56079 (diff) | |
download | libyuv-c5e45dcae58f5cb3eb893f8000c1de88a8fe3c4e.tar.gz |
Optimize ABGRToI420 for AVX2
libyuv_test --gunit_filter=*ABGRToI420_Opt --libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=1000 --libyuv_flags=-1 --libyuv_cpu_info=-1
Was SSSE3 ABGRToI420_Opt (324 ms)
Now AVX2 ABGRToI420_Opt (253 ms)
Bug: b/155989084
Change-Id: I4f3831e29b379be758f9d3fcb244be088bb1ca3c
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2229606
Reviewed-by: Miguel Casas <mcasas@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
-rw-r--r-- | README.chromium | 2 | ||||
-rw-r--r-- | include/libyuv/convert_argb.h | 12 | ||||
-rw-r--r-- | include/libyuv/version.h | 2 | ||||
-rw-r--r-- | source/convert.cc | 18 | ||||
-rw-r--r-- | source/row_neon.cc | 4 | ||||
-rwxr-xr-x | source/test.sh | 35 |
6 files changed, 59 insertions, 14 deletions
diff --git a/README.chromium b/README.chromium index 860799e7..a9638f83 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1758 +Version: 1759 License: BSD License File: LICENSE diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index a45b94c7..ce745732 100644 --- a/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h @@ -39,10 +39,14 @@ LIBYUV_API extern const struct YuvConstants kYvu2020Constants; // BT.2020 #define kYuvJPEGConstantsVU kYvuJPEGConstants #define kYuvH709ConstantsVU kYvuH709Constants #define kYuv2020ConstantsVU kYvu2020Constants -#define NV12ToABGRMatrix(a, b, c, d, e, f, g, h, i) NV21ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i) -#define NV21ToABGRMatrix(a, b, c, d, e, f, g, h, i) NV12ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i) -#define NV12ToRAWMatrix(a, b, c, d, e, f, g, h, i) NV21ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i) -#define NV21ToRAWMatrix(a, b, c, d, e, f, g, h, i) NV12ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i) +#define NV12ToABGRMatrix(a, b, c, d, e, f, g, h, i) \ + NV21ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i) +#define NV21ToABGRMatrix(a, b, c, d, e, f, g, h, i) \ + NV12ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i) +#define NV12ToRAWMatrix(a, b, c, d, e, f, g, h, i) \ + NV21ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i) +#define NV21ToRAWMatrix(a, b, c, d, e, f, g, h, i) \ + NV12ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i) // Alias. 
#define ARGBToARGB ARGBCopy diff --git a/include/libyuv/version.h b/include/libyuv/version.h index ba3ef50c..d6ee0838 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1758 +#define LIBYUV_VERSION 1759 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert.cc b/source/convert.cc index 3886f3f0..98258b9b 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -608,11 +608,9 @@ int NV21ToI420(const uint8_t* src_y, int dst_stride_v, int width, int height) { - return NV12ToI420(src_y, src_stride_y, - src_vu, src_stride_vu, - dst_y, dst_stride_y, - dst_v, dst_stride_v, - dst_u, dst_stride_u, width, height); + return NV12ToI420(src_y, src_stride_y, src_vu, src_stride_vu, dst_y, + dst_stride_y, dst_v, dst_stride_v, dst_u, dst_stride_u, + width, height); } // Convert YUY2 to I420. @@ -1170,6 +1168,16 @@ int ABGRToI420(const uint8_t* src_abgr, } } #endif +#if defined(HAS_ABGRTOYROW_AVX2) && defined(HAS_ABGRTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToUVRow = ABGRToUVRow_Any_AVX2; + ABGRToYRow = ABGRToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToUVRow = ABGRToUVRow_AVX2; + ABGRToYRow = ABGRToYRow_AVX2; + } + } +#endif #if defined(HAS_ABGRTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ABGRToYRow = ABGRToYRow_Any_NEON; diff --git a/source/row_neon.cc b/source/row_neon.cc index 13707338..b81c53ff 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -349,9 +349,7 @@ void I400ToARGBRow_NEON(const uint8_t* src_y, asm volatile( YUVTORGB_SETUP "vmov.u8 d23, #255 \n" - "1: \n" - READYUV400 - YUVTORGB + "1: \n" READYUV400 YUVTORGB "subs %2, %2, #8 \n" "vst4.8 {d20, d21, d22, d23}, [%1]! 
\n" "bgt 1b \n" diff --git a/source/test.sh b/source/test.sh new file mode 100755 index 00000000..7f12c3c1 --- /dev/null +++ b/source/test.sh @@ -0,0 +1,35 @@ +#!/bin/bash +set -x + +function runbenchmark1 { + perf record /google/src/cloud/fbarchard/clean/google3/blaze-bin/third_party/libyuv/libyuv_test --gunit_filter=*$1 --libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=1000 --libyuv_flags=-1 --libyuv_cpu_info=-1 + perf report | grep AVX +} + +runbenchmark1 ABGRToI420 +runbenchmark1 Android420ToI420 +runbenchmark1 ARGBToI420 +runbenchmark1 Convert16To8Plane +runbenchmark1 ConvertToARGB +runbenchmark1 ConvertToI420 +runbenchmark1 CopyPlane +runbenchmark1 H010ToAB30 +runbenchmark1 H010ToAR30 +runbenchmark1 HalfFloatPlane +runbenchmark1 I010ToAB30 +runbenchmark1 I010ToAR30 +runbenchmark1 I420Copy +runbenchmark1 I420Psnr +runbenchmark1 I420Scale +runbenchmark1 I420Ssim +runbenchmark1 I420ToARGB +runbenchmark1 I420ToNV12 +runbenchmark1 I420ToUYVY +runbenchmark1 I422ToI420 +runbenchmark1 InitCpuFlags +runbenchmark1 J420ToARGB +runbenchmark1 NV12ToARGB +runbenchmark1 NV12ToI420 +runbenchmark1 NV12ToI420Rotate +runbenchmark1 SetCpuFlags +runbenchmark1 YUY2ToI420 |