aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrank Barchard <fbarchard@google.com>2020-06-03 16:43:55 -0700
committerCommit Bot <commit-bot@chromium.org>2020-06-04 18:24:45 +0000
commitc5e45dcae58f5cb3eb893f8000c1de88a8fe3c4e (patch)
tree6f9ef752d9caae866e000f6eff1aac24e93ef7da
parentce5b333853c719a7d868fe08fc8fe7a9e6c56079 (diff)
downloadlibyuv-c5e45dcae58f5cb3eb893f8000c1de88a8fe3c4e.tar.gz
Optimize ABGRToI420 for AVX2
libyuv_test --gunit_filter=*ABGRToI420_Opt --libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=1000 --libyuv_flags=-1 --libyuv_cpu_info=-1 Was SSSE3 ABGRToI420_Opt (324 ms) Now AVX2 ABGRToI420_Opt (253 ms) Bug: b/155989084 Change-Id: I4f3831e29b379be758f9d3fcb244be088bb1ca3c Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2229606 Reviewed-by: Miguel Casas <mcasas@chromium.org> Commit-Queue: Frank Barchard <fbarchard@chromium.org>
-rw-r--r--README.chromium2
-rw-r--r--include/libyuv/convert_argb.h12
-rw-r--r--include/libyuv/version.h2
-rw-r--r--source/convert.cc18
-rw-r--r--source/row_neon.cc4
-rwxr-xr-xsource/test.sh35
6 files changed, 59 insertions, 14 deletions
diff --git a/README.chromium b/README.chromium
index 860799e7..a9638f83 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1758
+Version: 1759
License: BSD
License File: LICENSE
diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h
index a45b94c7..ce745732 100644
--- a/include/libyuv/convert_argb.h
+++ b/include/libyuv/convert_argb.h
@@ -39,10 +39,14 @@ LIBYUV_API extern const struct YuvConstants kYvu2020Constants; // BT.2020
#define kYuvJPEGConstantsVU kYvuJPEGConstants
#define kYuvH709ConstantsVU kYvuH709Constants
#define kYuv2020ConstantsVU kYvu2020Constants
-#define NV12ToABGRMatrix(a, b, c, d, e, f, g, h, i) NV21ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i)
-#define NV21ToABGRMatrix(a, b, c, d, e, f, g, h, i) NV12ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i)
-#define NV12ToRAWMatrix(a, b, c, d, e, f, g, h, i) NV21ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i)
-#define NV21ToRAWMatrix(a, b, c, d, e, f, g, h, i) NV12ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i)
+#define NV12ToABGRMatrix(a, b, c, d, e, f, g, h, i) \
+ NV21ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i)
+#define NV21ToABGRMatrix(a, b, c, d, e, f, g, h, i) \
+ NV12ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i)
+#define NV12ToRAWMatrix(a, b, c, d, e, f, g, h, i) \
+ NV21ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i)
+#define NV21ToRAWMatrix(a, b, c, d, e, f, g, h, i) \
+ NV12ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i)
// Alias.
#define ARGBToARGB ARGBCopy
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index ba3ef50c..d6ee0838 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1758
+#define LIBYUV_VERSION 1759
#endif // INCLUDE_LIBYUV_VERSION_H_
diff --git a/source/convert.cc b/source/convert.cc
index 3886f3f0..98258b9b 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -608,11 +608,9 @@ int NV21ToI420(const uint8_t* src_y,
int dst_stride_v,
int width,
int height) {
- return NV12ToI420(src_y, src_stride_y,
- src_vu, src_stride_vu,
- dst_y, dst_stride_y,
- dst_v, dst_stride_v,
- dst_u, dst_stride_u, width, height);
+ return NV12ToI420(src_y, src_stride_y, src_vu, src_stride_vu, dst_y,
+ dst_stride_y, dst_v, dst_stride_v, dst_u, dst_stride_u,
+ width, height);
}
// Convert YUY2 to I420.
@@ -1170,6 +1168,16 @@ int ABGRToI420(const uint8_t* src_abgr,
}
}
#endif
+#if defined(HAS_ABGRTOYROW_AVX2) && defined(HAS_ABGRTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ABGRToUVRow = ABGRToUVRow_Any_AVX2;
+ ABGRToYRow = ABGRToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToUVRow = ABGRToUVRow_AVX2;
+ ABGRToYRow = ABGRToYRow_AVX2;
+ }
+ }
+#endif
#if defined(HAS_ABGRTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ABGRToYRow = ABGRToYRow_Any_NEON;
diff --git a/source/row_neon.cc b/source/row_neon.cc
index 13707338..b81c53ff 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -349,9 +349,7 @@ void I400ToARGBRow_NEON(const uint8_t* src_y,
asm volatile(
YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
- "1: \n"
- READYUV400
- YUVTORGB
+ "1: \n" READYUV400 YUVTORGB
"subs %2, %2, #8 \n"
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"
"bgt 1b \n"
diff --git a/source/test.sh b/source/test.sh
new file mode 100755
index 00000000..7f12c3c1
--- /dev/null
+++ b/source/test.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+set -x
+
+function runbenchmark1 {
+ perf record /google/src/cloud/fbarchard/clean/google3/blaze-bin/third_party/libyuv/libyuv_test --gunit_filter=*$1 --libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=1000 --libyuv_flags=-1 --libyuv_cpu_info=-1
+ perf report | grep AVX
+}
+
+runbenchmark1 ABGRToI420
+runbenchmark1 Android420ToI420
+runbenchmark1 ARGBToI420
+runbenchmark1 Convert16To8Plane
+runbenchmark1 ConvertToARGB
+runbenchmark1 ConvertToI420
+runbenchmark1 CopyPlane
+runbenchmark1 H010ToAB30
+runbenchmark1 H010ToAR30
+runbenchmark1 HalfFloatPlane
+runbenchmark1 I010ToAB30
+runbenchmark1 I010ToAR30
+runbenchmark1 I420Copy
+runbenchmark1 I420Psnr
+runbenchmark1 I420Scale
+runbenchmark1 I420Ssim
+runbenchmark1 I420ToARGB
+runbenchmark1 I420ToNV12
+runbenchmark1 I420ToUYVY
+runbenchmark1 I422ToI420
+runbenchmark1 InitCpuFlags
+runbenchmark1 J420ToARGB
+runbenchmark1 NV12ToARGB
+runbenchmark1 NV12ToI420
+runbenchmark1 NV12ToI420Rotate
+runbenchmark1 SetCpuFlags
+runbenchmark1 YUY2ToI420