author     Android Build Coastguard Worker <android-build-coastguard-worker@google.com>  2023-07-07 04:56:02 +0000
committer  Android Build Coastguard Worker <android-build-coastguard-worker@google.com>  2023-07-07 04:56:02 +0000
commit     1fa594b2aa576922fd01c702585fcdb1a2e6f311 (patch)
tree       25cbeae94c26cad28a13bb089a662291b4b5d905 /files/unit_test/planar_test.cc
parent     ba751146302bf8c030e9d4f7f580d5e1ff36dfdb (diff)
parent     3c4c137522d3a6759f2d96aee67149410ca2c1b9 (diff)
download   libyuv-1fa594b2aa576922fd01c702585fcdb1a2e6f311.tar.gz
Snap for 10453563 from 3c4c137522d3a6759f2d96aee67149410ca2c1b9 to mainline-extservices-release (aml_ext_341027030)
Change-Id: I9f5633c3ed357c6759beb58d98c26a0a2283896b
Diffstat (limited to 'files/unit_test/planar_test.cc')
-rw-r--r--  files/unit_test/planar_test.cc | 1548
1 file changed, 1304 insertions(+), 244 deletions(-)
diff --git a/files/unit_test/planar_test.cc b/files/unit_test/planar_test.cc
index 70f8966e..3a8c470b 100644
--- a/files/unit_test/planar_test.cc
+++ b/files/unit_test/planar_test.cc
@@ -12,9 +12,6 @@
#include <stdlib.h>
#include <time.h>
-// row.h defines SIMD_ALIGNED, overriding unit_test.h
-#include "libyuv/row.h" /* For ScaleSumSamples_Neon */
-
#include "../unit_test/unit_test.h"
#include "libyuv/compare.h"
#include "libyuv/convert.h"
@@ -24,6 +21,19 @@
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
+#include "libyuv/scale.h"
+
+#ifdef ENABLE_ROW_TESTS
+// row.h defines SIMD_ALIGNED, overriding unit_test.h
+// TODO(fbarchard): Remove row.h from unittests. Test public functions.
+#include "libyuv/row.h" /* For ScaleSumSamples_Neon */
+#endif
+
+#if defined(LIBYUV_BIT_EXACT)
+#define EXPECTED_ATTENUATE_DIFF 0
+#else
+#define EXPECTED_ATTENUATE_DIFF 2
+#endif
namespace libyuv {
@@ -96,9 +106,9 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) {
EXPECT_EQ(32, atten_pixels[128 * 4 + 1]);
EXPECT_EQ(21, atten_pixels[128 * 4 + 2]);
EXPECT_EQ(128, atten_pixels[128 * 4 + 3]);
- EXPECT_NEAR(255, atten_pixels[255 * 4 + 0], 1);
- EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], 1);
- EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], 1);
+ EXPECT_NEAR(254, atten_pixels[255 * 4 + 0], EXPECTED_ATTENUATE_DIFF);
+ EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], EXPECTED_ATTENUATE_DIFF);
+ EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], EXPECTED_ATTENUATE_DIFF);
EXPECT_EQ(255, atten_pixels[255 * 4 + 3]);
free_aligned_buffer_page_end(atten2_pixels);
@@ -151,31 +161,32 @@ static int TestAttenuateI(int width,
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) {
- int max_diff = TestAttenuateI(benchmark_width_ - 1, benchmark_height_,
+ int max_diff = TestAttenuateI(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
- EXPECT_LE(max_diff, 2);
+
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
- EXPECT_LE(max_diff, 2);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
- EXPECT_LE(max_diff, 2);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
- EXPECT_LE(max_diff, 2);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
static int TestUnattenuateI(int width,
@@ -224,31 +235,31 @@ static int TestUnattenuateI(int width,
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
- int max_diff = TestUnattenuateI(benchmark_width_ - 1, benchmark_height_,
+ int max_diff = TestUnattenuateI(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
- EXPECT_LE(max_diff, 2);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 1);
- EXPECT_LE(max_diff, 2);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, -1, 0);
- EXPECT_LE(max_diff, 2);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
- EXPECT_LE(max_diff, 2);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
@@ -277,6 +288,7 @@ TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
}
}
+// near is for legacy platforms.
TEST_F(LibYUVPlanarTest, TestARGBGray) {
SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
memset(orig_pixels, 0, sizeof(orig_pixels));
@@ -313,17 +325,17 @@ TEST_F(LibYUVPlanarTest, TestARGBGray) {
orig_pixels[5][3] = 224u;
// Do 16 to test asm version.
ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1);
- EXPECT_EQ(30u, orig_pixels[0][0]);
- EXPECT_EQ(30u, orig_pixels[0][1]);
- EXPECT_EQ(30u, orig_pixels[0][2]);
+ EXPECT_NEAR(29u, orig_pixels[0][0], 1);
+ EXPECT_NEAR(29u, orig_pixels[0][1], 1);
+ EXPECT_NEAR(29u, orig_pixels[0][2], 1);
EXPECT_EQ(128u, orig_pixels[0][3]);
EXPECT_EQ(149u, orig_pixels[1][0]);
EXPECT_EQ(149u, orig_pixels[1][1]);
EXPECT_EQ(149u, orig_pixels[1][2]);
EXPECT_EQ(0u, orig_pixels[1][3]);
- EXPECT_EQ(76u, orig_pixels[2][0]);
- EXPECT_EQ(76u, orig_pixels[2][1]);
- EXPECT_EQ(76u, orig_pixels[2][2]);
+ EXPECT_NEAR(77u, orig_pixels[2][0], 1);
+ EXPECT_NEAR(77u, orig_pixels[2][1], 1);
+ EXPECT_NEAR(77u, orig_pixels[2][2], 1);
EXPECT_EQ(255u, orig_pixels[2][3]);
EXPECT_EQ(0u, orig_pixels[3][0]);
EXPECT_EQ(0u, orig_pixels[3][1]);
@@ -333,9 +345,9 @@ TEST_F(LibYUVPlanarTest, TestARGBGray) {
EXPECT_EQ(255u, orig_pixels[4][1]);
EXPECT_EQ(255u, orig_pixels[4][2]);
EXPECT_EQ(255u, orig_pixels[4][3]);
- EXPECT_EQ(96u, orig_pixels[5][0]);
- EXPECT_EQ(96u, orig_pixels[5][1]);
- EXPECT_EQ(96u, orig_pixels[5][2]);
+ EXPECT_NEAR(97u, orig_pixels[5][0], 1);
+ EXPECT_NEAR(97u, orig_pixels[5][1], 1);
+ EXPECT_NEAR(97u, orig_pixels[5][2], 1);
EXPECT_EQ(224u, orig_pixels[5][3]);
for (int i = 0; i < 1280; ++i) {
orig_pixels[i][0] = i;
@@ -385,30 +397,30 @@ TEST_F(LibYUVPlanarTest, TestARGBGrayTo) {
orig_pixels[5][3] = 224u;
// Do 16 to test asm version.
ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1);
- EXPECT_EQ(30u, gray_pixels[0][0]);
- EXPECT_EQ(30u, gray_pixels[0][1]);
- EXPECT_EQ(30u, gray_pixels[0][2]);
- EXPECT_EQ(128u, gray_pixels[0][3]);
- EXPECT_EQ(149u, gray_pixels[1][0]);
- EXPECT_EQ(149u, gray_pixels[1][1]);
- EXPECT_EQ(149u, gray_pixels[1][2]);
- EXPECT_EQ(0u, gray_pixels[1][3]);
- EXPECT_EQ(76u, gray_pixels[2][0]);
- EXPECT_EQ(76u, gray_pixels[2][1]);
- EXPECT_EQ(76u, gray_pixels[2][2]);
- EXPECT_EQ(255u, gray_pixels[2][3]);
- EXPECT_EQ(0u, gray_pixels[3][0]);
- EXPECT_EQ(0u, gray_pixels[3][1]);
- EXPECT_EQ(0u, gray_pixels[3][2]);
- EXPECT_EQ(255u, gray_pixels[3][3]);
- EXPECT_EQ(255u, gray_pixels[4][0]);
- EXPECT_EQ(255u, gray_pixels[4][1]);
- EXPECT_EQ(255u, gray_pixels[4][2]);
- EXPECT_EQ(255u, gray_pixels[4][3]);
- EXPECT_EQ(96u, gray_pixels[5][0]);
- EXPECT_EQ(96u, gray_pixels[5][1]);
- EXPECT_EQ(96u, gray_pixels[5][2]);
- EXPECT_EQ(224u, gray_pixels[5][3]);
+ EXPECT_NEAR(30u, gray_pixels[0][0], 1);
+ EXPECT_NEAR(30u, gray_pixels[0][1], 1);
+ EXPECT_NEAR(30u, gray_pixels[0][2], 1);
+ EXPECT_NEAR(128u, gray_pixels[0][3], 1);
+ EXPECT_NEAR(149u, gray_pixels[1][0], 1);
+ EXPECT_NEAR(149u, gray_pixels[1][1], 1);
+ EXPECT_NEAR(149u, gray_pixels[1][2], 1);
+ EXPECT_NEAR(0u, gray_pixels[1][3], 1);
+ EXPECT_NEAR(76u, gray_pixels[2][0], 1);
+ EXPECT_NEAR(76u, gray_pixels[2][1], 1);
+ EXPECT_NEAR(76u, gray_pixels[2][2], 1);
+ EXPECT_NEAR(255u, gray_pixels[2][3], 1);
+ EXPECT_NEAR(0u, gray_pixels[3][0], 1);
+ EXPECT_NEAR(0u, gray_pixels[3][1], 1);
+ EXPECT_NEAR(0u, gray_pixels[3][2], 1);
+ EXPECT_NEAR(255u, gray_pixels[3][3], 1);
+ EXPECT_NEAR(255u, gray_pixels[4][0], 1);
+ EXPECT_NEAR(255u, gray_pixels[4][1], 1);
+ EXPECT_NEAR(255u, gray_pixels[4][2], 1);
+ EXPECT_NEAR(255u, gray_pixels[4][3], 1);
+ EXPECT_NEAR(96u, gray_pixels[5][0], 1);
+ EXPECT_NEAR(96u, gray_pixels[5][1], 1);
+ EXPECT_NEAR(96u, gray_pixels[5][2], 1);
+ EXPECT_NEAR(224u, gray_pixels[5][3], 1);
for (int i = 0; i < 1280; ++i) {
orig_pixels[i][0] = i;
orig_pixels[i][1] = i / 2;
@@ -418,6 +430,20 @@ TEST_F(LibYUVPlanarTest, TestARGBGrayTo) {
for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 1280, 1);
}
+
+ for (int i = 0; i < 256; ++i) {
+ orig_pixels[i][0] = i;
+ orig_pixels[i][1] = i;
+ orig_pixels[i][2] = i;
+ orig_pixels[i][3] = i;
+ }
+ ARGBGray(&orig_pixels[0][0], 0, 0, 0, 256, 1);
+ for (int i = 0; i < 256; ++i) {
+ EXPECT_EQ(i, orig_pixels[i][0]);
+ EXPECT_EQ(i, orig_pixels[i][1]);
+ EXPECT_EQ(i, orig_pixels[i][2]);
+ EXPECT_EQ(i, orig_pixels[i][3]);
+ }
}
TEST_F(LibYUVPlanarTest, TestARGBSepia) {
@@ -763,27 +789,75 @@ TEST_F(LibYUVPlanarTest, TestARGBQuantize) {
}
}
-TEST_F(LibYUVPlanarTest, TestARGBMirror) {
- SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
- SIMD_ALIGNED(uint8_t dst_pixels[1280][4]);
+TEST_F(LibYUVPlanarTest, ARGBMirror_Opt) {
+ align_buffer_page_end(src_pixels, benchmark_width_ * benchmark_height_ * 4);
+ align_buffer_page_end(dst_pixels_opt,
+ benchmark_width_ * benchmark_height_ * 4);
+ align_buffer_page_end(dst_pixels_c, benchmark_width_ * benchmark_height_ * 4);
- for (int i = 0; i < 1280; ++i) {
- orig_pixels[i][0] = i;
- orig_pixels[i][1] = i / 2;
- orig_pixels[i][2] = i / 3;
- orig_pixels[i][3] = i / 4;
+ MemRandomize(src_pixels, benchmark_width_ * benchmark_height_ * 4);
+ MaskCpuFlags(disable_cpu_flags_);
+ ARGBMirror(src_pixels, benchmark_width_ * 4, dst_pixels_c,
+ benchmark_width_ * 4, benchmark_width_, benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ ARGBMirror(src_pixels, benchmark_width_ * 4, dst_pixels_opt,
+ benchmark_width_ * 4, benchmark_width_, benchmark_height_);
}
- ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 1280, 1);
+ for (int i = 0; i < benchmark_width_ * benchmark_height_ * 4; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(dst_pixels_c);
+}
- for (int i = 0; i < 1280; ++i) {
- EXPECT_EQ(i & 255, dst_pixels[1280 - 1 - i][0]);
- EXPECT_EQ((i / 2) & 255, dst_pixels[1280 - 1 - i][1]);
- EXPECT_EQ((i / 3) & 255, dst_pixels[1280 - 1 - i][2]);
- EXPECT_EQ((i / 4) & 255, dst_pixels[1280 - 1 - i][3]);
+TEST_F(LibYUVPlanarTest, MirrorPlane_Opt) {
+ align_buffer_page_end(src_pixels, benchmark_width_ * benchmark_height_);
+ align_buffer_page_end(dst_pixels_opt, benchmark_width_ * benchmark_height_);
+ align_buffer_page_end(dst_pixels_c, benchmark_width_ * benchmark_height_);
+
+ MemRandomize(src_pixels, benchmark_width_ * benchmark_height_);
+ MaskCpuFlags(disable_cpu_flags_);
+ MirrorPlane(src_pixels, benchmark_width_, dst_pixels_c, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ MirrorPlane(src_pixels, benchmark_width_, dst_pixels_opt, benchmark_width_,
+ benchmark_width_, benchmark_height_);
}
- for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
- ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 1280, 1);
+ for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(dst_pixels_c);
+}
+
+TEST_F(LibYUVPlanarTest, MirrorUVPlane_Opt) {
+ align_buffer_page_end(src_pixels, benchmark_width_ * benchmark_height_ * 2);
+ align_buffer_page_end(dst_pixels_opt,
+ benchmark_width_ * benchmark_height_ * 2);
+ align_buffer_page_end(dst_pixels_c, benchmark_width_ * benchmark_height_ * 2);
+
+ MemRandomize(src_pixels, benchmark_width_ * benchmark_height_ * 2);
+ MaskCpuFlags(disable_cpu_flags_);
+ MirrorUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_c,
+ benchmark_width_ * 2, benchmark_width_, benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ MirrorUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_opt,
+ benchmark_width_ * 2, benchmark_width_, benchmark_height_);
}
+ for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(dst_pixels_c);
}
TEST_F(LibYUVPlanarTest, TestShade) {
@@ -1006,10 +1080,91 @@ TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
}
}
+TEST_F(LibYUVPlanarTest, TestInterpolatePlane_16) {
+ SIMD_ALIGNED(uint16_t orig_pixels_0[1280]);
+ SIMD_ALIGNED(uint16_t orig_pixels_1[1280]);
+ SIMD_ALIGNED(uint16_t interpolate_pixels[1280]);
+ memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
+ memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
+
+ orig_pixels_0[0] = 16u;
+ orig_pixels_0[1] = 32u;
+ orig_pixels_0[2] = 64u;
+ orig_pixels_0[3] = 128u;
+ orig_pixels_0[4] = 0u;
+ orig_pixels_0[5] = 0u;
+ orig_pixels_0[6] = 0u;
+ orig_pixels_0[7] = 255u;
+ orig_pixels_0[8] = 0u;
+ orig_pixels_0[9] = 0u;
+ orig_pixels_0[10] = 0u;
+ orig_pixels_0[11] = 0u;
+ orig_pixels_0[12] = 0u;
+ orig_pixels_0[13] = 0u;
+ orig_pixels_0[14] = 0u;
+ orig_pixels_0[15] = 0u;
+
+ orig_pixels_1[0] = 0u;
+ orig_pixels_1[1] = 0u;
+ orig_pixels_1[2] = 0u;
+ orig_pixels_1[3] = 0u;
+ orig_pixels_1[4] = 0u;
+ orig_pixels_1[5] = 0u;
+ orig_pixels_1[6] = 0u;
+ orig_pixels_1[7] = 0u;
+ orig_pixels_1[8] = 0u;
+ orig_pixels_1[9] = 0u;
+ orig_pixels_1[10] = 0u;
+ orig_pixels_1[11] = 0u;
+ orig_pixels_1[12] = 255u;
+ orig_pixels_1[13] = 255u;
+ orig_pixels_1[14] = 255u;
+ orig_pixels_1[15] = 255u;
+
+ InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
+ &interpolate_pixels[0], 0, 16, 1, 128);
+ EXPECT_EQ(8u, interpolate_pixels[0]);
+ EXPECT_EQ(16u, interpolate_pixels[1]);
+ EXPECT_EQ(32u, interpolate_pixels[2]);
+ EXPECT_EQ(64u, interpolate_pixels[3]);
+ EXPECT_EQ(0u, interpolate_pixels[4]);
+ EXPECT_EQ(0u, interpolate_pixels[5]);
+ EXPECT_EQ(0u, interpolate_pixels[6]);
+ EXPECT_EQ(128u, interpolate_pixels[7]);
+ EXPECT_EQ(0u, interpolate_pixels[8]);
+ EXPECT_EQ(0u, interpolate_pixels[9]);
+ EXPECT_EQ(0u, interpolate_pixels[10]);
+ EXPECT_EQ(0u, interpolate_pixels[11]);
+ EXPECT_EQ(128u, interpolate_pixels[12]);
+ EXPECT_EQ(128u, interpolate_pixels[13]);
+ EXPECT_EQ(128u, interpolate_pixels[14]);
+ EXPECT_EQ(128u, interpolate_pixels[15]);
+
+ InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
+ &interpolate_pixels[0], 0, 16, 1, 0);
+ EXPECT_EQ(16u, interpolate_pixels[0]);
+ EXPECT_EQ(32u, interpolate_pixels[1]);
+ EXPECT_EQ(64u, interpolate_pixels[2]);
+ EXPECT_EQ(128u, interpolate_pixels[3]);
+
+ InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
+ &interpolate_pixels[0], 0, 16, 1, 192);
+
+ EXPECT_EQ(4u, interpolate_pixels[0]);
+ EXPECT_EQ(8u, interpolate_pixels[1]);
+ EXPECT_EQ(16u, interpolate_pixels[2]);
+ EXPECT_EQ(32u, interpolate_pixels[3]);
+
+ for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
+ InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
+ &interpolate_pixels[0], 0, 1280, 1, 123);
+ }
+}
+
#define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \
N, NEG, OFF) \
TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kStrideA = \
(kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
@@ -1041,7 +1196,7 @@ TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
}
#define TESTINTERPOLATE(TERP) \
- TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ - 1, TERP, _Any, +, 0) \
+ TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ + 1, TERP, _Any, +, 0) \
TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Unaligned, +, 1) \
TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Invert, -, 0) \
TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Opt, +, 0)
@@ -1058,7 +1213,8 @@ static int TestBlend(int width,
int disable_cpu_flags,
int benchmark_cpu_info,
int invert,
- int off) {
+ int off,
+ int attenuate) {
if (width < 1) {
width = 1;
}
@@ -1072,10 +1228,12 @@ static int TestBlend(int width,
src_argb_a[i + off] = (fastrand() & 0xff);
src_argb_b[i + off] = (fastrand() & 0xff);
}
- ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width,
- height);
- ARGBAttenuate(src_argb_b + off, kStride, src_argb_b + off, kStride, width,
- height);
+ MemRandomize(src_argb_a, kStride * height + off);
+ MemRandomize(src_argb_b, kStride * height + off);
+ if (attenuate) {
+ ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width,
+ height);
+ }
memset(dst_argb_c, 255, kStride * height);
memset(dst_argb_opt, 255, kStride * height);
@@ -1104,29 +1262,36 @@ static int TestBlend(int width,
TEST_F(LibYUVPlanarTest, ARGBBlend_Any) {
int max_diff =
- TestBlend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
- disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
+ TestBlend(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1);
EXPECT_LE(max_diff, 1);
}
TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) {
int max_diff =
TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
- disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
+ disable_cpu_flags_, benchmark_cpu_info_, +1, 1, 1);
EXPECT_LE(max_diff, 1);
}
TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) {
int max_diff =
TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
- disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
+ disable_cpu_flags_, benchmark_cpu_info_, -1, 0, 1);
+ EXPECT_LE(max_diff, 1);
+}
+
+TEST_F(LibYUVPlanarTest, ARGBBlend_Unattenuated) {
+ int max_diff =
+ TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 0);
EXPECT_LE(max_diff, 1);
}
TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
int max_diff =
TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
- disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
+ disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1);
EXPECT_LE(max_diff, 1);
}
@@ -1203,7 +1368,7 @@ TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
}
TEST_F(LibYUVPlanarTest, BlendPlane_Any) {
- TestBlendPlane(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
+ TestBlendPlane(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
}
TEST_F(LibYUVPlanarTest, BlendPlane_Invert) {
@@ -1298,7 +1463,7 @@ TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
// TODO(fbarchard): DISABLED because _Any uses C. Avoid C and re-enable.
TEST_F(LibYUVPlanarTest, DISABLED_I420Blend_Any) {
- TestI420Blend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
+ TestI420Blend(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
}
TEST_F(LibYUVPlanarTest, I420Blend_Invert) {
@@ -1400,6 +1565,212 @@ TEST_F(LibYUVPlanarTest, TestCopyPlane) {
EXPECT_EQ(0, err);
}
+TEST_F(LibYUVPlanarTest, CopyPlane_Opt) {
+ int i;
+ int y_plane_size = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(orig_y, y_plane_size);
+ align_buffer_page_end(dst_c, y_plane_size);
+ align_buffer_page_end(dst_opt, y_plane_size);
+
+ MemRandomize(orig_y, y_plane_size);
+ memset(dst_c, 1, y_plane_size);
+ memset(dst_opt, 2, y_plane_size);
+
+ // Disable all optimizations.
+ MaskCpuFlags(disable_cpu_flags_);
+ for (i = 0; i < benchmark_iterations_; i++) {
+ CopyPlane(orig_y, benchmark_width_, dst_c, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ }
+
+ // Enable optimizations.
+ MaskCpuFlags(benchmark_cpu_info_);
+ for (i = 0; i < benchmark_iterations_; i++) {
+ CopyPlane(orig_y, benchmark_width_, dst_opt, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ }
+
+ for (i = 0; i < y_plane_size; ++i) {
+ EXPECT_EQ(dst_c[i], dst_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(orig_y);
+ free_aligned_buffer_page_end(dst_c);
+ free_aligned_buffer_page_end(dst_opt);
+}
+
+TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) {
+ // Test to verify copying a rect with a zero height or width does
+ // not touch destination memory.
+ uint8_t src = 42;
+ uint8_t dst = 0;
+
+ // Disable all optimizations.
+ MaskCpuFlags(disable_cpu_flags_);
+ CopyPlane(&src, 0, &dst, 0, 0, 0);
+ EXPECT_EQ(src, 42);
+ EXPECT_EQ(dst, 0);
+
+ CopyPlane(&src, 1, &dst, 1, 1, 0);
+ EXPECT_EQ(src, 42);
+ EXPECT_EQ(dst, 0);
+
+ CopyPlane(&src, 1, &dst, 1, 0, 1);
+ EXPECT_EQ(src, 42);
+ EXPECT_EQ(dst, 0);
+
+ // Enable optimizations.
+ MaskCpuFlags(benchmark_cpu_info_);
+ CopyPlane(&src, 0, &dst, 0, 0, 0);
+ EXPECT_EQ(src, 42);
+ EXPECT_EQ(dst, 0);
+
+ CopyPlane(&src, 1, &dst, 1, 1, 0);
+ EXPECT_EQ(src, 42);
+ EXPECT_EQ(dst, 0);
+
+ CopyPlane(&src, 1, &dst, 1, 0, 1);
+ EXPECT_EQ(src, 42);
+ EXPECT_EQ(dst, 0);
+}
+
+TEST_F(LibYUVPlanarTest, TestDetilePlane) {
+ int i, j;
+
+ // orig is tiled. Allocate enough memory for tiles.
+ int orig_width = (benchmark_width_ + 15) & ~15;
+ int orig_height = (benchmark_height_ + 15) & ~15;
+ int orig_plane_size = orig_width * orig_height;
+ int y_plane_size = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(orig_y, orig_plane_size);
+ align_buffer_page_end(dst_c, y_plane_size);
+ align_buffer_page_end(dst_opt, y_plane_size);
+
+ MemRandomize(orig_y, orig_plane_size);
+ memset(dst_c, 0, y_plane_size);
+ memset(dst_opt, 0, y_plane_size);
+
+ // Disable all optimizations.
+ MaskCpuFlags(disable_cpu_flags_);
+ for (j = 0; j < benchmark_iterations_; j++) {
+ DetilePlane(orig_y, orig_width, dst_c, benchmark_width_, benchmark_width_,
+ benchmark_height_, 16);
+ }
+
+ // Enable optimizations.
+ MaskCpuFlags(benchmark_cpu_info_);
+ for (j = 0; j < benchmark_iterations_; j++) {
+ DetilePlane(orig_y, orig_width, dst_opt, benchmark_width_, benchmark_width_,
+ benchmark_height_, 16);
+ }
+
+ for (i = 0; i < y_plane_size; ++i) {
+ EXPECT_EQ(dst_c[i], dst_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(orig_y);
+ free_aligned_buffer_page_end(dst_c);
+ free_aligned_buffer_page_end(dst_opt);
+}
+
+// Compares DetileSplitUV to 2 step Detile + SplitUV
+TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
+ int i, j;
+
+ // orig is tiled. Allocate enough memory for tiles.
+ int orig_width = (benchmark_width_ + 15) & ~15;
+ int orig_height = (benchmark_height_ + 15) & ~15;
+ int orig_plane_size = orig_width * orig_height;
+ int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
+ align_buffer_page_end(orig_uv, orig_plane_size);
+ align_buffer_page_end(detiled_uv, orig_plane_size);
+ align_buffer_page_end(dst_u_two_stage, uv_plane_size);
+ align_buffer_page_end(dst_u_opt, uv_plane_size);
+ align_buffer_page_end(dst_v_two_stage, uv_plane_size);
+ align_buffer_page_end(dst_v_opt, uv_plane_size);
+
+ MemRandomize(orig_uv, orig_plane_size);
+ memset(detiled_uv, 0, orig_plane_size);
+ memset(dst_u_two_stage, 0, uv_plane_size);
+ memset(dst_u_opt, 0, uv_plane_size);
+ memset(dst_v_two_stage, 0, uv_plane_size);
+ memset(dst_v_opt, 0, uv_plane_size);
+
+ DetileSplitUVPlane(orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2,
+ dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_,
+ benchmark_height_, 16);
+
+ // Benchmark 2 step conversion for comparison.
+ for (j = 0; j < benchmark_iterations_; j++) {
+ DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_,
+ benchmark_width_, benchmark_height_, 16);
+ SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage,
+ (benchmark_width_ + 1) / 2, dst_v_two_stage,
+ (benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2,
+ benchmark_height_);
+ }
+
+ for (i = 0; i < uv_plane_size; ++i) {
+ EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]);
+ EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(orig_uv);
+ free_aligned_buffer_page_end(detiled_uv);
+ free_aligned_buffer_page_end(dst_u_two_stage);
+ free_aligned_buffer_page_end(dst_u_opt);
+ free_aligned_buffer_page_end(dst_v_two_stage);
+ free_aligned_buffer_page_end(dst_v_opt);
+}
+
+TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
+ int i, j;
+
+ // orig is tiled. Allocate enough memory for tiles.
+ int orig_width = (benchmark_width_ + 15) & ~15;
+ int orig_height = (benchmark_height_ + 15) & ~15;
+ int orig_plane_size = orig_width * orig_height;
+ int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
+ align_buffer_page_end(orig_uv, orig_plane_size);
+ align_buffer_page_end(dst_u_c, uv_plane_size);
+ align_buffer_page_end(dst_u_opt, uv_plane_size);
+ align_buffer_page_end(dst_v_c, uv_plane_size);
+ align_buffer_page_end(dst_v_opt, uv_plane_size);
+
+ MemRandomize(orig_uv, orig_plane_size);
+ memset(dst_u_c, 0, uv_plane_size);
+ memset(dst_u_opt, 0, uv_plane_size);
+ memset(dst_v_c, 0, uv_plane_size);
+ memset(dst_v_opt, 0, uv_plane_size);
+
+ // Disable all optimizations.
+ MaskCpuFlags(disable_cpu_flags_);
+
+ DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2,
+ dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_,
+ benchmark_height_, 16);
+
+ // Enable optimizations.
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (j = 0; j < benchmark_iterations_; j++) {
+ DetileSplitUVPlane(
+ orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
+ (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16);
+ }
+
+ for (i = 0; i < uv_plane_size; ++i) {
+ EXPECT_EQ(dst_u_c[i], dst_u_opt[i]);
+ EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(orig_uv);
+ free_aligned_buffer_page_end(dst_u_c);
+ free_aligned_buffer_page_end(dst_u_opt);
+ free_aligned_buffer_page_end(dst_v_c);
+ free_aligned_buffer_page_end(dst_v_opt);
+}
+
static int TestMultiply(int width,
int height,
int benchmark_iterations,
@@ -1447,7 +1818,7 @@ static int TestMultiply(int width,
}
TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) {
- int max_diff = TestMultiply(benchmark_width_ - 1, benchmark_height_,
+ int max_diff = TestMultiply(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 1);
@@ -1522,7 +1893,7 @@ static int TestAdd(int width,
TEST_F(LibYUVPlanarTest, ARGBAdd_Any) {
int max_diff =
- TestAdd(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
+ TestAdd(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 1);
}
@@ -1595,7 +1966,7 @@ static int TestSubtract(int width,
}
TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) {
- int max_diff = TestSubtract(benchmark_width_ - 1, benchmark_height_,
+ int max_diff = TestSubtract(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 1);
@@ -1668,7 +2039,7 @@ static int TestSobel(int width,
TEST_F(LibYUVPlanarTest, ARGBSobel_Any) {
int max_diff =
- TestSobel(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
+ TestSobel(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
EXPECT_EQ(0, max_diff);
}
@@ -1741,7 +2112,7 @@ static int TestSobelToPlane(int width,
}
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) {
- int max_diff = TestSobelToPlane(benchmark_width_ - 1, benchmark_height_,
+ int max_diff = TestSobelToPlane(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
EXPECT_EQ(0, max_diff);
@@ -1813,7 +2184,7 @@ static int TestSobelXY(int width,
}
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) {
- int max_diff = TestSobelXY(benchmark_width_ - 1, benchmark_height_,
+ int max_diff = TestSobelXY(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
EXPECT_EQ(0, max_diff);
@@ -1889,29 +2260,35 @@ static int TestBlur(int width,
return max_diff;
}
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+#define DISABLED_ARM(name) name
+#else
+#define DISABLED_ARM(name) DISABLED_##name
+#endif
+
static const int kBlurSize = 55;
-TEST_F(LibYUVPlanarTest, ARGBBlur_Any) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Any)) {
int max_diff =
- TestBlur(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
+ TestBlur(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSize);
EXPECT_LE(max_diff, 1);
}
-TEST_F(LibYUVPlanarTest, ARGBBlur_Unaligned) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Unaligned)) {
int max_diff =
TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1, kBlurSize);
EXPECT_LE(max_diff, 1);
}
-TEST_F(LibYUVPlanarTest, ARGBBlur_Invert) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Invert)) {
int max_diff =
TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, -1, 0, kBlurSize);
EXPECT_LE(max_diff, 1);
}
-TEST_F(LibYUVPlanarTest, ARGBBlur_Opt) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Opt)) {
int max_diff =
TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSize);
@@ -1919,35 +2296,35 @@ TEST_F(LibYUVPlanarTest, ARGBBlur_Opt) {
}
static const int kBlurSmallSize = 5;
-TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Any) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Any)) {
int max_diff =
- TestBlur(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
+ TestBlur(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSmallSize);
EXPECT_LE(max_diff, 1);
}
-TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Unaligned) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Unaligned)) {
int max_diff =
TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1, kBlurSmallSize);
EXPECT_LE(max_diff, 1);
}
-TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Invert) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Invert)) {
int max_diff =
TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, -1, 0, kBlurSmallSize);
EXPECT_LE(max_diff, 1);
}
-TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Opt) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Opt)) {
int max_diff =
TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSmallSize);
EXPECT_LE(max_diff, 1);
}
-TEST_F(LibYUVPlanarTest, TestARGBPolynomial) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(TestARGBPolynomial)) {
SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
@@ -2426,7 +2803,7 @@ static int TestARGBRect(int width,
}
TEST_F(LibYUVPlanarTest, ARGBRect_Any) {
- int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_,
+ int max_diff = TestARGBRect(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0, 4);
EXPECT_EQ(0, max_diff);
@@ -2454,7 +2831,7 @@ TEST_F(LibYUVPlanarTest, ARGBRect_Opt) {
}
TEST_F(LibYUVPlanarTest, SetPlane_Any) {
- int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_,
+ int max_diff = TestARGBRect(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0, 1);
EXPECT_EQ(0, max_diff);
@@ -2483,33 +2860,24 @@ TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_;
- align_buffer_page_end(src_pixels, kPixels * 2);
- align_buffer_page_end(tmp_pixels_u, kPixels);
- align_buffer_page_end(tmp_pixels_v, kPixels);
+ align_buffer_page_end(src_pixels_u, kPixels);
+ align_buffer_page_end(src_pixels_v, kPixels);
align_buffer_page_end(dst_pixels_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_c, kPixels * 2);
- MemRandomize(src_pixels, kPixels * 2);
- MemRandomize(tmp_pixels_u, kPixels);
- MemRandomize(tmp_pixels_v, kPixels);
+ MemRandomize(src_pixels_u, kPixels);
+ MemRandomize(src_pixels_v, kPixels);
MemRandomize(dst_pixels_opt, kPixels * 2);
MemRandomize(dst_pixels_c, kPixels * 2);
MaskCpuFlags(disable_cpu_flags_);
- SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
- tmp_pixels_v, benchmark_width_, benchmark_width_,
- benchmark_height_);
- MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
+ MergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v, benchmark_width_,
dst_pixels_c, benchmark_width_ * 2, benchmark_width_,
benchmark_height_);
MaskCpuFlags(benchmark_cpu_info_);
- SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
- tmp_pixels_v, benchmark_width_, benchmark_width_,
- benchmark_height_);
-
for (int i = 0; i < benchmark_iterations_; ++i) {
- MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
+ MergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v, benchmark_width_,
dst_pixels_opt, benchmark_width_ * 2, benchmark_width_,
benchmark_height_);
}
@@ -2518,9 +2886,43 @@ TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
- free_aligned_buffer_page_end(src_pixels);
- free_aligned_buffer_page_end(tmp_pixels_u);
- free_aligned_buffer_page_end(tmp_pixels_v);
+ free_aligned_buffer_page_end(src_pixels_u);
+ free_aligned_buffer_page_end(src_pixels_v);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(dst_pixels_c);
+}
+
+// 16 bit channel split and merge
+TEST_F(LibYUVPlanarTest, MergeUVPlane_16_Opt) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels_u, kPixels * 2);
+ align_buffer_page_end(src_pixels_v, kPixels * 2);
+ align_buffer_page_end(dst_pixels_opt, kPixels * 2 * 2);
+ align_buffer_page_end(dst_pixels_c, kPixels * 2 * 2);
+ MemRandomize(src_pixels_u, kPixels * 2);
+ MemRandomize(src_pixels_v, kPixels * 2);
+ MemRandomize(dst_pixels_opt, kPixels * 2 * 2);
+ MemRandomize(dst_pixels_c, kPixels * 2 * 2);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_,
+ (const uint16_t*)src_pixels_v, benchmark_width_,
+ (uint16_t*)dst_pixels_c, benchmark_width_ * 2,
+ benchmark_width_, benchmark_height_, 12);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_,
+ (const uint16_t*)src_pixels_v, benchmark_width_,
+ (uint16_t*)dst_pixels_opt, benchmark_width_ * 2,
+ benchmark_width_, benchmark_height_, 12);
+ }
+
+ for (int i = 0; i < kPixels * 2 * 2; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+ free_aligned_buffer_page_end(src_pixels_u);
+ free_aligned_buffer_page_end(src_pixels_v);
free_aligned_buffer_page_end(dst_pixels_opt);
free_aligned_buffer_page_end(dst_pixels_c);
}
@@ -2528,47 +2930,112 @@ TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels, kPixels * 2);
- align_buffer_page_end(tmp_pixels_u, kPixels);
- align_buffer_page_end(tmp_pixels_v, kPixels);
+ align_buffer_page_end(dst_pixels_u_c, kPixels);
+ align_buffer_page_end(dst_pixels_v_c, kPixels);
+ align_buffer_page_end(dst_pixels_u_opt, kPixels);
+ align_buffer_page_end(dst_pixels_v_opt, kPixels);
+
+ MemRandomize(src_pixels, kPixels * 2);
+ MemRandomize(dst_pixels_u_c, kPixels);
+ MemRandomize(dst_pixels_v_c, kPixels);
+ MemRandomize(dst_pixels_u_opt, kPixels);
+ MemRandomize(dst_pixels_v_opt, kPixels);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ SplitUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_u_c,
+ benchmark_width_, dst_pixels_v_c, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ SplitUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_u_opt,
+ benchmark_width_, dst_pixels_v_opt, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ }
+
+ for (int i = 0; i < kPixels; ++i) {
+ EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]);
+ EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(dst_pixels_u_c);
+ free_aligned_buffer_page_end(dst_pixels_v_c);
+ free_aligned_buffer_page_end(dst_pixels_u_opt);
+ free_aligned_buffer_page_end(dst_pixels_v_opt);
+}
+
+// 16 bit channel split
+TEST_F(LibYUVPlanarTest, SplitUVPlane_16_Opt) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels, kPixels * 2 * 2);
+ align_buffer_page_end(dst_pixels_u_c, kPixels * 2);
+ align_buffer_page_end(dst_pixels_v_c, kPixels * 2);
+ align_buffer_page_end(dst_pixels_u_opt, kPixels * 2);
+ align_buffer_page_end(dst_pixels_v_opt, kPixels * 2);
+ MemRandomize(src_pixels, kPixels * 2 * 2);
+ MemRandomize(dst_pixels_u_c, kPixels * 2);
+ MemRandomize(dst_pixels_v_c, kPixels * 2);
+ MemRandomize(dst_pixels_u_opt, kPixels * 2);
+ MemRandomize(dst_pixels_v_opt, kPixels * 2);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
+ (uint16_t*)dst_pixels_u_c, benchmark_width_,
+ (uint16_t*)dst_pixels_v_c, benchmark_width_, benchmark_width_,
+ benchmark_height_, 10);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
+ (uint16_t*)dst_pixels_u_opt, benchmark_width_,
+ (uint16_t*)dst_pixels_v_opt, benchmark_width_,
+ benchmark_width_, benchmark_height_, 10);
+ }
+
+ for (int i = 0; i < kPixels * 2; ++i) {
+ EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]);
+ EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]);
+ }
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(dst_pixels_u_c);
+ free_aligned_buffer_page_end(dst_pixels_v_c);
+ free_aligned_buffer_page_end(dst_pixels_u_opt);
+ free_aligned_buffer_page_end(dst_pixels_v_opt);
+}
+
+TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
+ // Round count up to multiple of 16
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels, kPixels * 2);
align_buffer_page_end(dst_pixels_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_c, kPixels * 2);
MemRandomize(src_pixels, kPixels * 2);
- MemRandomize(tmp_pixels_u, kPixels);
- MemRandomize(tmp_pixels_v, kPixels);
MemRandomize(dst_pixels_opt, kPixels * 2);
MemRandomize(dst_pixels_c, kPixels * 2);
MaskCpuFlags(disable_cpu_flags_);
- SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
- tmp_pixels_v, benchmark_width_, benchmark_width_,
- benchmark_height_);
- MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
- dst_pixels_c, benchmark_width_ * 2, benchmark_width_,
- benchmark_height_);
+ SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_c,
+ benchmark_width_ * 2, benchmark_width_, benchmark_height_);
MaskCpuFlags(benchmark_cpu_info_);
for (int i = 0; i < benchmark_iterations_; ++i) {
- SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u,
- benchmark_width_, tmp_pixels_v, benchmark_width_,
- benchmark_width_, benchmark_height_);
+ SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_opt,
+ benchmark_width_ * 2, benchmark_width_, benchmark_height_);
}
- MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
- dst_pixels_opt, benchmark_width_ * 2, benchmark_width_,
- benchmark_height_);
for (int i = 0; i < kPixels * 2; ++i) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
free_aligned_buffer_page_end(src_pixels);
- free_aligned_buffer_page_end(tmp_pixels_u);
- free_aligned_buffer_page_end(tmp_pixels_v);
free_aligned_buffer_page_end(dst_pixels_opt);
free_aligned_buffer_page_end(dst_pixels_c);
}
TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
+ // Round count up to multiple of 16
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels, kPixels * 3);
align_buffer_page_end(tmp_pixels_r, kPixels);
@@ -2617,6 +3084,7 @@ TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
}
TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
+ // Round count up to multiple of 16
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels, kPixels * 3);
align_buffer_page_end(tmp_pixels_r, kPixels);
@@ -2663,10 +3131,373 @@ TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
free_aligned_buffer_page_end(dst_pixels_c);
}
+TEST_F(LibYUVPlanarTest, MergeARGBPlane_Opt) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels, kPixels * 4);
+ align_buffer_page_end(tmp_pixels_r, kPixels);
+ align_buffer_page_end(tmp_pixels_g, kPixels);
+ align_buffer_page_end(tmp_pixels_b, kPixels);
+ align_buffer_page_end(tmp_pixels_a, kPixels);
+ align_buffer_page_end(dst_pixels_opt, kPixels * 4);
+ align_buffer_page_end(dst_pixels_c, kPixels * 4);
+
+ MemRandomize(src_pixels, kPixels * 4);
+ MemRandomize(tmp_pixels_r, kPixels);
+ MemRandomize(tmp_pixels_g, kPixels);
+ MemRandomize(tmp_pixels_b, kPixels);
+ MemRandomize(tmp_pixels_a, kPixels);
+ MemRandomize(dst_pixels_opt, kPixels * 4);
+ MemRandomize(dst_pixels_c, kPixels * 4);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
+ benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
+ benchmark_width_, tmp_pixels_a, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
+ tmp_pixels_b, benchmark_width_, tmp_pixels_a, benchmark_width_,
+ dst_pixels_c, benchmark_width_ * 4, benchmark_width_,
+ benchmark_height_);
+
+ MaskCpuFlags(benchmark_cpu_info_);
+ SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
+ benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
+ benchmark_width_, tmp_pixels_a, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g,
+ benchmark_width_, tmp_pixels_b, benchmark_width_,
+ tmp_pixels_a, benchmark_width_, dst_pixels_opt,
+ benchmark_width_ * 4, benchmark_width_, benchmark_height_);
+ }
+
+ for (int i = 0; i < kPixels * 4; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(tmp_pixels_r);
+ free_aligned_buffer_page_end(tmp_pixels_g);
+ free_aligned_buffer_page_end(tmp_pixels_b);
+ free_aligned_buffer_page_end(tmp_pixels_a);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(dst_pixels_c);
+}
+
+TEST_F(LibYUVPlanarTest, SplitARGBPlane_Opt) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels, kPixels * 4);
+ align_buffer_page_end(tmp_pixels_r, kPixels);
+ align_buffer_page_end(tmp_pixels_g, kPixels);
+ align_buffer_page_end(tmp_pixels_b, kPixels);
+ align_buffer_page_end(tmp_pixels_a, kPixels);
+ align_buffer_page_end(dst_pixels_opt, kPixels * 4);
+ align_buffer_page_end(dst_pixels_c, kPixels * 4);
+
+ MemRandomize(src_pixels, kPixels * 4);
+ MemRandomize(tmp_pixels_r, kPixels);
+ MemRandomize(tmp_pixels_g, kPixels);
+ MemRandomize(tmp_pixels_b, kPixels);
+ MemRandomize(tmp_pixels_a, kPixels);
+ MemRandomize(dst_pixels_opt, kPixels * 4);
+ MemRandomize(dst_pixels_c, kPixels * 4);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
+ benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
+ benchmark_width_, tmp_pixels_a, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
+ tmp_pixels_b, benchmark_width_, tmp_pixels_a, benchmark_width_,
+ dst_pixels_c, benchmark_width_ * 4, benchmark_width_,
+ benchmark_height_);
+
+ MaskCpuFlags(benchmark_cpu_info_);
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
+ benchmark_width_, tmp_pixels_g, benchmark_width_,
+ tmp_pixels_b, benchmark_width_, tmp_pixels_a,
+ benchmark_width_, benchmark_width_, benchmark_height_);
+ }
+
+ MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
+ tmp_pixels_b, benchmark_width_, tmp_pixels_a, benchmark_width_,
+ dst_pixels_opt, benchmark_width_ * 4, benchmark_width_,
+ benchmark_height_);
+
+ for (int i = 0; i < kPixels * 4; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(tmp_pixels_r);
+ free_aligned_buffer_page_end(tmp_pixels_g);
+ free_aligned_buffer_page_end(tmp_pixels_b);
+ free_aligned_buffer_page_end(tmp_pixels_a);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(dst_pixels_c);
+}
+
+TEST_F(LibYUVPlanarTest, MergeXRGBPlane_Opt) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels, kPixels * 4);
+ align_buffer_page_end(tmp_pixels_r, kPixels);
+ align_buffer_page_end(tmp_pixels_g, kPixels);
+ align_buffer_page_end(tmp_pixels_b, kPixels);
+ align_buffer_page_end(dst_pixels_opt, kPixels * 4);
+ align_buffer_page_end(dst_pixels_c, kPixels * 4);
+
+ MemRandomize(src_pixels, kPixels * 4);
+ MemRandomize(tmp_pixels_r, kPixels);
+ MemRandomize(tmp_pixels_g, kPixels);
+ MemRandomize(tmp_pixels_b, kPixels);
+ MemRandomize(dst_pixels_opt, kPixels * 4);
+ MemRandomize(dst_pixels_c, kPixels * 4);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
+ benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
+ benchmark_width_, NULL, 0, benchmark_width_,
+ benchmark_height_);
+ MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
+ tmp_pixels_b, benchmark_width_, NULL, 0, dst_pixels_c,
+ benchmark_width_ * 4, benchmark_width_, benchmark_height_);
+
+ MaskCpuFlags(benchmark_cpu_info_);
+ SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
+ benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
+ benchmark_width_, NULL, 0, benchmark_width_,
+ benchmark_height_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g,
+ benchmark_width_, tmp_pixels_b, benchmark_width_, NULL, 0,
+ dst_pixels_opt, benchmark_width_ * 4, benchmark_width_,
+ benchmark_height_);
+ }
+
+ for (int i = 0; i < kPixels * 4; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(tmp_pixels_r);
+ free_aligned_buffer_page_end(tmp_pixels_g);
+ free_aligned_buffer_page_end(tmp_pixels_b);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(dst_pixels_c);
+}
+
+TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels, kPixels * 4);
+ align_buffer_page_end(tmp_pixels_r, kPixels);
+ align_buffer_page_end(tmp_pixels_g, kPixels);
+ align_buffer_page_end(tmp_pixels_b, kPixels);
+ align_buffer_page_end(dst_pixels_opt, kPixels * 4);
+ align_buffer_page_end(dst_pixels_c, kPixels * 4);
+
+ MemRandomize(src_pixels, kPixels * 4);
+ MemRandomize(tmp_pixels_r, kPixels);
+ MemRandomize(tmp_pixels_g, kPixels);
+ MemRandomize(tmp_pixels_b, kPixels);
+ MemRandomize(dst_pixels_opt, kPixels * 4);
+ MemRandomize(dst_pixels_c, kPixels * 4);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
+ benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
+ benchmark_width_, NULL, 0, benchmark_width_,
+ benchmark_height_);
+ MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
+ tmp_pixels_b, benchmark_width_, NULL, 0, dst_pixels_c,
+ benchmark_width_ * 4, benchmark_width_, benchmark_height_);
+
+ MaskCpuFlags(benchmark_cpu_info_);
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
+ benchmark_width_, tmp_pixels_g, benchmark_width_,
+ tmp_pixels_b, benchmark_width_, NULL, 0, benchmark_width_,
+ benchmark_height_);
+ }
+
+ MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
+ tmp_pixels_b, benchmark_width_, NULL, 0, dst_pixels_opt,
+ benchmark_width_ * 4, benchmark_width_, benchmark_height_);
+
+ for (int i = 0; i < kPixels * 4; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(tmp_pixels_r);
+ free_aligned_buffer_page_end(tmp_pixels_g);
+ free_aligned_buffer_page_end(tmp_pixels_b);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(dst_pixels_c);
+}
+
+// Merge 4 channels
+#define TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
+ TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) { \
+ const int kWidth = W1280; \
+ const int kPixels = kWidth * benchmark_height_; \
+ align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(src_memory_a, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \
+ align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \
+ MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
+ MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
+ MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
+ MemRandomize(src_memory_a, kPixels * sizeof(STYPE) + OFF); \
+ memset(dst_memory_c, 0, kPixels * 4 * sizeof(DTYPE)); \
+ memset(dst_memory_opt, 0, kPixels * 4 * sizeof(DTYPE)); \
+ STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
+ STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
+ STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
+ STYPE* src_pixels_a = reinterpret_cast<STYPE*>(src_memory_a + OFF); \
+ DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
+ DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
+ kWidth, src_pixels_a, kWidth, dst_pixels_c, kWidth * 4, \
+ kWidth, NEG benchmark_height_, DEPTH); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
+ kWidth, src_pixels_a, kWidth, dst_pixels_opt, kWidth * 4, \
+ kWidth, NEG benchmark_height_, DEPTH); \
+ } \
+ for (int i = 0; i < kPixels * 4; ++i) { \
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \
+ } \
+ free_aligned_buffer_page_end(src_memory_r); \
+ free_aligned_buffer_page_end(src_memory_g); \
+ free_aligned_buffer_page_end(src_memory_b); \
+ free_aligned_buffer_page_end(src_memory_a); \
+ free_aligned_buffer_page_end(dst_memory_c); \
+ free_aligned_buffer_page_end(dst_memory_opt); \
+ }
+
+// Merge 3 channel RGB into 4 channel XRGB with opaque alpha
+#define TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
+ TEST_F(LibYUVPlanarTest, FUNC##Plane_Opaque_##DEPTH##N) { \
+ const int kWidth = W1280; \
+ const int kPixels = kWidth * benchmark_height_; \
+ align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \
+ align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \
+ MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
+ MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
+ MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
+ memset(dst_memory_c, 0, kPixels * 4 * sizeof(DTYPE)); \
+ memset(dst_memory_opt, 0, kPixels * 4 * sizeof(DTYPE)); \
+ STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
+ STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
+ STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
+ DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
+ DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
+ kWidth, NULL, 0, dst_pixels_c, kWidth * 4, kWidth, \
+ NEG benchmark_height_, DEPTH); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
+ kWidth, NULL, 0, dst_pixels_opt, kWidth * 4, kWidth, \
+ NEG benchmark_height_, DEPTH); \
+ } \
+ for (int i = 0; i < kPixels * 4; ++i) { \
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \
+ } \
+ free_aligned_buffer_page_end(src_memory_r); \
+ free_aligned_buffer_page_end(src_memory_g); \
+ free_aligned_buffer_page_end(src_memory_b); \
+ free_aligned_buffer_page_end(dst_memory_c); \
+ free_aligned_buffer_page_end(dst_memory_opt); \
+ }
+
+#define TESTQPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
+ TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \
+ TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
+ 2) \
+ TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
+ TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0) \
+ TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, \
+ 0) \
+ TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
+ 2) \
+ TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
+ TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)
+
+TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 10)
+TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 12)
+TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 16)
+TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 10)
+TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 12)
+TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 16)
+
+#define TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
+ TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) { \
+ const int kWidth = W1280; \
+ const int kPixels = kWidth * benchmark_height_; \
+ align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \
+ align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \
+ MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
+ MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
+ MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
+ STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
+ STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
+ STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
+ DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
+ DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
+ memset(dst_pixels_c, 1, kPixels * 4 * sizeof(DTYPE)); \
+ memset(dst_pixels_opt, 2, kPixels * 4 * sizeof(DTYPE)); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
+ kWidth, dst_pixels_c, kWidth * 4, kWidth, \
+ NEG benchmark_height_, DEPTH); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
+ kWidth, dst_pixels_opt, kWidth * 4, kWidth, \
+ NEG benchmark_height_, DEPTH); \
+ } \
+ for (int i = 0; i < kPixels * 4; ++i) { \
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \
+ } \
+ free_aligned_buffer_page_end(src_memory_r); \
+ free_aligned_buffer_page_end(src_memory_g); \
+ free_aligned_buffer_page_end(src_memory_b); \
+ free_aligned_buffer_page_end(dst_memory_c); \
+ free_aligned_buffer_page_end(dst_memory_opt); \
+ }
+
+#define TESTTPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
+ TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \
+ TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
+ 2) \
+ TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
+ TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)
+
+TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 10)
+TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 12)
+TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16)
+
// TODO(fbarchard): improve test for platforms and cpu detect
#ifdef HAS_MERGEUVROW_16_AVX2
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
- const int kPixels = benchmark_width_ * benchmark_height_;
+ // Round count up to multiple of 16
+ const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
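+ // e.g. 1283 pixels round up to (1283 + 15) & ~15 = 1296 (illustrative value
+ // only), so MergeUVRow_16_AVX2 always operates on whole 16-pixel groups.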
+
align_buffer_page_end(src_pixels_u, kPixels * 2);
align_buffer_page_end(src_pixels_v, kPixels * 2);
align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2);
@@ -2679,19 +3510,19 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u),
reinterpret_cast<const uint16_t*>(src_pixels_v),
- reinterpret_cast<uint16_t*>(dst_pixels_uv_c), 64, kPixels);
+ reinterpret_cast<uint16_t*>(dst_pixels_uv_c), 16, kPixels);
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
for (int i = 0; i < benchmark_iterations_; ++i) {
if (has_avx2) {
MergeUVRow_16_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_u),
reinterpret_cast<const uint16_t*>(src_pixels_v),
- reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64,
+ reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 16,
kPixels);
} else {
MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u),
reinterpret_cast<const uint16_t*>(src_pixels_v),
- reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64,
+ reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 16,
kPixels);
}
}
@@ -2710,7 +3541,9 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
// TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_MULTIPLYROW_16_AVX2
TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
- const int kPixels = benchmark_width_ * benchmark_height_;
+ // Round count up to multiple of 32
+ const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
+
align_buffer_page_end(src_pixels_y, kPixels * 2);
align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
@@ -2776,6 +3609,65 @@ TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
free_aligned_buffer_page_end(dst_pixels_y_c);
}
+TEST_F(LibYUVPlanarTest, YUY2ToY) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
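+ // YUY2 (and UYVY below) is a packed 4:2:2 format at 2 bytes per pixel, hence
+ // the kPixels * 2 source size and the benchmark_width_ * 2 source stride.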
+ align_buffer_page_end(src_pixels_y, kPixels * 2);
+ align_buffer_page_end(dst_pixels_y_opt, kPixels);
+ align_buffer_page_end(dst_pixels_y_c, kPixels);
+
+ MemRandomize(src_pixels_y, kPixels * 2);
+ memset(dst_pixels_y_opt, 0, kPixels);
+ memset(dst_pixels_y_c, 1, kPixels);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ YUY2ToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ YUY2ToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_opt,
+ benchmark_width_, benchmark_width_, benchmark_height_);
+ }
+
+ for (int i = 0; i < kPixels; ++i) {
+ EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels_y);
+ free_aligned_buffer_page_end(dst_pixels_y_opt);
+ free_aligned_buffer_page_end(dst_pixels_y_c);
+}
+
+TEST_F(LibYUVPlanarTest, UYVYToY) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels_y, kPixels * 2);
+ align_buffer_page_end(dst_pixels_y_opt, kPixels);
+ align_buffer_page_end(dst_pixels_y_c, kPixels);
+
+ MemRandomize(src_pixels_y, kPixels * 2);
+ memset(dst_pixels_y_opt, 0, kPixels);
+ memset(dst_pixels_y_c, 1, kPixels);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ UYVYToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ UYVYToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_opt,
+ benchmark_width_, benchmark_width_, benchmark_height_);
+ }
+
+ for (int i = 0; i < kPixels; ++i) {
+ EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels_y);
+ free_aligned_buffer_page_end(dst_pixels_y_opt);
+ free_aligned_buffer_page_end(dst_pixels_y_c);
+}
+
+#ifdef ENABLE_ROW_TESTS
// TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_CONVERT16TO8ROW_AVX2
TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
@@ -2822,6 +3714,36 @@ TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
}
#endif // HAS_CONVERT16TO8ROW_AVX2
+#ifdef HAS_UYVYTOYROW_NEON
+TEST_F(LibYUVPlanarTest, UYVYToYRow_Opt) {
+ // NEON does multiple of 16, so round count up
+ const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
+ align_buffer_page_end(src_pixels_y, kPixels * 2);
+ align_buffer_page_end(dst_pixels_y_opt, kPixels);
+ align_buffer_page_end(dst_pixels_y_c, kPixels);
+
+ MemRandomize(src_pixels_y, kPixels * 2);
+ memset(dst_pixels_y_opt, 0, kPixels);
+ memset(dst_pixels_y_c, 1, kPixels);
+
+ UYVYToYRow_C(src_pixels_y, dst_pixels_y_c, kPixels);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ UYVYToYRow_NEON(src_pixels_y, dst_pixels_y_opt, kPixels);
+ }
+
+ for (int i = 0; i < kPixels; ++i) {
+ EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels_y);
+ free_aligned_buffer_page_end(dst_pixels_y_opt);
+ free_aligned_buffer_page_end(dst_pixels_y_c);
+}
+#endif // HAS_UYVYTOYROW_NEON
+
+#endif // ENABLE_ROW_TESTS
+
TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels_y, kPixels);
@@ -2855,6 +3777,7 @@ TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
free_aligned_buffer_page_end(dst_pixels_y_c);
}
+#ifdef ENABLE_ROW_TESTS
// TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_CONVERT8TO16ROW_AVX2
TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) {
@@ -3173,33 +4096,33 @@ extern "C" void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width);
extern "C" void GaussRow_C(const uint32_t* src, uint16_t* dst, int width);
TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
- SIMD_ALIGNED(uint32_t orig_pixels[640 + 4]);
- SIMD_ALIGNED(uint16_t dst_pixels_c[640]);
- SIMD_ALIGNED(uint16_t dst_pixels_opt[640]);
+ SIMD_ALIGNED(uint32_t orig_pixels[1280 + 8]);
+ SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
+ SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
memset(orig_pixels, 0, sizeof(orig_pixels));
memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
- for (int i = 0; i < 640 + 4; ++i) {
+ for (int i = 0; i < 1280 + 8; ++i) {
orig_pixels[i] = i * 256;
}
- GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 640);
- for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
+ GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 1280);
+ for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
int has_neon = TestCpuFlag(kCpuHasNEON);
if (has_neon) {
- GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 640);
+ GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 1280);
} else {
- GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 640);
+ GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 1280);
}
#else
- GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 640);
+ GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 1280);
#endif
}
- for (int i = 0; i < 640; ++i) {
+ for (int i = 0; i < 1280; ++i) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
@@ -3225,141 +4148,139 @@ extern "C" void GaussCol_C(const uint16_t* src0,
int width);
TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
- SIMD_ALIGNED(uint16_t orig_pixels[640 * 5]);
- SIMD_ALIGNED(uint32_t dst_pixels_c[640]);
- SIMD_ALIGNED(uint32_t dst_pixels_opt[640]);
+ SIMD_ALIGNED(uint16_t orig_pixels[1280 * 5]);
+ SIMD_ALIGNED(uint32_t dst_pixels_c[1280]);
+ SIMD_ALIGNED(uint32_t dst_pixels_opt[1280]);
memset(orig_pixels, 0, sizeof(orig_pixels));
memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
- for (int i = 0; i < 640 * 5; ++i) {
- orig_pixels[i] = i;
+ for (int i = 0; i < 1280 * 5; ++i) {
+ orig_pixels[i] = static_cast<uint16_t>(i);
}
- GaussCol_C(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
- &orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_c[0],
- 640);
- for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
+ GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
+ &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], &dst_pixels_c[0],
+ 1280);
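+ // GaussCol weights the five source rows by the 1/4/6/4/1 Gaussian kernel and
+ // accumulates into 32-bit sums.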
+ for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
int has_neon = TestCpuFlag(kCpuHasNEON);
if (has_neon) {
- GaussCol_NEON(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
- &orig_pixels[640 * 3], &orig_pixels[640 * 4],
- &dst_pixels_opt[0], 640);
+ GaussCol_NEON(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
+ &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
+ &dst_pixels_opt[0], 1280);
} else {
- GaussCol_C(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
- &orig_pixels[640 * 3], &orig_pixels[640 * 4],
- &dst_pixels_opt[0], 640);
+ GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
+ &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
+ &dst_pixels_opt[0], 1280);
}
#else
- GaussCol_C(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
- &orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_opt[0],
- 640);
+ GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
+ &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
+ &dst_pixels_opt[0], 1280);
#endif
}
- for (int i = 0; i < 640; ++i) {
+ for (int i = 0; i < 1280; ++i) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
-
- EXPECT_EQ(dst_pixels_c[0],
- static_cast<uint32_t>(0 * 1 + 640 * 4 + 640 * 2 * 6 + 640 * 3 * 4 +
- 640 * 4 * 1));
- EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704));
}
-float TestFloatDivToByte(int benchmark_width,
- int benchmark_height,
- int benchmark_iterations,
- float scale,
- bool opt) {
- int i, j;
- // NEON does multiple of 8, so round count up
- const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
- align_buffer_page_end(src_weights, kPixels * 4);
- align_buffer_page_end(src_values, kPixels * 4);
- align_buffer_page_end(dst_out_c, kPixels);
- align_buffer_page_end(dst_out_opt, kPixels);
- align_buffer_page_end(dst_mask_c, kPixels);
- align_buffer_page_end(dst_mask_opt, kPixels);
-
- // Randomize works but may contain some denormals affecting performance.
- // MemRandomize(orig_y, kPixels * 4);
- // large values are problematic. audio is really -1 to 1.
- for (i = 0; i < kPixels; ++i) {
- (reinterpret_cast<float*>(src_weights))[i] = scale;
- (reinterpret_cast<float*>(src_values))[i] =
- sinf(static_cast<float>(i) * 0.1f);
- }
- memset(dst_out_c, 0, kPixels);
- memset(dst_out_opt, 1, kPixels);
- memset(dst_mask_c, 2, kPixels);
- memset(dst_mask_opt, 3, kPixels);
+TEST_F(LibYUVPlanarTest, TestGaussRow_F32_Opt) {
+ SIMD_ALIGNED(float orig_pixels[1280 + 4]);
+ SIMD_ALIGNED(float dst_pixels_c[1280]);
+ SIMD_ALIGNED(float dst_pixels_opt[1280]);
- FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
- reinterpret_cast<float*>(src_values), dst_out_c,
- dst_mask_c, kPixels);
+ memset(orig_pixels, 0, sizeof(orig_pixels));
+ memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
+ memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
- for (j = 0; j < benchmark_iterations; j++) {
- if (opt) {
-#ifdef HAS_FLOATDIVTOBYTEROW_NEON
- FloatDivToByteRow_NEON(reinterpret_cast<float*>(src_weights),
- reinterpret_cast<float*>(src_values), dst_out_opt,
- dst_mask_opt, kPixels);
-#else
- FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
- reinterpret_cast<float*>(src_values), dst_out_opt,
- dst_mask_opt, kPixels);
-#endif
+ for (int i = 0; i < 1280 + 4; ++i) {
+ orig_pixels[i] = static_cast<float>(i);
+ }
+ GaussRow_F32_C(&orig_pixels[0], &dst_pixels_c[0], 1280);
+ for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+ int has_neon = TestCpuFlag(kCpuHasNEON);
+ if (has_neon) {
+ GaussRow_F32_NEON(&orig_pixels[0], &dst_pixels_opt[0], 1280);
} else {
- FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
- reinterpret_cast<float*>(src_values), dst_out_opt,
- dst_mask_opt, kPixels);
+ GaussRow_F32_C(&orig_pixels[0], &dst_pixels_opt[0], 1280);
}
+#else
+ GaussRow_F32_C(&orig_pixels[0], &dst_pixels_opt[0], 1280);
+#endif
}
- uint8_t max_diff = 0;
- for (i = 0; i < kPixels; ++i) {
- uint8_t abs_diff = abs(dst_out_c[i] - dst_out_opt[i]) +
- abs(dst_mask_c[i] - dst_mask_opt[i]);
- if (abs_diff > max_diff) {
- max_diff = abs_diff;
- }
+ for (int i = 0; i < 1280; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
+}
- free_aligned_buffer_page_end(src_weights);
- free_aligned_buffer_page_end(src_values);
- free_aligned_buffer_page_end(dst_out_c);
- free_aligned_buffer_page_end(dst_out_opt);
- free_aligned_buffer_page_end(dst_mask_c);
- free_aligned_buffer_page_end(dst_mask_opt);
+TEST_F(LibYUVPlanarTest, TestGaussCol_F32_Opt) {
+ SIMD_ALIGNED(float dst_pixels_c[1280]);
+ SIMD_ALIGNED(float dst_pixels_opt[1280]);
+ align_buffer_page_end(orig_pixels_buf, 1280 * 5 * 4); // 5 rows of 1280 floats
+ float* orig_pixels = reinterpret_cast<float*>(orig_pixels_buf);
- return max_diff;
-}
+ memset(orig_pixels, 0, 1280 * 5 * 4);
+ memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
+ memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
-TEST_F(LibYUVPlanarTest, TestFloatDivToByte_C) {
- float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_,
- benchmark_iterations_, 1.2f, false);
- EXPECT_EQ(0, diff);
-}
+ for (int i = 0; i < 1280 * 5; ++i) {
+ orig_pixels[i] = static_cast<float>(i);
+ }
+ GaussCol_F32_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
+ &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
+ &dst_pixels_c[0], 1280);
+ for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+ int has_neon = TestCpuFlag(kCpuHasNEON);
+ if (has_neon) {
+ GaussCol_F32_NEON(&orig_pixels[0], &orig_pixels[1280],
+ &orig_pixels[1280 * 2], &orig_pixels[1280 * 3],
+ &orig_pixels[1280 * 4], &dst_pixels_opt[0], 1280);
+ } else {
+ GaussCol_F32_C(&orig_pixels[0], &orig_pixels[1280],
+ &orig_pixels[1280 * 2], &orig_pixels[1280 * 3],
+ &orig_pixels[1280 * 4], &dst_pixels_opt[0], 1280);
+ }
+#else
+ GaussCol_F32_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
+ &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
+ &dst_pixels_opt[0], 1280);
+#endif
+ }
-TEST_F(LibYUVPlanarTest, TestFloatDivToByte_Opt) {
- float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_,
- benchmark_iterations_, 1.2f, true);
- EXPECT_EQ(0, diff);
+ for (int i = 0; i < 1280; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+ free_aligned_buffer_page_end(orig_pixels_buf);
}
-TEST_F(LibYUVPlanarTest, UVToVURow) {
+TEST_F(LibYUVPlanarTest, SwapUVRow) {
const int kPixels = benchmark_width_ * benchmark_height_;
+ void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
+ SwapUVRow_C;
+
align_buffer_page_end(src_pixels_vu, kPixels * 2);
align_buffer_page_end(dst_pixels_uv, kPixels * 2);
-
MemRandomize(src_pixels_vu, kPixels * 2);
memset(dst_pixels_uv, 1, kPixels * 2);
- UVToVURow_C(src_pixels_vu, dst_pixels_uv, kPixels);
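+ // Prefer the width-exact NEON kernel when the pixel count is a multiple of
+ // 16; otherwise the _Any wrapper handles the trailing pixels.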
+#if defined(HAS_SWAPUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SwapUVRow = SwapUVRow_Any_NEON;
+ if (IS_ALIGNED(kPixels, 16)) {
+ SwapUVRow = SwapUVRow_NEON;
+ }
+ }
+#endif
+ for (int j = 0; j < benchmark_iterations_; j++) {
+ SwapUVRow(src_pixels_vu, dst_pixels_uv, kPixels);
+ }
for (int i = 0; i < kPixels; ++i) {
EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]);
EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]);
@@ -3368,5 +4289,144 @@ TEST_F(LibYUVPlanarTest, UVToVURow) {
free_aligned_buffer_page_end(src_pixels_vu);
free_aligned_buffer_page_end(dst_pixels_uv);
}
+#endif // ENABLE_ROW_TESTS
+
+TEST_F(LibYUVPlanarTest, TestGaussPlane_F32) {
+ const int kSize = benchmark_width_ * benchmark_height_ * 4;
+ align_buffer_page_end(orig_pixels, kSize);
+ align_buffer_page_end(dst_pixels_opt, kSize);
+ align_buffer_page_end(dst_pixels_c, kSize);
+
+ for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) {
+ ((float*)(orig_pixels))[i] = (i & 1023) * 3.14f;
+ }
+ memset(dst_pixels_opt, 1, kSize);
+ memset(dst_pixels_c, 2, kSize);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ GaussPlane_F32((const float*)(orig_pixels), benchmark_width_,
+ (float*)(dst_pixels_c), benchmark_width_, benchmark_width_,
+ benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ GaussPlane_F32((const float*)(orig_pixels), benchmark_width_,
+ (float*)(dst_pixels_opt), benchmark_width_, benchmark_width_,
+ benchmark_height_);
+ }
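+ // The SIMD path may reorder floating point additions, so allow a small
+ // absolute tolerance instead of exact equality.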
+ for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) {
+ EXPECT_NEAR(((float*)(dst_pixels_c))[i], ((float*)(dst_pixels_opt))[i], 1.f)
+ << i;
+ }
+
+ free_aligned_buffer_page_end(dst_pixels_c);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVPlanarTest, HalfMergeUVPlane_Opt) {
+ int dst_width = (benchmark_width_ + 1) / 2;
+ int dst_height = (benchmark_height_ + 1) / 2;
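+ // HalfMergeUVPlane 2x2-downsamples the full-resolution U and V planes and
+ // interleaves them into a half-resolution UV plane of dst_width * 2 bytes per
+ // row.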
+ align_buffer_page_end(src_pixels_u, benchmark_width_ * benchmark_height_);
+ align_buffer_page_end(src_pixels_v, benchmark_width_ * benchmark_height_);
+ align_buffer_page_end(tmp_pixels_u, dst_width * dst_height);
+ align_buffer_page_end(tmp_pixels_v, dst_width * dst_height);
+ align_buffer_page_end(dst_pixels_uv_opt, dst_width * 2 * dst_height);
+ align_buffer_page_end(dst_pixels_uv_c, dst_width * 2 * dst_height);
+
+ MemRandomize(src_pixels_u, benchmark_width_ * benchmark_height_);
+ MemRandomize(src_pixels_v, benchmark_width_ * benchmark_height_);
+ MemRandomize(tmp_pixels_u, dst_width * dst_height);
+ MemRandomize(tmp_pixels_v, dst_width * dst_height);
+ MemRandomize(dst_pixels_uv_opt, dst_width * 2 * dst_height);
+ MemRandomize(dst_pixels_uv_c, dst_width * 2 * dst_height);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
+ benchmark_width_, dst_pixels_uv_c, dst_width * 2,
+ benchmark_width_, benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
+ benchmark_width_, dst_pixels_uv_opt, dst_width * 2,
+ benchmark_width_, benchmark_height_);
+ }
+
+ for (int i = 0; i < dst_width * 2 * dst_height; ++i) {
+ EXPECT_EQ(dst_pixels_uv_c[i], dst_pixels_uv_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels_u);
+ free_aligned_buffer_page_end(src_pixels_v);
+ free_aligned_buffer_page_end(tmp_pixels_u);
+ free_aligned_buffer_page_end(tmp_pixels_v);
+ free_aligned_buffer_page_end(dst_pixels_uv_opt);
+ free_aligned_buffer_page_end(dst_pixels_uv_c);
+}
+
+TEST_F(LibYUVPlanarTest, NV12Copy) {
+ const int halfwidth = (benchmark_width_ + 1) >> 1;
+ const int halfheight = (benchmark_height_ + 1) >> 1;
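+ // NV12 pairs a full-size Y plane with a half-resolution interleaved UV plane,
+ // so the chroma buffers hold halfwidth * 2 * halfheight bytes; NV21 (below)
+ // uses the same layout with V and U swapped.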
+ align_buffer_page_end(src_y, benchmark_width_ * benchmark_height_);
+ align_buffer_page_end(src_uv, halfwidth * 2 * halfheight);
+ align_buffer_page_end(dst_y, benchmark_width_ * benchmark_height_);
+ align_buffer_page_end(dst_uv, halfwidth * 2 * halfheight);
+
+ MemRandomize(src_y, benchmark_width_ * benchmark_height_);
+ MemRandomize(src_uv, halfwidth * 2 * halfheight);
+ MemRandomize(dst_y, benchmark_width_ * benchmark_height_);
+ MemRandomize(dst_uv, halfwidth * 2 * halfheight);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ NV12Copy(src_y, benchmark_width_, src_uv, halfwidth * 2, dst_y,
+ benchmark_width_, dst_uv, halfwidth * 2, benchmark_width_,
+ benchmark_height_);
+ }
+
+ for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) {
+ EXPECT_EQ(src_y[i], dst_y[i]);
+ }
+ for (int i = 0; i < halfwidth * 2 * halfheight; ++i) {
+ EXPECT_EQ(src_uv[i], dst_uv[i]);
+ }
+
+ free_aligned_buffer_page_end(src_y);
+ free_aligned_buffer_page_end(src_uv);
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_uv);
+}
+
+TEST_F(LibYUVPlanarTest, NV21Copy) {
+ const int halfwidth = (benchmark_width_ + 1) >> 1;
+ const int halfheight = (benchmark_height_ + 1) >> 1;
+ align_buffer_page_end(src_y, benchmark_width_ * benchmark_height_);
+ align_buffer_page_end(src_vu, halfwidth * 2 * halfheight);
+ align_buffer_page_end(dst_y, benchmark_width_ * benchmark_height_);
+ align_buffer_page_end(dst_vu, halfwidth * 2 * halfheight);
+
+ MemRandomize(src_y, benchmark_width_ * benchmark_height_);
+ MemRandomize(src_vu, halfwidth * 2 * halfheight);
+ MemRandomize(dst_y, benchmark_width_ * benchmark_height_);
+ MemRandomize(dst_vu, halfwidth * 2 * halfheight);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ NV21Copy(src_y, benchmark_width_, src_vu, halfwidth * 2, dst_y,
+ benchmark_width_, dst_vu, halfwidth * 2, benchmark_width_,
+ benchmark_height_);
+ }
+
+ for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) {
+ EXPECT_EQ(src_y[i], dst_y[i]);
+ }
+ for (int i = 0; i < halfwidth * 2 * halfheight; ++i) {
+ EXPECT_EQ(src_vu[i], dst_vu[i]);
+ }
+
+ free_aligned_buffer_page_end(src_y);
+ free_aligned_buffer_page_end(src_vu);
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_vu);
+}
} // namespace libyuv