diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 04:56:02 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 04:56:02 +0000 |
commit | 1fa594b2aa576922fd01c702585fcdb1a2e6f311 (patch) | |
tree | 25cbeae94c26cad28a13bb089a662291b4b5d905 /files/unit_test/planar_test.cc | |
parent | ba751146302bf8c030e9d4f7f580d5e1ff36dfdb (diff) | |
parent | 3c4c137522d3a6759f2d96aee67149410ca2c1b9 (diff) | |
download | libyuv-1fa594b2aa576922fd01c702585fcdb1a2e6f311.tar.gz |
Snap for 10453563 from 3c4c137522d3a6759f2d96aee67149410ca2c1b9 to mainline-extservices-releaseaml_ext_341027030
Change-Id: I9f5633c3ed357c6759beb58d98c26a0a2283896b
Diffstat (limited to 'files/unit_test/planar_test.cc')
-rw-r--r-- | files/unit_test/planar_test.cc | 1548 |
1 files changed, 1304 insertions, 244 deletions
diff --git a/files/unit_test/planar_test.cc b/files/unit_test/planar_test.cc index 70f8966e..3a8c470b 100644 --- a/files/unit_test/planar_test.cc +++ b/files/unit_test/planar_test.cc @@ -12,9 +12,6 @@ #include <stdlib.h> #include <time.h> -// row.h defines SIMD_ALIGNED, overriding unit_test.h -#include "libyuv/row.h" /* For ScaleSumSamples_Neon */ - #include "../unit_test/unit_test.h" #include "libyuv/compare.h" #include "libyuv/convert.h" @@ -24,6 +21,19 @@ #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" +#include "libyuv/scale.h" + +#ifdef ENABLE_ROW_TESTS +// row.h defines SIMD_ALIGNED, overriding unit_test.h +// TODO(fbarchard): Remove row.h from unittests. Test public functions. +#include "libyuv/row.h" /* For ScaleSumSamples_Neon */ +#endif + +#if defined(LIBYUV_BIT_EXACT) +#define EXPECTED_ATTENUATE_DIFF 0 +#else +#define EXPECTED_ATTENUATE_DIFF 2 +#endif namespace libyuv { @@ -96,9 +106,9 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) { EXPECT_EQ(32, atten_pixels[128 * 4 + 1]); EXPECT_EQ(21, atten_pixels[128 * 4 + 2]); EXPECT_EQ(128, atten_pixels[128 * 4 + 3]); - EXPECT_NEAR(255, atten_pixels[255 * 4 + 0], 1); - EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], 1); - EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], 1); + EXPECT_NEAR(254, atten_pixels[255 * 4 + 0], EXPECTED_ATTENUATE_DIFF); + EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], EXPECTED_ATTENUATE_DIFF); + EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], EXPECTED_ATTENUATE_DIFF); EXPECT_EQ(255, atten_pixels[255 * 4 + 3]); free_aligned_buffer_page_end(atten2_pixels); @@ -151,31 +161,32 @@ static int TestAttenuateI(int width, } TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) { - int max_diff = TestAttenuateI(benchmark_width_ - 1, benchmark_height_, + int max_diff = TestAttenuateI(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - EXPECT_LE(max_diff, 2); + + EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) { int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); - EXPECT_LE(max_diff, 2); + EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) { int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0); - EXPECT_LE(max_diff, 2); + EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) { int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - EXPECT_LE(max_diff, 2); + EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); } static int TestUnattenuateI(int width, @@ -224,31 +235,31 @@ static int TestUnattenuateI(int width, } TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) { - int max_diff = TestUnattenuateI(benchmark_width_ - 1, benchmark_height_, + int max_diff = TestUnattenuateI(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - EXPECT_LE(max_diff, 2); + EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) { int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); - EXPECT_LE(max_diff, 2); + EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) { int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0); - EXPECT_LE(max_diff, 2); + EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) { int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - EXPECT_LE(max_diff, 2); + EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) { @@ -277,6 +288,7 @@ TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) { } } +// near is for legacy platforms. TEST_F(LibYUVPlanarTest, TestARGBGray) { SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); @@ -313,17 +325,17 @@ TEST_F(LibYUVPlanarTest, TestARGBGray) { orig_pixels[5][3] = 224u; // Do 16 to test asm version. ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1); - EXPECT_EQ(30u, orig_pixels[0][0]); - EXPECT_EQ(30u, orig_pixels[0][1]); - EXPECT_EQ(30u, orig_pixels[0][2]); + EXPECT_NEAR(29u, orig_pixels[0][0], 1); + EXPECT_NEAR(29u, orig_pixels[0][1], 1); + EXPECT_NEAR(29u, orig_pixels[0][2], 1); EXPECT_EQ(128u, orig_pixels[0][3]); EXPECT_EQ(149u, orig_pixels[1][0]); EXPECT_EQ(149u, orig_pixels[1][1]); EXPECT_EQ(149u, orig_pixels[1][2]); EXPECT_EQ(0u, orig_pixels[1][3]); - EXPECT_EQ(76u, orig_pixels[2][0]); - EXPECT_EQ(76u, orig_pixels[2][1]); - EXPECT_EQ(76u, orig_pixels[2][2]); + EXPECT_NEAR(77u, orig_pixels[2][0], 1); + EXPECT_NEAR(77u, orig_pixels[2][1], 1); + EXPECT_NEAR(77u, orig_pixels[2][2], 1); EXPECT_EQ(255u, orig_pixels[2][3]); EXPECT_EQ(0u, orig_pixels[3][0]); EXPECT_EQ(0u, orig_pixels[3][1]); @@ -333,9 +345,9 @@ TEST_F(LibYUVPlanarTest, TestARGBGray) { EXPECT_EQ(255u, orig_pixels[4][1]); EXPECT_EQ(255u, orig_pixels[4][2]); EXPECT_EQ(255u, orig_pixels[4][3]); - EXPECT_EQ(96u, orig_pixels[5][0]); - EXPECT_EQ(96u, orig_pixels[5][1]); - EXPECT_EQ(96u, orig_pixels[5][2]); + EXPECT_NEAR(97u, orig_pixels[5][0], 1); + EXPECT_NEAR(97u, orig_pixels[5][1], 1); + EXPECT_NEAR(97u, orig_pixels[5][2], 1); EXPECT_EQ(224u, orig_pixels[5][3]); for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; @@ -385,30 +397,30 @@ TEST_F(LibYUVPlanarTest, TestARGBGrayTo) { orig_pixels[5][3] = 224u; // Do 16 to test asm version. ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1); - EXPECT_EQ(30u, gray_pixels[0][0]); - EXPECT_EQ(30u, gray_pixels[0][1]); - EXPECT_EQ(30u, gray_pixels[0][2]); - EXPECT_EQ(128u, gray_pixels[0][3]); - EXPECT_EQ(149u, gray_pixels[1][0]); - EXPECT_EQ(149u, gray_pixels[1][1]); - EXPECT_EQ(149u, gray_pixels[1][2]); - EXPECT_EQ(0u, gray_pixels[1][3]); - EXPECT_EQ(76u, gray_pixels[2][0]); - EXPECT_EQ(76u, gray_pixels[2][1]); - EXPECT_EQ(76u, gray_pixels[2][2]); - EXPECT_EQ(255u, gray_pixels[2][3]); - EXPECT_EQ(0u, gray_pixels[3][0]); - EXPECT_EQ(0u, gray_pixels[3][1]); - EXPECT_EQ(0u, gray_pixels[3][2]); - EXPECT_EQ(255u, gray_pixels[3][3]); - EXPECT_EQ(255u, gray_pixels[4][0]); - EXPECT_EQ(255u, gray_pixels[4][1]); - EXPECT_EQ(255u, gray_pixels[4][2]); - EXPECT_EQ(255u, gray_pixels[4][3]); - EXPECT_EQ(96u, gray_pixels[5][0]); - EXPECT_EQ(96u, gray_pixels[5][1]); - EXPECT_EQ(96u, gray_pixels[5][2]); - EXPECT_EQ(224u, gray_pixels[5][3]); + EXPECT_NEAR(30u, gray_pixels[0][0], 1); + EXPECT_NEAR(30u, gray_pixels[0][1], 1); + EXPECT_NEAR(30u, gray_pixels[0][2], 1); + EXPECT_NEAR(128u, gray_pixels[0][3], 1); + EXPECT_NEAR(149u, gray_pixels[1][0], 1); + EXPECT_NEAR(149u, gray_pixels[1][1], 1); + EXPECT_NEAR(149u, gray_pixels[1][2], 1); + EXPECT_NEAR(0u, gray_pixels[1][3], 1); + EXPECT_NEAR(76u, gray_pixels[2][0], 1); + EXPECT_NEAR(76u, gray_pixels[2][1], 1); + EXPECT_NEAR(76u, gray_pixels[2][2], 1); + EXPECT_NEAR(255u, gray_pixels[2][3], 1); + EXPECT_NEAR(0u, gray_pixels[3][0], 1); + EXPECT_NEAR(0u, gray_pixels[3][1], 1); + EXPECT_NEAR(0u, gray_pixels[3][2], 1); + EXPECT_NEAR(255u, gray_pixels[3][3], 1); + EXPECT_NEAR(255u, gray_pixels[4][0], 1); + EXPECT_NEAR(255u, gray_pixels[4][1], 1); + EXPECT_NEAR(255u, gray_pixels[4][2], 1); + EXPECT_NEAR(255u, gray_pixels[4][3], 1); + EXPECT_NEAR(96u, gray_pixels[5][0], 1); + EXPECT_NEAR(96u, gray_pixels[5][1], 1); + EXPECT_NEAR(96u, gray_pixels[5][2], 1); + EXPECT_NEAR(224u, gray_pixels[5][3], 1); for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; orig_pixels[i][1] = i / 2; @@ -418,6 +430,20 @@ TEST_F(LibYUVPlanarTest, TestARGBGrayTo) { for (int i = 0; i < benchmark_pixels_div1280_; ++i) { ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 1280, 1); } + + for (int i = 0; i < 256; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i; + orig_pixels[i][2] = i; + orig_pixels[i][3] = i; + } + ARGBGray(&orig_pixels[0][0], 0, 0, 0, 256, 1); + for (int i = 0; i < 256; ++i) { + EXPECT_EQ(i, orig_pixels[i][0]); + EXPECT_EQ(i, orig_pixels[i][1]); + EXPECT_EQ(i, orig_pixels[i][2]); + EXPECT_EQ(i, orig_pixels[i][3]); + } } TEST_F(LibYUVPlanarTest, TestARGBSepia) { @@ -763,27 +789,75 @@ TEST_F(LibYUVPlanarTest, TestARGBQuantize) { } } -TEST_F(LibYUVPlanarTest, TestARGBMirror) { - SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); - SIMD_ALIGNED(uint8_t dst_pixels[1280][4]); +TEST_F(LibYUVPlanarTest, ARGBMirror_Opt) { + align_buffer_page_end(src_pixels, benchmark_width_ * benchmark_height_ * 4); + align_buffer_page_end(dst_pixels_opt, + benchmark_width_ * benchmark_height_ * 4); + align_buffer_page_end(dst_pixels_c, benchmark_width_ * benchmark_height_ * 4); - for (int i = 0; i < 1280; ++i) { - orig_pixels[i][0] = i; - orig_pixels[i][1] = i / 2; - orig_pixels[i][2] = i / 3; - orig_pixels[i][3] = i / 4; + MemRandomize(src_pixels, benchmark_width_ * benchmark_height_ * 4); + MaskCpuFlags(disable_cpu_flags_); + ARGBMirror(src_pixels, benchmark_width_ * 4, dst_pixels_c, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + ARGBMirror(src_pixels, benchmark_width_ * 4, dst_pixels_opt, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); } - ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 1280, 1); + for (int i = 0; i < benchmark_width_ * benchmark_height_ * 4; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} - for (int i = 0; i < 1280; ++i) { - EXPECT_EQ(i & 255, dst_pixels[1280 - 1 - i][0]); - EXPECT_EQ((i / 2) & 255, dst_pixels[1280 - 1 - i][1]); - EXPECT_EQ((i / 3) & 255, dst_pixels[1280 - 1 - i][2]); - EXPECT_EQ((i / 4) & 255, dst_pixels[1280 - 1 - i][3]); +TEST_F(LibYUVPlanarTest, MirrorPlane_Opt) { + align_buffer_page_end(src_pixels, benchmark_width_ * benchmark_height_); + align_buffer_page_end(dst_pixels_opt, benchmark_width_ * benchmark_height_); + align_buffer_page_end(dst_pixels_c, benchmark_width_ * benchmark_height_); + + MemRandomize(src_pixels, benchmark_width_ * benchmark_height_); + MaskCpuFlags(disable_cpu_flags_); + MirrorPlane(src_pixels, benchmark_width_, dst_pixels_c, benchmark_width_, + benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + MirrorPlane(src_pixels, benchmark_width_, dst_pixels_opt, benchmark_width_, + benchmark_width_, benchmark_height_); } - for (int i = 0; i < benchmark_pixels_div1280_; ++i) { - ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 1280, 1); + for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +TEST_F(LibYUVPlanarTest, MirrorUVPlane_Opt) { + align_buffer_page_end(src_pixels, benchmark_width_ * benchmark_height_ * 2); + align_buffer_page_end(dst_pixels_opt, + benchmark_width_ * benchmark_height_ * 2); + align_buffer_page_end(dst_pixels_c, benchmark_width_ * benchmark_height_ * 2); + + MemRandomize(src_pixels, benchmark_width_ * benchmark_height_ * 2); + MaskCpuFlags(disable_cpu_flags_); + MirrorUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_c, + benchmark_width_ * 2, benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + MirrorUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_opt, + benchmark_width_ * 2, benchmark_width_, benchmark_height_); } + for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); } TEST_F(LibYUVPlanarTest, TestShade) { @@ -1006,10 +1080,91 @@ TEST_F(LibYUVPlanarTest, TestInterpolatePlane) { } } +TEST_F(LibYUVPlanarTest, TestInterpolatePlane_16) { + SIMD_ALIGNED(uint16_t orig_pixels_0[1280]); + SIMD_ALIGNED(uint16_t orig_pixels_1[1280]); + SIMD_ALIGNED(uint16_t interpolate_pixels[1280]); + memset(orig_pixels_0, 0, sizeof(orig_pixels_0)); + memset(orig_pixels_1, 0, sizeof(orig_pixels_1)); + + orig_pixels_0[0] = 16u; + orig_pixels_0[1] = 32u; + orig_pixels_0[2] = 64u; + orig_pixels_0[3] = 128u; + orig_pixels_0[4] = 0u; + orig_pixels_0[5] = 0u; + orig_pixels_0[6] = 0u; + orig_pixels_0[7] = 255u; + orig_pixels_0[8] = 0u; + orig_pixels_0[9] = 0u; + orig_pixels_0[10] = 0u; + orig_pixels_0[11] = 0u; + orig_pixels_0[12] = 0u; + orig_pixels_0[13] = 0u; + orig_pixels_0[14] = 0u; + orig_pixels_0[15] = 0u; + + orig_pixels_1[0] = 0u; + orig_pixels_1[1] = 0u; + orig_pixels_1[2] = 0u; + orig_pixels_1[3] = 0u; + orig_pixels_1[4] = 0u; + orig_pixels_1[5] = 0u; + orig_pixels_1[6] = 0u; + orig_pixels_1[7] = 0u; + orig_pixels_1[8] = 0u; + orig_pixels_1[9] = 0u; + orig_pixels_1[10] = 0u; + orig_pixels_1[11] = 0u; + orig_pixels_1[12] = 255u; + orig_pixels_1[13] = 255u; + orig_pixels_1[14] = 255u; + orig_pixels_1[15] = 255u; + + InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 16, 1, 128); + EXPECT_EQ(8u, interpolate_pixels[0]); + EXPECT_EQ(16u, interpolate_pixels[1]); + EXPECT_EQ(32u, interpolate_pixels[2]); + EXPECT_EQ(64u, interpolate_pixels[3]); + EXPECT_EQ(0u, interpolate_pixels[4]); + EXPECT_EQ(0u, interpolate_pixels[5]); + EXPECT_EQ(0u, interpolate_pixels[6]); + EXPECT_EQ(128u, interpolate_pixels[7]); + EXPECT_EQ(0u, interpolate_pixels[8]); + EXPECT_EQ(0u, interpolate_pixels[9]); + EXPECT_EQ(0u, interpolate_pixels[10]); + EXPECT_EQ(0u, interpolate_pixels[11]); + EXPECT_EQ(128u, interpolate_pixels[12]); + EXPECT_EQ(128u, interpolate_pixels[13]); + EXPECT_EQ(128u, interpolate_pixels[14]); + EXPECT_EQ(128u, interpolate_pixels[15]); + + InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 16, 1, 0); + EXPECT_EQ(16u, interpolate_pixels[0]); + EXPECT_EQ(32u, interpolate_pixels[1]); + EXPECT_EQ(64u, interpolate_pixels[2]); + EXPECT_EQ(128u, interpolate_pixels[3]); + + InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 16, 1, 192); + + EXPECT_EQ(4u, interpolate_pixels[0]); + EXPECT_EQ(8u, interpolate_pixels[1]); + EXPECT_EQ(16u, interpolate_pixels[2]); + EXPECT_EQ(32u, interpolate_pixels[3]); + + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 1280, 1, 123); + } +} + #define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \ N, NEG, OFF) \ TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) { \ - const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ + const int kWidth = W1280; \ const int kHeight = benchmark_height_; \ const int kStrideA = \ (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ @@ -1041,7 +1196,7 @@ TEST_F(LibYUVPlanarTest, TestInterpolatePlane) { } #define TESTINTERPOLATE(TERP) \ - TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ - 1, TERP, _Any, +, 0) \ + TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ + 1, TERP, _Any, +, 0) \ TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Unaligned, +, 1) \ TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Invert, -, 0) \ TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Opt, +, 0) @@ -1058,7 +1213,8 @@ static int TestBlend(int width, int disable_cpu_flags, int benchmark_cpu_info, int invert, - int off) { + int off, + int attenuate) { if (width < 1) { width = 1; } @@ -1072,10 +1228,12 @@ static int TestBlend(int width, src_argb_a[i + off] = (fastrand() & 0xff); src_argb_b[i + off] = (fastrand() & 0xff); } - ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width, - height); - ARGBAttenuate(src_argb_b + off, kStride, src_argb_b + off, kStride, width, - height); + MemRandomize(src_argb_a, kStride * height + off); + MemRandomize(src_argb_b, kStride * height + off); + if (attenuate) { + ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width, + height); + } memset(dst_argb_c, 255, kStride * height); memset(dst_argb_opt, 255, kStride * height); @@ -1104,29 +1262,36 @@ static int TestBlend(int width, TEST_F(LibYUVPlanarTest, ARGBBlend_Any) { int max_diff = - TestBlend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_, - disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + TestBlend(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1); EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) { int max_diff = TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_, - disable_cpu_flags_, benchmark_cpu_info_, +1, 1); + disable_cpu_flags_, benchmark_cpu_info_, +1, 1, 1); EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) { int max_diff = TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_, - disable_cpu_flags_, benchmark_cpu_info_, -1, 0); + disable_cpu_flags_, benchmark_cpu_info_, -1, 0, 1); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBBlend_Unattenuated) { + int max_diff = + TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 0); EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) { int max_diff = TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_, - disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1); EXPECT_LE(max_diff, 1); } @@ -1203,7 +1368,7 @@ TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) { disable_cpu_flags_, benchmark_cpu_info_, +1, 1); } TEST_F(LibYUVPlanarTest, BlendPlane_Any) { - TestBlendPlane(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_, + TestBlendPlane(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); } TEST_F(LibYUVPlanarTest, BlendPlane_Invert) { @@ -1298,7 +1463,7 @@ TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) { // TODO(fbarchard): DISABLED because _Any uses C. Avoid C and re-enable. TEST_F(LibYUVPlanarTest, DISABLED_I420Blend_Any) { - TestI420Blend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_, + TestI420Blend(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); } TEST_F(LibYUVPlanarTest, I420Blend_Invert) { @@ -1400,6 +1565,212 @@ TEST_F(LibYUVPlanarTest, TestCopyPlane) { EXPECT_EQ(0, err); } +TEST_F(LibYUVPlanarTest, CopyPlane_Opt) { + int i; + int y_plane_size = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_y, y_plane_size); + align_buffer_page_end(dst_c, y_plane_size); + align_buffer_page_end(dst_opt, y_plane_size); + + MemRandomize(orig_y, y_plane_size); + memset(dst_c, 1, y_plane_size); + memset(dst_opt, 2, y_plane_size); + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags_); + for (i = 0; i < benchmark_iterations_; i++) { + CopyPlane(orig_y, benchmark_width_, dst_c, benchmark_width_, + benchmark_width_, benchmark_height_); + } + + // Enable optimizations. + MaskCpuFlags(benchmark_cpu_info_); + for (i = 0; i < benchmark_iterations_; i++) { + CopyPlane(orig_y, benchmark_width_, dst_opt, benchmark_width_, + benchmark_width_, benchmark_height_); + } + + for (i = 0; i < y_plane_size; ++i) { + EXPECT_EQ(dst_c[i], dst_opt[i]); + } + + free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(dst_c); + free_aligned_buffer_page_end(dst_opt); +} + +TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) { + // Test to verify copying a rect with a zero height or width does + // not touch destination memory. + uint8_t src = 42; + uint8_t dst = 0; + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags_); + CopyPlane(&src, 0, &dst, 0, 0, 0); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); + + CopyPlane(&src, 1, &dst, 1, 1, 0); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); + + CopyPlane(&src, 1, &dst, 1, 0, 1); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); + + // Enable optimizations. + MaskCpuFlags(benchmark_cpu_info_); + CopyPlane(&src, 0, &dst, 0, 0, 0); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); + + CopyPlane(&src, 1, &dst, 1, 1, 0); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); + + CopyPlane(&src, 1, &dst, 1, 0, 1); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); +} + +TEST_F(LibYUVPlanarTest, TestDetilePlane) { + int i, j; + + // orig is tiled. Allocate enough memory for tiles. + int orig_width = (benchmark_width_ + 15) & ~15; + int orig_height = (benchmark_height_ + 15) & ~15; + int orig_plane_size = orig_width * orig_height; + int y_plane_size = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_y, orig_plane_size); + align_buffer_page_end(dst_c, y_plane_size); + align_buffer_page_end(dst_opt, y_plane_size); + + MemRandomize(orig_y, orig_plane_size); + memset(dst_c, 0, y_plane_size); + memset(dst_opt, 0, y_plane_size); + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags_); + for (j = 0; j < benchmark_iterations_; j++) { + DetilePlane(orig_y, orig_width, dst_c, benchmark_width_, benchmark_width_, + benchmark_height_, 16); + } + + // Enable optimizations. + MaskCpuFlags(benchmark_cpu_info_); + for (j = 0; j < benchmark_iterations_; j++) { + DetilePlane(orig_y, orig_width, dst_opt, benchmark_width_, benchmark_width_, + benchmark_height_, 16); + } + + for (i = 0; i < y_plane_size; ++i) { + EXPECT_EQ(dst_c[i], dst_opt[i]); + } + + free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(dst_c); + free_aligned_buffer_page_end(dst_opt); +} + +// Compares DetileSplitUV to 2 step Detile + SplitUV +TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) { + int i, j; + + // orig is tiled. Allocate enough memory for tiles. + int orig_width = (benchmark_width_ + 15) & ~15; + int orig_height = (benchmark_height_ + 15) & ~15; + int orig_plane_size = orig_width * orig_height; + int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_; + align_buffer_page_end(orig_uv, orig_plane_size); + align_buffer_page_end(detiled_uv, orig_plane_size); + align_buffer_page_end(dst_u_two_stage, uv_plane_size); + align_buffer_page_end(dst_u_opt, uv_plane_size); + align_buffer_page_end(dst_v_two_stage, uv_plane_size); + align_buffer_page_end(dst_v_opt, uv_plane_size); + + MemRandomize(orig_uv, orig_plane_size); + memset(detiled_uv, 0, orig_plane_size); + memset(dst_u_two_stage, 0, uv_plane_size); + memset(dst_u_opt, 0, uv_plane_size); + memset(dst_v_two_stage, 0, uv_plane_size); + memset(dst_v_opt, 0, uv_plane_size); + + DetileSplitUVPlane(orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, + dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_, + benchmark_height_, 16); + + // Benchmark 2 step conversion for comparison. + for (j = 0; j < benchmark_iterations_; j++) { + DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_, + benchmark_width_, benchmark_height_, 16); + SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage, + (benchmark_width_ + 1) / 2, dst_v_two_stage, + (benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2, + benchmark_height_); + } + + for (i = 0; i < uv_plane_size; ++i) { + EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]); + EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]); + } + + free_aligned_buffer_page_end(orig_uv); + free_aligned_buffer_page_end(detiled_uv); + free_aligned_buffer_page_end(dst_u_two_stage); + free_aligned_buffer_page_end(dst_u_opt); + free_aligned_buffer_page_end(dst_v_two_stage); + free_aligned_buffer_page_end(dst_v_opt); +} + +TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { + int i, j; + + // orig is tiled. Allocate enough memory for tiles. + int orig_width = (benchmark_width_ + 15) & ~15; + int orig_height = (benchmark_height_ + 15) & ~15; + int orig_plane_size = orig_width * orig_height; + int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_; + align_buffer_page_end(orig_uv, orig_plane_size); + align_buffer_page_end(dst_u_c, uv_plane_size); + align_buffer_page_end(dst_u_opt, uv_plane_size); + align_buffer_page_end(dst_v_c, uv_plane_size); + align_buffer_page_end(dst_v_opt, uv_plane_size); + + MemRandomize(orig_uv, orig_plane_size); + memset(dst_u_c, 0, uv_plane_size); + memset(dst_u_opt, 0, uv_plane_size); + memset(dst_v_c, 0, uv_plane_size); + memset(dst_v_opt, 0, uv_plane_size); + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags_); + + DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2, + dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_, + benchmark_height_, 16); + + // Enable optimizations. + MaskCpuFlags(benchmark_cpu_info_); + + for (j = 0; j < benchmark_iterations_; j++) { + DetileSplitUVPlane( + orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt, + (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16); + } + + for (i = 0; i < uv_plane_size; ++i) { + EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); + EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); + } + + free_aligned_buffer_page_end(orig_uv); + free_aligned_buffer_page_end(dst_u_c); + free_aligned_buffer_page_end(dst_u_opt); + free_aligned_buffer_page_end(dst_v_c); + free_aligned_buffer_page_end(dst_v_opt); +} + static int TestMultiply(int width, int height, int benchmark_iterations, @@ -1447,7 +1818,7 @@ static int TestMultiply(int width, } TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) { - int max_diff = TestMultiply(benchmark_width_ - 1, benchmark_height_, + int max_diff = TestMultiply(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); EXPECT_LE(max_diff, 1); @@ -1522,7 +1893,7 @@ static int TestAdd(int width, TEST_F(LibYUVPlanarTest, ARGBAdd_Any) { int max_diff = - TestAdd(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_, + TestAdd(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); EXPECT_LE(max_diff, 1); } @@ -1595,7 +1966,7 @@ static int TestSubtract(int width, } TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) { - int max_diff = TestSubtract(benchmark_width_ - 1, benchmark_height_, + int max_diff = TestSubtract(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); EXPECT_LE(max_diff, 1); @@ -1668,7 +2039,7 @@ static int TestSobel(int width, TEST_F(LibYUVPlanarTest, ARGBSobel_Any) { int max_diff = - TestSobel(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_, + TestSobel(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); EXPECT_EQ(0, max_diff); } @@ -1741,7 +2112,7 @@ static int TestSobelToPlane(int width, } TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) { - int max_diff = TestSobelToPlane(benchmark_width_ - 1, benchmark_height_, + int max_diff = TestSobelToPlane(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); EXPECT_EQ(0, max_diff); @@ -1813,7 +2184,7 @@ static int TestSobelXY(int width, } TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) { - int max_diff = TestSobelXY(benchmark_width_ - 1, benchmark_height_, + int max_diff = TestSobelXY(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); EXPECT_EQ(0, max_diff); @@ -1889,29 +2260,35 @@ static int TestBlur(int width, return max_diff; } +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +#define DISABLED_ARM(name) name +#else +#define DISABLED_ARM(name) DISABLED_##name +#endif + static const int kBlurSize = 55; -TEST_F(LibYUVPlanarTest, ARGBBlur_Any) { +TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Any)) { int max_diff = - TestBlur(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_, + TestBlur(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSize); EXPECT_LE(max_diff, 1); } -TEST_F(LibYUVPlanarTest, ARGBBlur_Unaligned) { +TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Unaligned)) { int max_diff = TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1, kBlurSize); EXPECT_LE(max_diff, 1); } -TEST_F(LibYUVPlanarTest, ARGBBlur_Invert) { +TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Invert)) { int max_diff = TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0, kBlurSize); EXPECT_LE(max_diff, 1); } -TEST_F(LibYUVPlanarTest, ARGBBlur_Opt) { +TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Opt)) { int max_diff = TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSize); @@ -1919,35 +2296,35 @@ TEST_F(LibYUVPlanarTest, ARGBBlur_Opt) { } static const int kBlurSmallSize = 5; -TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Any) { +TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Any)) { int max_diff = - TestBlur(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_, + TestBlur(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSmallSize); EXPECT_LE(max_diff, 1); } -TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Unaligned) { +TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Unaligned)) { int max_diff = TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1, kBlurSmallSize); EXPECT_LE(max_diff, 1); } -TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Invert) { +TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Invert)) { int max_diff = TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0, kBlurSmallSize); EXPECT_LE(max_diff, 1); } -TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Opt) { +TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Opt)) { int max_diff = TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSmallSize); EXPECT_LE(max_diff, 1); } -TEST_F(LibYUVPlanarTest, TestARGBPolynomial) { +TEST_F(LibYUVPlanarTest, DISABLED_ARM(TestARGBPolynomial)) { SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]); SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]); @@ -2426,7 +2803,7 @@ static int TestARGBRect(int width, } TEST_F(LibYUVPlanarTest, ARGBRect_Any) { - int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_, + int max_diff = TestARGBRect(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 4); EXPECT_EQ(0, max_diff); @@ -2454,7 +2831,7 @@ TEST_F(LibYUVPlanarTest, ARGBRect_Opt) { } TEST_F(LibYUVPlanarTest, SetPlane_Any) { - int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_, + int max_diff = TestARGBRect(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1); EXPECT_EQ(0, max_diff); @@ -2483,33 +2860,24 @@ TEST_F(LibYUVPlanarTest, SetPlane_Opt) { TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) { const int kPixels = benchmark_width_ * benchmark_height_; - align_buffer_page_end(src_pixels, kPixels * 2); - align_buffer_page_end(tmp_pixels_u, kPixels); - align_buffer_page_end(tmp_pixels_v, kPixels); + align_buffer_page_end(src_pixels_u, kPixels); + align_buffer_page_end(src_pixels_v, kPixels); align_buffer_page_end(dst_pixels_opt, kPixels * 2); align_buffer_page_end(dst_pixels_c, kPixels * 2); - MemRandomize(src_pixels, kPixels * 2); - MemRandomize(tmp_pixels_u, kPixels); - MemRandomize(tmp_pixels_v, kPixels); + MemRandomize(src_pixels_u, kPixels); + MemRandomize(src_pixels_v, kPixels); MemRandomize(dst_pixels_opt, kPixels * 2); MemRandomize(dst_pixels_c, kPixels * 2); MaskCpuFlags(disable_cpu_flags_); - SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_, - tmp_pixels_v, benchmark_width_, benchmark_width_, - benchmark_height_); - MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_, + MergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v, benchmark_width_, dst_pixels_c, benchmark_width_ * 2, benchmark_width_, benchmark_height_); MaskCpuFlags(benchmark_cpu_info_); - SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_, - tmp_pixels_v, benchmark_width_, benchmark_width_, - benchmark_height_); - for (int i = 0; i < benchmark_iterations_; ++i) { - MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_, + MergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v, benchmark_width_, dst_pixels_opt, benchmark_width_ * 2, benchmark_width_, benchmark_height_); } @@ -2518,9 +2886,43 @@ TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) { EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } - free_aligned_buffer_page_end(src_pixels); - free_aligned_buffer_page_end(tmp_pixels_u); - free_aligned_buffer_page_end(tmp_pixels_v); + free_aligned_buffer_page_end(src_pixels_u); + free_aligned_buffer_page_end(src_pixels_v); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +// 16 bit channel split and merge +TEST_F(LibYUVPlanarTest, MergeUVPlane_16_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels_u, kPixels * 2); + align_buffer_page_end(src_pixels_v, kPixels * 2); + align_buffer_page_end(dst_pixels_opt, kPixels * 2 * 2); + align_buffer_page_end(dst_pixels_c, kPixels * 2 * 2); + MemRandomize(src_pixels_u, kPixels * 2); + MemRandomize(src_pixels_v, kPixels * 2); + MemRandomize(dst_pixels_opt, kPixels * 2 * 2); + MemRandomize(dst_pixels_c, kPixels * 2 * 2); + + MaskCpuFlags(disable_cpu_flags_); + MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_, + (const uint16_t*)src_pixels_v, benchmark_width_, + (uint16_t*)dst_pixels_c, benchmark_width_ * 2, + benchmark_width_, benchmark_height_, 12); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_, + (const uint16_t*)src_pixels_v, benchmark_width_, + (uint16_t*)dst_pixels_opt, benchmark_width_ * 2, + benchmark_width_, benchmark_height_, 12); + } + + for (int i = 0; i < kPixels * 2 * 2; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + free_aligned_buffer_page_end(src_pixels_u); + free_aligned_buffer_page_end(src_pixels_v); free_aligned_buffer_page_end(dst_pixels_opt); free_aligned_buffer_page_end(dst_pixels_c); } @@ -2528,47 +2930,112 @@ TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) { TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) { const int kPixels = benchmark_width_ * benchmark_height_; align_buffer_page_end(src_pixels, kPixels * 2); - align_buffer_page_end(tmp_pixels_u, kPixels); - align_buffer_page_end(tmp_pixels_v, kPixels); + align_buffer_page_end(dst_pixels_u_c, kPixels); + align_buffer_page_end(dst_pixels_v_c, kPixels); + align_buffer_page_end(dst_pixels_u_opt, kPixels); + align_buffer_page_end(dst_pixels_v_opt, kPixels); + + MemRandomize(src_pixels, kPixels * 2); + MemRandomize(dst_pixels_u_c, kPixels); + MemRandomize(dst_pixels_v_c, kPixels); + MemRandomize(dst_pixels_u_opt, kPixels); + MemRandomize(dst_pixels_v_opt, kPixels); + + MaskCpuFlags(disable_cpu_flags_); + SplitUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_u_c, + benchmark_width_, dst_pixels_v_c, benchmark_width_, + benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + SplitUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_u_opt, + benchmark_width_, dst_pixels_v_opt, benchmark_width_, + benchmark_width_, benchmark_height_); + } + + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]); + EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(dst_pixels_u_c); + free_aligned_buffer_page_end(dst_pixels_v_c); + free_aligned_buffer_page_end(dst_pixels_u_opt); + free_aligned_buffer_page_end(dst_pixels_v_opt); +} + +// 16 bit channel split +TEST_F(LibYUVPlanarTest, SplitUVPlane_16_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 2 * 2); + align_buffer_page_end(dst_pixels_u_c, kPixels * 2); + align_buffer_page_end(dst_pixels_v_c, kPixels * 2); + align_buffer_page_end(dst_pixels_u_opt, kPixels * 2); + align_buffer_page_end(dst_pixels_v_opt, kPixels * 2); + MemRandomize(src_pixels, kPixels * 2 * 2); + MemRandomize(dst_pixels_u_c, kPixels * 2); + MemRandomize(dst_pixels_v_c, kPixels * 2); + MemRandomize(dst_pixels_u_opt, kPixels * 2); + MemRandomize(dst_pixels_v_opt, kPixels * 2); + + MaskCpuFlags(disable_cpu_flags_); + SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2, + (uint16_t*)dst_pixels_u_c, benchmark_width_, + (uint16_t*)dst_pixels_v_c, benchmark_width_, benchmark_width_, + benchmark_height_, 10); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2, + (uint16_t*)dst_pixels_u_opt, benchmark_width_, + (uint16_t*)dst_pixels_v_opt, benchmark_width_, + benchmark_width_, benchmark_height_, 10); + } + + for (int i = 0; i < kPixels * 2; ++i) { + EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]); + EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]); + } + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(dst_pixels_u_c); + free_aligned_buffer_page_end(dst_pixels_v_c); + free_aligned_buffer_page_end(dst_pixels_u_opt); + free_aligned_buffer_page_end(dst_pixels_v_opt); +} + +TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) { + // Round count up to multiple of 16 + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 2); align_buffer_page_end(dst_pixels_opt, kPixels * 2); align_buffer_page_end(dst_pixels_c, kPixels * 2); MemRandomize(src_pixels, kPixels * 2); - MemRandomize(tmp_pixels_u, kPixels); - MemRandomize(tmp_pixels_v, kPixels); MemRandomize(dst_pixels_opt, kPixels * 2); MemRandomize(dst_pixels_c, kPixels * 2); MaskCpuFlags(disable_cpu_flags_); - SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_, - tmp_pixels_v, benchmark_width_, benchmark_width_, - benchmark_height_); - MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_, - dst_pixels_c, benchmark_width_ * 2, benchmark_width_, - benchmark_height_); + SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_c, + benchmark_width_ * 2, benchmark_width_, benchmark_height_); MaskCpuFlags(benchmark_cpu_info_); for (int i = 0; i < benchmark_iterations_; ++i) { - SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, - benchmark_width_, tmp_pixels_v, benchmark_width_, - benchmark_width_, benchmark_height_); + SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_opt, + benchmark_width_ * 2, benchmark_width_, benchmark_height_); } - MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_, - dst_pixels_opt, benchmark_width_ * 2, benchmark_width_, - benchmark_height_); for (int i = 0; i < kPixels * 2; ++i) { EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } free_aligned_buffer_page_end(src_pixels); - free_aligned_buffer_page_end(tmp_pixels_u); - free_aligned_buffer_page_end(tmp_pixels_v); free_aligned_buffer_page_end(dst_pixels_opt); free_aligned_buffer_page_end(dst_pixels_c); } TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) { + // Round count up to multiple of 16 const int kPixels = benchmark_width_ * benchmark_height_; align_buffer_page_end(src_pixels, kPixels * 3); align_buffer_page_end(tmp_pixels_r, kPixels); @@ -2617,6 +3084,7 @@ TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) { } TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) { + // Round count up to multiple of 16 const int kPixels = benchmark_width_ * benchmark_height_; align_buffer_page_end(src_pixels, kPixels * 3); align_buffer_page_end(tmp_pixels_r, kPixels); @@ -2663,10 +3131,373 @@ TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) { free_aligned_buffer_page_end(dst_pixels_c); } +TEST_F(LibYUVPlanarTest, MergeARGBPlane_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 4); + align_buffer_page_end(tmp_pixels_r, kPixels); + align_buffer_page_end(tmp_pixels_g, kPixels); + align_buffer_page_end(tmp_pixels_b, kPixels); + align_buffer_page_end(tmp_pixels_a, kPixels); + align_buffer_page_end(dst_pixels_opt, kPixels * 4); + align_buffer_page_end(dst_pixels_c, kPixels * 4); + + MemRandomize(src_pixels, kPixels * 4); + MemRandomize(tmp_pixels_r, kPixels); + MemRandomize(tmp_pixels_g, kPixels); + MemRandomize(tmp_pixels_b, kPixels); + MemRandomize(tmp_pixels_a, kPixels); + MemRandomize(dst_pixels_opt, kPixels * 4); + MemRandomize(dst_pixels_c, kPixels * 4); + + MaskCpuFlags(disable_cpu_flags_); + SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, tmp_pixels_a, benchmark_width_, + benchmark_width_, benchmark_height_); + MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, tmp_pixels_a, benchmark_width_, + dst_pixels_c, benchmark_width_ * 4, benchmark_width_, + benchmark_height_); + + MaskCpuFlags(benchmark_cpu_info_); + SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, tmp_pixels_a, benchmark_width_, + benchmark_width_, benchmark_height_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, + benchmark_width_, tmp_pixels_b, benchmark_width_, + tmp_pixels_a, benchmark_width_, dst_pixels_opt, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + } + + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(tmp_pixels_r); + free_aligned_buffer_page_end(tmp_pixels_g); + free_aligned_buffer_page_end(tmp_pixels_b); + free_aligned_buffer_page_end(tmp_pixels_a); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +TEST_F(LibYUVPlanarTest, SplitARGBPlane_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 4); + align_buffer_page_end(tmp_pixels_r, kPixels); + align_buffer_page_end(tmp_pixels_g, kPixels); + align_buffer_page_end(tmp_pixels_b, kPixels); + align_buffer_page_end(tmp_pixels_a, kPixels); + align_buffer_page_end(dst_pixels_opt, kPixels * 4); + align_buffer_page_end(dst_pixels_c, kPixels * 4); + + MemRandomize(src_pixels, kPixels * 4); + MemRandomize(tmp_pixels_r, kPixels); + MemRandomize(tmp_pixels_g, kPixels); + MemRandomize(tmp_pixels_b, kPixels); + MemRandomize(tmp_pixels_a, kPixels); + MemRandomize(dst_pixels_opt, kPixels * 4); + MemRandomize(dst_pixels_c, kPixels * 4); + + MaskCpuFlags(disable_cpu_flags_); + SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, tmp_pixels_a, benchmark_width_, + benchmark_width_, benchmark_height_); + MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, tmp_pixels_a, benchmark_width_, + dst_pixels_c, benchmark_width_ * 4, benchmark_width_, + benchmark_height_); + + MaskCpuFlags(benchmark_cpu_info_); + for (int i = 0; i < benchmark_iterations_; ++i) { + SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, tmp_pixels_a, + benchmark_width_, benchmark_width_, benchmark_height_); + } + + MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, tmp_pixels_a, benchmark_width_, + dst_pixels_opt, benchmark_width_ * 4, benchmark_width_, + benchmark_height_); + + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(tmp_pixels_r); + free_aligned_buffer_page_end(tmp_pixels_g); + free_aligned_buffer_page_end(tmp_pixels_b); + free_aligned_buffer_page_end(tmp_pixels_a); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +TEST_F(LibYUVPlanarTest, MergeXRGBPlane_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 4); + align_buffer_page_end(tmp_pixels_r, kPixels); + align_buffer_page_end(tmp_pixels_g, kPixels); + align_buffer_page_end(tmp_pixels_b, kPixels); + align_buffer_page_end(dst_pixels_opt, kPixels * 4); + align_buffer_page_end(dst_pixels_c, kPixels * 4); + + MemRandomize(src_pixels, kPixels * 4); + MemRandomize(tmp_pixels_r, kPixels); + MemRandomize(tmp_pixels_g, kPixels); + MemRandomize(tmp_pixels_b, kPixels); + MemRandomize(dst_pixels_opt, kPixels * 4); + MemRandomize(dst_pixels_c, kPixels * 4); + + MaskCpuFlags(disable_cpu_flags_); + SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, NULL, 0, benchmark_width_, + benchmark_height_); + MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, NULL, 0, dst_pixels_c, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + + MaskCpuFlags(benchmark_cpu_info_); + SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, NULL, 0, benchmark_width_, + benchmark_height_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, + benchmark_width_, tmp_pixels_b, benchmark_width_, NULL, 0, + dst_pixels_opt, benchmark_width_ * 4, benchmark_width_, + benchmark_height_); + } + + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(tmp_pixels_r); + free_aligned_buffer_page_end(tmp_pixels_g); + free_aligned_buffer_page_end(tmp_pixels_b); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 4); + align_buffer_page_end(tmp_pixels_r, kPixels); + align_buffer_page_end(tmp_pixels_g, kPixels); + align_buffer_page_end(tmp_pixels_b, kPixels); + align_buffer_page_end(dst_pixels_opt, kPixels * 4); + align_buffer_page_end(dst_pixels_c, kPixels * 4); + + MemRandomize(src_pixels, kPixels * 4); + MemRandomize(tmp_pixels_r, kPixels); + MemRandomize(tmp_pixels_g, kPixels); + MemRandomize(tmp_pixels_b, kPixels); + MemRandomize(dst_pixels_opt, kPixels * 4); + MemRandomize(dst_pixels_c, kPixels * 4); + + MaskCpuFlags(disable_cpu_flags_); + SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, NULL, 0, benchmark_width_, + benchmark_height_); + MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, NULL, 0, dst_pixels_c, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + + MaskCpuFlags(benchmark_cpu_info_); + for (int i = 0; i < benchmark_iterations_; ++i) { + SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, NULL, 0, benchmark_width_, + benchmark_height_); + } + + MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, NULL, 0, dst_pixels_opt, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(tmp_pixels_r); + free_aligned_buffer_page_end(tmp_pixels_g); + free_aligned_buffer_page_end(tmp_pixels_b); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +// Merge 4 channels +#define TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \ + TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) { \ + const int kWidth = W1280; \ + const int kPixels = kWidth * benchmark_height_; \ + align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(src_memory_a, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \ + align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \ + MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \ + MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \ + MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \ + MemRandomize(src_memory_a, kPixels * sizeof(STYPE) + OFF); \ + memset(dst_memory_c, 0, kPixels * 4 * sizeof(DTYPE)); \ + memset(dst_memory_opt, 0, kPixels * 4 * sizeof(DTYPE)); \ + STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \ + STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \ + STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \ + STYPE* src_pixels_a = reinterpret_cast<STYPE*>(src_memory_a + OFF); \ + DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \ + DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \ + MaskCpuFlags(disable_cpu_flags_); \ + FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \ + kWidth, src_pixels_a, kWidth, dst_pixels_c, kWidth * 4, \ + kWidth, NEG benchmark_height_, DEPTH); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \ + kWidth, src_pixels_a, kWidth, dst_pixels_opt, kWidth * 4, \ + kWidth, NEG benchmark_height_, DEPTH); \ + } \ + for (int i = 0; i < kPixels * 4; ++i) { \ + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_memory_r); \ + free_aligned_buffer_page_end(src_memory_g); \ + free_aligned_buffer_page_end(src_memory_b); \ + free_aligned_buffer_page_end(src_memory_a); \ + free_aligned_buffer_page_end(dst_memory_c); \ + free_aligned_buffer_page_end(dst_memory_opt); \ + } + +// Merge 3 channel RGB into 4 channel XRGB with opaque alpha +#define TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \ + TEST_F(LibYUVPlanarTest, FUNC##Plane_Opaque_##DEPTH##N) { \ + const int kWidth = W1280; \ + const int kPixels = kWidth * benchmark_height_; \ + align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \ + align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \ + MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \ + MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \ + MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \ + memset(dst_memory_c, 0, kPixels * 4 * sizeof(DTYPE)); \ + memset(dst_memory_opt, 0, kPixels * 4 * sizeof(DTYPE)); \ + STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \ + STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \ + STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \ + DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \ + DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \ + MaskCpuFlags(disable_cpu_flags_); \ + FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \ + kWidth, NULL, 0, dst_pixels_c, kWidth * 4, kWidth, \ + NEG benchmark_height_, DEPTH); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \ + kWidth, NULL, 0, dst_pixels_opt, kWidth * 4, kWidth, \ + NEG benchmark_height_, DEPTH); \ + } \ + for (int i = 0; i < kPixels * 4; ++i) { \ + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_memory_r); \ + free_aligned_buffer_page_end(src_memory_g); \ + free_aligned_buffer_page_end(src_memory_b); \ + free_aligned_buffer_page_end(dst_memory_c); \ + free_aligned_buffer_page_end(dst_memory_opt); \ + } + +#define TESTQPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \ + TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \ + TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \ + 2) \ + TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \ + TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0) \ + TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, \ + 0) \ + TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \ + 2) \ + TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \ + TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0) + +TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 10) +TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 12) +TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 16) +TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 10) +TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 12) +TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 16) + +#define TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \ + TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) { \ + const int kWidth = W1280; \ + const int kPixels = kWidth * benchmark_height_; \ + align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \ + align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \ + MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \ + MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \ + MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \ + STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \ + STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \ + STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \ + DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \ + DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \ + memset(dst_pixels_c, 1, kPixels * 4 * sizeof(DTYPE)); \ + memset(dst_pixels_opt, 2, kPixels * 4 * sizeof(DTYPE)); \ + MaskCpuFlags(disable_cpu_flags_); \ + FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \ + kWidth, dst_pixels_c, kWidth * 4, kWidth, \ + NEG benchmark_height_, DEPTH); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \ + kWidth, dst_pixels_opt, kWidth * 4, kWidth, \ + NEG benchmark_height_, DEPTH); \ + } \ + for (int i = 0; i < kPixels * 4; ++i) { \ + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_memory_r); \ + free_aligned_buffer_page_end(src_memory_g); \ + free_aligned_buffer_page_end(src_memory_b); \ + free_aligned_buffer_page_end(dst_memory_c); \ + free_aligned_buffer_page_end(dst_memory_opt); \ + } + +#define TESTTPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \ + TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \ + TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \ + 2) \ + TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \ + TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0) + +TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 10) +TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 12) +TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16) + // TODO(fbarchard): improve test for platforms and cpu detect #ifdef HAS_MERGEUVROW_16_AVX2 TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) { - const int kPixels = benchmark_width_ * benchmark_height_; + // Round count up to multiple of 16 + const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15; + align_buffer_page_end(src_pixels_u, kPixels * 2); align_buffer_page_end(src_pixels_v, kPixels * 2); align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2); @@ -2679,19 +3510,19 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) { MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u), reinterpret_cast<const uint16_t*>(src_pixels_v), - reinterpret_cast<uint16_t*>(dst_pixels_uv_c), 64, kPixels); + reinterpret_cast<uint16_t*>(dst_pixels_uv_c), 16, kPixels); int has_avx2 = TestCpuFlag(kCpuHasAVX2); for (int i = 0; i < benchmark_iterations_; ++i) { if (has_avx2) { MergeUVRow_16_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_u), reinterpret_cast<const uint16_t*>(src_pixels_v), - reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64, + reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 16, kPixels); } else { MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u), reinterpret_cast<const uint16_t*>(src_pixels_v), - reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64, + reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 16, kPixels); } } @@ -2710,7 +3541,9 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) { // TODO(fbarchard): Improve test for more platforms. #ifdef HAS_MULTIPLYROW_16_AVX2 TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) { - const int kPixels = benchmark_width_ * benchmark_height_; + // Round count up to multiple of 32 + const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31; + align_buffer_page_end(src_pixels_y, kPixels * 2); align_buffer_page_end(dst_pixels_y_opt, kPixels * 2); align_buffer_page_end(dst_pixels_y_c, kPixels * 2); @@ -2776,6 +3609,65 @@ TEST_F(LibYUVPlanarTest, Convert16To8Plane) { free_aligned_buffer_page_end(dst_pixels_y_c); } +TEST_F(LibYUVPlanarTest, YUY2ToY) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels_y, kPixels * 2); + align_buffer_page_end(dst_pixels_y_opt, kPixels); + align_buffer_page_end(dst_pixels_y_c, kPixels); + + MemRandomize(src_pixels_y, kPixels * 2); + memset(dst_pixels_y_opt, 0, kPixels); + memset(dst_pixels_y_c, 1, kPixels); + + MaskCpuFlags(disable_cpu_flags_); + YUY2ToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, benchmark_width_, + benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + YUY2ToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_opt, + benchmark_width_, benchmark_width_, benchmark_height_); + } + + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} + +TEST_F(LibYUVPlanarTest, UYVYToY) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels_y, kPixels * 2); + align_buffer_page_end(dst_pixels_y_opt, kPixels); + align_buffer_page_end(dst_pixels_y_c, kPixels); + + MemRandomize(src_pixels_y, kPixels * 2); + memset(dst_pixels_y_opt, 0, kPixels); + memset(dst_pixels_y_c, 1, kPixels); + + MaskCpuFlags(disable_cpu_flags_); + UYVYToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, benchmark_width_, + benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + UYVYToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_opt, + benchmark_width_, benchmark_width_, benchmark_height_); + } + + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} + +#ifdef ENABLE_ROW_TESTS // TODO(fbarchard): Improve test for more platforms. #ifdef HAS_CONVERT16TO8ROW_AVX2 TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) { @@ -2822,6 +3714,36 @@ TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) { } #endif // HAS_CONVERT16TO8ROW_AVX2 +#ifdef HAS_UYVYTOYROW_NEON +TEST_F(LibYUVPlanarTest, UYVYToYRow_Opt) { + // NEON does multiple of 16, so round count up + const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15; + align_buffer_page_end(src_pixels_y, kPixels * 2); + align_buffer_page_end(dst_pixels_y_opt, kPixels); + align_buffer_page_end(dst_pixels_y_c, kPixels); + + MemRandomize(src_pixels_y, kPixels * 2); + memset(dst_pixels_y_opt, 0, kPixels); + memset(dst_pixels_y_c, 1, kPixels); + + UYVYToYRow_C(src_pixels_y, dst_pixels_y_c, kPixels); + + for (int i = 0; i < benchmark_iterations_; ++i) { + UYVYToYRow_NEON(src_pixels_y, dst_pixels_y_opt, kPixels); + } + + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} +#endif // HAS_UYVYTOYROW_NEON + +#endif // ENABLE_ROW_TESTS + TEST_F(LibYUVPlanarTest, Convert8To16Plane) { const int kPixels = benchmark_width_ * benchmark_height_; align_buffer_page_end(src_pixels_y, kPixels); @@ -2855,6 +3777,7 @@ TEST_F(LibYUVPlanarTest, Convert8To16Plane) { free_aligned_buffer_page_end(dst_pixels_y_c); } +#ifdef ENABLE_ROW_TESTS // TODO(fbarchard): Improve test for more platforms. #ifdef HAS_CONVERT8TO16ROW_AVX2 TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) { @@ -3173,33 +4096,33 @@ extern "C" void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width); extern "C" void GaussRow_C(const uint32_t* src, uint16_t* dst, int width); TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) { - SIMD_ALIGNED(uint32_t orig_pixels[640 + 4]); - SIMD_ALIGNED(uint16_t dst_pixels_c[640]); - SIMD_ALIGNED(uint16_t dst_pixels_opt[640]); + SIMD_ALIGNED(uint32_t orig_pixels[1280 + 8]); + SIMD_ALIGNED(uint16_t dst_pixels_c[1280]); + SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]); memset(orig_pixels, 0, sizeof(orig_pixels)); memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); - for (int i = 0; i < 640 + 4; ++i) { + for (int i = 0; i < 1280 + 8; ++i) { orig_pixels[i] = i * 256; } - GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 640); - for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) { + GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 1280); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { #if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) int has_neon = TestCpuFlag(kCpuHasNEON); if (has_neon) { - GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 640); + GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 1280); } else { - GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 640); + GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 1280); } #else - GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 640); + GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 1280); #endif } - for (int i = 0; i < 640; ++i) { + for (int i = 0; i < 1280; ++i) { EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } @@ -3225,141 +4148,139 @@ extern "C" void GaussCol_C(const uint16_t* src0, int width); TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) { - SIMD_ALIGNED(uint16_t orig_pixels[640 * 5]); - SIMD_ALIGNED(uint32_t dst_pixels_c[640]); - SIMD_ALIGNED(uint32_t dst_pixels_opt[640]); + SIMD_ALIGNED(uint16_t orig_pixels[1280 * 5]); + SIMD_ALIGNED(uint32_t dst_pixels_c[1280]); + SIMD_ALIGNED(uint32_t dst_pixels_opt[1280]); memset(orig_pixels, 0, sizeof(orig_pixels)); memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); - for (int i = 0; i < 640 * 5; ++i) { - orig_pixels[i] = i; + for (int i = 0; i < 1280 * 5; ++i) { + orig_pixels[i] = static_cast<float>(i); } - GaussCol_C(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2], - &orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_c[0], - 640); - for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) { + GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2], + &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], &dst_pixels_c[0], + 1280); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { #if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) int has_neon = TestCpuFlag(kCpuHasNEON); if (has_neon) { - GaussCol_NEON(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2], - &orig_pixels[640 * 3], &orig_pixels[640 * 4], - &dst_pixels_opt[0], 640); + GaussCol_NEON(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2], + &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], + &dst_pixels_opt[0], 1280); } else { - GaussCol_C(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2], - &orig_pixels[640 * 3], &orig_pixels[640 * 4], - &dst_pixels_opt[0], 640); + GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2], + &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], + &dst_pixels_opt[0], 1280); } #else - GaussCol_C(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2], - &orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_opt[0], - 640); + GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2], + &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], + &dst_pixels_opt[0], 1280); #endif } - for (int i = 0; i < 640; ++i) { + for (int i = 0; i < 1280; ++i) { EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } - - EXPECT_EQ(dst_pixels_c[0], - static_cast<uint32_t>(0 * 1 + 640 * 4 + 640 * 2 * 6 + 640 * 3 * 4 + - 640 * 4 * 1)); - EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704)); } -float TestFloatDivToByte(int benchmark_width, - int benchmark_height, - int benchmark_iterations, - float scale, - bool opt) { - int i, j; - // NEON does multiple of 8, so round count up - const int kPixels = (benchmark_width * benchmark_height + 7) & ~7; - align_buffer_page_end(src_weights, kPixels * 4); - align_buffer_page_end(src_values, kPixels * 4); - align_buffer_page_end(dst_out_c, kPixels); - align_buffer_page_end(dst_out_opt, kPixels); - align_buffer_page_end(dst_mask_c, kPixels); - align_buffer_page_end(dst_mask_opt, kPixels); - - // Randomize works but may contain some denormals affecting performance. - // MemRandomize(orig_y, kPixels * 4); - // large values are problematic. audio is really -1 to 1. - for (i = 0; i < kPixels; ++i) { - (reinterpret_cast<float*>(src_weights))[i] = scale; - (reinterpret_cast<float*>(src_values))[i] = - sinf(static_cast<float>(i) * 0.1f); - } - memset(dst_out_c, 0, kPixels); - memset(dst_out_opt, 1, kPixels); - memset(dst_mask_c, 2, kPixels); - memset(dst_mask_opt, 3, kPixels); +TEST_F(LibYUVPlanarTest, TestGaussRow_F32_Opt) { + SIMD_ALIGNED(float orig_pixels[1280 + 4]); + SIMD_ALIGNED(float dst_pixels_c[1280]); + SIMD_ALIGNED(float dst_pixels_opt[1280]); - FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights), - reinterpret_cast<float*>(src_values), dst_out_c, - dst_mask_c, kPixels); + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); + memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); - for (j = 0; j < benchmark_iterations; j++) { - if (opt) { -#ifdef HAS_FLOATDIVTOBYTEROW_NEON - FloatDivToByteRow_NEON(reinterpret_cast<float*>(src_weights), - reinterpret_cast<float*>(src_values), dst_out_opt, - dst_mask_opt, kPixels); -#else - FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights), - reinterpret_cast<float*>(src_values), dst_out_opt, - dst_mask_opt, kPixels); -#endif + for (int i = 0; i < 1280 + 4; ++i) { + orig_pixels[i] = static_cast<float>(i); + } + GaussRow_F32_C(&orig_pixels[0], &dst_pixels_c[0], 1280); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { +#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) + int has_neon = TestCpuFlag(kCpuHasNEON); + if (has_neon) { + GaussRow_F32_NEON(&orig_pixels[0], &dst_pixels_opt[0], 1280); } else { - FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights), - reinterpret_cast<float*>(src_values), dst_out_opt, - dst_mask_opt, kPixels); + GaussRow_F32_C(&orig_pixels[0], &dst_pixels_opt[0], 1280); } +#else + GaussRow_F32_C(&orig_pixels[0], &dst_pixels_opt[0], 1280); +#endif } - uint8_t max_diff = 0; - for (i = 0; i < kPixels; ++i) { - uint8_t abs_diff = abs(dst_out_c[i] - dst_out_opt[i]) + - abs(dst_mask_c[i] - dst_mask_opt[i]); - if (abs_diff > max_diff) { - max_diff = abs_diff; - } + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } +} - free_aligned_buffer_page_end(src_weights); - free_aligned_buffer_page_end(src_values); - free_aligned_buffer_page_end(dst_out_c); - free_aligned_buffer_page_end(dst_out_opt); - free_aligned_buffer_page_end(dst_mask_c); - free_aligned_buffer_page_end(dst_mask_opt); +TEST_F(LibYUVPlanarTest, TestGaussCol_F32_Opt) { + SIMD_ALIGNED(float dst_pixels_c[1280]); + SIMD_ALIGNED(float dst_pixels_opt[1280]); + align_buffer_page_end(orig_pixels_buf, 1280 * 5 * 4); // 5 rows + float* orig_pixels = reinterpret_cast<float*>(orig_pixels_buf); - return max_diff; -} + memset(orig_pixels, 0, 1280 * 5 * 4); + memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); + memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); -TEST_F(LibYUVPlanarTest, TestFloatDivToByte_C) { - float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_, - benchmark_iterations_, 1.2f, false); - EXPECT_EQ(0, diff); -} + for (int i = 0; i < 1280 * 5; ++i) { + orig_pixels[i] = static_cast<float>(i); + } + GaussCol_F32_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2], + &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], + &dst_pixels_c[0], 1280); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { +#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) + int has_neon = TestCpuFlag(kCpuHasNEON); + if (has_neon) { + GaussCol_F32_NEON(&orig_pixels[0], &orig_pixels[1280], + &orig_pixels[1280 * 2], &orig_pixels[1280 * 3], + &orig_pixels[1280 * 4], &dst_pixels_opt[0], 1280); + } else { + GaussCol_F32_C(&orig_pixels[0], &orig_pixels[1280], + &orig_pixels[1280 * 2], &orig_pixels[1280 * 3], + &orig_pixels[1280 * 4], &dst_pixels_opt[0], 1280); + } +#else + GaussCol_F32_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2], + &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], + &dst_pixels_opt[0], 1280); +#endif + } -TEST_F(LibYUVPlanarTest, TestFloatDivToByte_Opt) { - float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_, - benchmark_iterations_, 1.2f, true); - EXPECT_EQ(0, diff); + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + free_aligned_buffer_page_end(orig_pixels_buf); } -TEST_F(LibYUVPlanarTest, UVToVURow) { +TEST_F(LibYUVPlanarTest, SwapUVRow) { const int kPixels = benchmark_width_ * benchmark_height_; + void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) = + SwapUVRow_C; + align_buffer_page_end(src_pixels_vu, kPixels * 2); align_buffer_page_end(dst_pixels_uv, kPixels * 2); - MemRandomize(src_pixels_vu, kPixels * 2); memset(dst_pixels_uv, 1, kPixels * 2); - UVToVURow_C(src_pixels_vu, dst_pixels_uv, kPixels); +#if defined(HAS_SWAPUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + SwapUVRow = SwapUVRow_Any_NEON; + if (IS_ALIGNED(kPixels, 16)) { + SwapUVRow = SwapUVRow_NEON; + } + } +#endif + for (int j = 0; j < benchmark_iterations_; j++) { + SwapUVRow(src_pixels_vu, dst_pixels_uv, kPixels); + } for (int i = 0; i < kPixels; ++i) { EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]); EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]); @@ -3368,5 +4289,144 @@ TEST_F(LibYUVPlanarTest, UVToVURow) { free_aligned_buffer_page_end(src_pixels_vu); free_aligned_buffer_page_end(dst_pixels_uv); } +#endif // ENABLE_ROW_TESTS + +TEST_F(LibYUVPlanarTest, TestGaussPlane_F32) { + const int kSize = benchmark_width_ * benchmark_height_ * 4; + align_buffer_page_end(orig_pixels, kSize); + align_buffer_page_end(dst_pixels_opt, kSize); + align_buffer_page_end(dst_pixels_c, kSize); + + for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { + ((float*)(orig_pixels))[i] = (i & 1023) * 3.14f; + } + memset(dst_pixels_opt, 1, kSize); + memset(dst_pixels_c, 2, kSize); + + MaskCpuFlags(disable_cpu_flags_); + GaussPlane_F32((const float*)(orig_pixels), benchmark_width_, + (float*)(dst_pixels_c), benchmark_width_, benchmark_width_, + benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + GaussPlane_F32((const float*)(orig_pixels), benchmark_width_, + (float*)(dst_pixels_opt), benchmark_width_, benchmark_width_, + benchmark_height_); + } + for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { + EXPECT_NEAR(((float*)(dst_pixels_c))[i], ((float*)(dst_pixels_opt))[i], 1.f) + << i; + } + + free_aligned_buffer_page_end(dst_pixels_c); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVPlanarTest, HalfMergeUVPlane_Opt) { + int dst_width = (benchmark_width_ + 1) / 2; + int dst_height = (benchmark_height_ + 1) / 2; + align_buffer_page_end(src_pixels_u, benchmark_width_ * benchmark_height_); + align_buffer_page_end(src_pixels_v, benchmark_width_ * benchmark_height_); + align_buffer_page_end(tmp_pixels_u, dst_width * dst_height); + align_buffer_page_end(tmp_pixels_v, dst_width * dst_height); + align_buffer_page_end(dst_pixels_uv_opt, dst_width * 2 * dst_height); + align_buffer_page_end(dst_pixels_uv_c, dst_width * 2 * dst_height); + + MemRandomize(src_pixels_u, benchmark_width_ * benchmark_height_); + MemRandomize(src_pixels_v, benchmark_width_ * benchmark_height_); + MemRandomize(tmp_pixels_u, dst_width * dst_height); + MemRandomize(tmp_pixels_v, dst_width * dst_height); + MemRandomize(dst_pixels_uv_opt, dst_width * 2 * dst_height); + MemRandomize(dst_pixels_uv_c, dst_width * 2 * dst_height); + + MaskCpuFlags(disable_cpu_flags_); + HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v, + benchmark_width_, dst_pixels_uv_c, dst_width * 2, + benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v, + benchmark_width_, dst_pixels_uv_opt, dst_width * 2, + benchmark_width_, benchmark_height_); + } + + for (int i = 0; i < dst_width * 2 * dst_height; ++i) { + EXPECT_EQ(dst_pixels_uv_c[i], dst_pixels_uv_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels_u); + free_aligned_buffer_page_end(src_pixels_v); + free_aligned_buffer_page_end(tmp_pixels_u); + free_aligned_buffer_page_end(tmp_pixels_v); + free_aligned_buffer_page_end(dst_pixels_uv_opt); + free_aligned_buffer_page_end(dst_pixels_uv_c); +} + +TEST_F(LibYUVPlanarTest, NV12Copy) { + const int halfwidth = (benchmark_width_ + 1) >> 1; + const int halfheight = (benchmark_height_ + 1) >> 1; + align_buffer_page_end(src_y, benchmark_width_ * benchmark_height_); + align_buffer_page_end(src_uv, halfwidth * 2 * halfheight); + align_buffer_page_end(dst_y, benchmark_width_ * benchmark_height_); + align_buffer_page_end(dst_uv, halfwidth * 2 * halfheight); + + MemRandomize(src_y, benchmark_width_ * benchmark_height_); + MemRandomize(src_uv, halfwidth * 2 * halfheight); + MemRandomize(dst_y, benchmark_width_ * benchmark_height_); + MemRandomize(dst_uv, halfwidth * 2 * halfheight); + + for (int i = 0; i < benchmark_iterations_; ++i) { + NV12Copy(src_y, benchmark_width_, src_uv, halfwidth * 2, dst_y, + benchmark_width_, dst_uv, halfwidth * 2, benchmark_width_, + benchmark_height_); + } + + for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { + EXPECT_EQ(src_y[i], dst_y[i]); + } + for (int i = 0; i < halfwidth * 2 * halfheight; ++i) { + EXPECT_EQ(src_uv[i], dst_uv[i]); + } + + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_uv); + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVPlanarTest, NV21Copy) { + const int halfwidth = (benchmark_width_ + 1) >> 1; + const int halfheight = (benchmark_height_ + 1) >> 1; + align_buffer_page_end(src_y, benchmark_width_ * benchmark_height_); + align_buffer_page_end(src_vu, halfwidth * 2 * halfheight); + align_buffer_page_end(dst_y, benchmark_width_ * benchmark_height_); + align_buffer_page_end(dst_vu, halfwidth * 2 * halfheight); + + MemRandomize(src_y, benchmark_width_ * benchmark_height_); + MemRandomize(src_vu, halfwidth * 2 * halfheight); + MemRandomize(dst_y, benchmark_width_ * benchmark_height_); + MemRandomize(dst_vu, halfwidth * 2 * halfheight); + + for (int i = 0; i < benchmark_iterations_; ++i) { + NV21Copy(src_y, benchmark_width_, src_vu, halfwidth * 2, dst_y, + benchmark_width_, dst_vu, halfwidth * 2, benchmark_width_, + benchmark_height_); + } + + for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { + EXPECT_EQ(src_y[i], dst_y[i]); + } + for (int i = 0; i < halfwidth * 2 * halfheight; ++i) { + EXPECT_EQ(src_vu[i], dst_vu[i]); + } + + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_vu); + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_vu); +} } // namespace libyuv |