diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-31 15:23:43 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2023-07-31 15:23:43 +0000 |
commit | de35ce683c1fd73fd043f2d43bca7c35200b7a93 (patch) | |
tree | 303e8eabb435a80ebec36a19c23ca85744d58fb5 /files/unit_test | |
parent | 06d64850caa240513108c6540a89fc6d78505596 (diff) | |
parent | cd56a504b7d42276155d9b59ea0bcdad88b5d7e6 (diff) | |
download | libyuv-de35ce683c1fd73fd043f2d43bca7c35200b7a93.tar.gz |
Merge "Snap for 10586204 from 1f9deebc6ecf78b637dff50d62772b48332ea5ea to androidx-core-release" into androidx-core-release
Diffstat (limited to 'files/unit_test')
-rw-r--r-- | files/unit_test/convert_test.cc | 762 | ||||
-rw-r--r-- | files/unit_test/cpu_test.cc | 146 | ||||
-rw-r--r-- | files/unit_test/planar_test.cc | 97 | ||||
-rw-r--r-- | files/unit_test/rotate_argb_test.cc | 106 | ||||
-rw-r--r-- | files/unit_test/rotate_test.cc | 363 | ||||
-rw-r--r-- | files/unit_test/scale_uv_test.cc | 79 | ||||
-rw-r--r-- | files/unit_test/testdata/riscv64.txt | 4 | ||||
-rw-r--r-- | files/unit_test/testdata/riscv64_rvv.txt | 4 | ||||
-rw-r--r-- | files/unit_test/testdata/riscv64_rvv_zvfh.txt | 4 | ||||
-rw-r--r-- | files/unit_test/unit_test.cc | 5 | ||||
-rw-r--r-- | files/unit_test/unit_test.h | 15 |
11 files changed, 1220 insertions, 365 deletions
diff --git a/files/unit_test/convert_test.cc b/files/unit_test/convert_test.cc index 1f975825..1f1896b0 100644 --- a/files/unit_test/convert_test.cc +++ b/files/unit_test/convert_test.cc @@ -48,6 +48,7 @@ namespace libyuv { #define AR30ToAR30 ARGBCopy #define ABGRToABGR ARGBCopy +// subsample amount uses a divide. #define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) // Planar test @@ -180,9 +181,12 @@ TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12) TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10) TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10) TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10) +TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 10) TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10) TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12) +TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 12) TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 12) +TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 12) TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12) // Test Android 420 to I420 @@ -417,131 +421,136 @@ TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10) TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12) TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12) -#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ - DOY, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ - TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ - static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ - static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ - static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ - "SRC_SUBSAMP_X unsupported"); \ - static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ - "SRC_SUBSAMP_Y unsupported"); \ - static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ - "DST_SUBSAMP_X unsupported"); \ - static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ - "DST_SUBSAMP_Y unsupported"); \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ - const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ - const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ - const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \ - const int kPaddedHeight = \ - (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \ - const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \ - const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \ - align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \ - align_buffer_page_end( \ - src_uv, \ - 2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ - align_buffer_page_end(dst_uv_c, \ - 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ - align_buffer_page_end(dst_uv_opt, \ - 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ - SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \ - for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \ - src_y_p[i] = \ - (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ - } \ - for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \ - src_uv_p[i] = \ - (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ - } \ - memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ - memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ - memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - MaskCpuFlags(disable_cpu_flags_); \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \ - DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \ - reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \ - NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \ - DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \ - reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \ - NEG kHeight); \ - } \ - if (DOY) { \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ - } \ - } \ - } \ - for (int i = 0; i < kDstHalfHeight; ++i) { \ - for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \ - EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \ - dst_uv_opt[i * 2 * kDstHalfWidth + j]); \ - } \ - } \ - free_aligned_buffer_page_end(dst_y_c); \ - free_aligned_buffer_page_end(dst_uv_c); \ - free_aligned_buffer_page_end(dst_y_opt); \ - free_aligned_buffer_page_end(dst_uv_opt); \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_uv); \ +#define TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, W1280, N, NEG, OFF, DOY, SRC_DEPTH, \ + TILE_WIDTH, TILE_HEIGHT) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ + static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ + "SRC_SUBSAMP_X unsupported"); \ + static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ + "SRC_SUBSAMP_Y unsupported"); \ + static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ + "DST_SUBSAMP_X unsupported"); \ + static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ + "DST_SUBSAMP_Y unsupported"); \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ + const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ + const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ + const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \ + const int kPaddedHeight = \ + (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \ + const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \ + const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \ + align_buffer_page_end( \ + src_uv, \ + 2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_uv_c, \ + 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_uv_opt, \ + 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ + SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \ + for (int i = 0; \ + i < kPaddedWidth * kPaddedHeight * SRC_BPC / (int)sizeof(SRC_T); \ + ++i) { \ + src_y_p[i] = \ + (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ + } \ + for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2 * \ + SRC_BPC / (int)sizeof(SRC_T); \ + ++i) { \ + src_uv_p[i] = \ + (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ + } \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \ + 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \ + DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \ + reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \ + NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \ + 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \ + DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \ + reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \ + NEG kHeight); \ + } \ + if (DOY) { \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + } \ + } \ + } \ + for (int i = 0; i < kDstHalfHeight; ++i) { \ + for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \ + EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \ + dst_uv_opt[i * 2 * kDstHalfWidth + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ } -#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \ - TILE_HEIGHT) \ - TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, 1, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, 1, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, \ - TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) - -TESTBIPLANARTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1) -TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1) -TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1) -TESTBIPLANARTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1) -TESTBIPLANARTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1) -TESTBIPLANARTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1) -TESTBIPLANARTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1) -TESTBIPLANARTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1) -TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1) -TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1) -TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32) - -#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ +#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) + +TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1) +TESTBPTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1) +TESTBPTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1) +TESTBPTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1) +TESTBPTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1) +TESTBPTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1) +TESTBPTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1) +TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1) +TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32) +TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32) + +#define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, W1280, N, NEG, OFF, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ @@ -621,30 +630,30 @@ TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32) free_aligned_buffer_page_end(src_uv); \ } -#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \ - TILE_HEIGHT) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, \ - TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH, \ - TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH, \ - TILE_WIDTH, TILE_HEIGHT) - -TESTBIPLANARTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) -TESTBIPLANARTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) -TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32) +#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) + +TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32) +TESTBPTOP(P010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10, 1, 1) +TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1) // Provide matrix wrappers for full range bt.709 #define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \ @@ -680,6 +689,12 @@ TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32) #define I422ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ I422ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ kFilterBilinear) +#define I420ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \ + I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define I422ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \ + I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) #define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) @@ -792,8 +807,12 @@ TESTPLANARTOB(V422, 2, 1, ARGB, 4, 4, 1) TESTPLANARTOB(V422, 2, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) +TESTPLANARTOB(I422, 1, 1, RGB24, 3, 3, 1) +TESTPLANARTOB(I422, 1, 1, RAW, 3, 3, 1) TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(I444, 1, 1, RGB24, 3, 3, 1) +TESTPLANARTOB(I444, 1, 1, RAW, 3, 3, 1) TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1) TESTPLANARTOB(J444, 1, 1, ABGR, 4, 4, 1) TESTPLANARTOB(H444, 1, 1, ARGB, 4, 4, 1) @@ -816,6 +835,8 @@ TESTPLANARTOB(H420, 2, 2, AB30, 4, 4, 1) #endif TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1) +TESTPLANARTOB(I422, 2, 2, RGB24Filter, 3, 3, 1) #else TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) @@ -832,14 +853,15 @@ TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1) TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1) TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1) TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1) -TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1) TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1) +TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1) TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) #endif @@ -1056,8 +1078,8 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1) TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) #endif -#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \ - BPP_B, W1280, N, NEG, OFF) \ +#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + W1280, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ const int kWidth = W1280; \ const int kHeight = benchmark_height_; \ @@ -1110,15 +1132,15 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) free_aligned_buffer_page_end(dst_argb32_opt); \ } -#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - benchmark_width_ + 1, _Any, +, 0) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - benchmark_width_, _Unaligned, +, 2) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - benchmark_width_, _Invert, -, 0) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - benchmark_width_, _Opt, +, 0) +#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Unaligned, +, 2) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Invert, -, 0) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Opt, +, 0) #define JNV12ToARGB(a, b, c, d, e, f, g, h) \ NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) @@ -1139,29 +1161,29 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) #define JNV12ToRGB565(a, b, c, d, e, f, g, h) \ NV12ToRGB565Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) -TESTBIPLANARTOB(JNV12, 2, 2, ARGB, ARGB, 4) -TESTBIPLANARTOB(JNV21, 2, 2, ARGB, ARGB, 4) -TESTBIPLANARTOB(JNV12, 2, 2, ABGR, ABGR, 4) -TESTBIPLANARTOB(JNV21, 2, 2, ABGR, ABGR, 4) -TESTBIPLANARTOB(JNV12, 2, 2, RGB24, RGB24, 3) -TESTBIPLANARTOB(JNV21, 2, 2, RGB24, RGB24, 3) -TESTBIPLANARTOB(JNV12, 2, 2, RAW, RAW, 3) -TESTBIPLANARTOB(JNV21, 2, 2, RAW, RAW, 3) +TESTBPTOB(JNV12, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(JNV21, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(JNV12, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(JNV21, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(JNV12, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(JNV21, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(JNV12, 2, 2, RAW, RAW, 3) +TESTBPTOB(JNV21, 2, 2, RAW, RAW, 3) #ifdef LITTLE_ENDIAN_ONLY_TEST -TESTBIPLANARTOB(JNV12, 2, 2, RGB565, RGB565, 2) +TESTBPTOB(JNV12, 2, 2, RGB565, RGB565, 2) #endif -TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4) -TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4) -TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4) -TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4) -TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3) -TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3) -TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3) -TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3) -TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3) +TESTBPTOB(NV12, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(NV21, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(NV12, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(NV21, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(NV12, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(NV21, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(NV12, 2, 2, RAW, RAW, 3) +TESTBPTOB(NV21, 2, 2, RAW, RAW, 3) +TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3) #ifdef LITTLE_ENDIAN_ONLY_TEST -TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2) +TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2) #endif #define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ @@ -1236,6 +1258,8 @@ TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1) TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1) TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2) TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1) +TESTATOPLANAR(ABGR, 4, 1, J420, 2, 2) +TESTATOPLANAR(ABGR, 4, 1, J422, 2, 1) #ifdef LITTLE_ENDIAN_ONLY_TEST TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2) TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2) @@ -1254,8 +1278,84 @@ TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1) TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2) TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1) -#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \ - SUBSAMP_Y, W1280, N, NEG, OFF) \ +#define TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, \ + SUBSAMP_Y, W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \ + align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ + align_buffer_page_end(dst_a_c, kWidth* kHeight); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_c, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_a_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_opt, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_a_c, 1, kWidth* kHeight); \ + memset(dst_y_c, 2, kWidth* kHeight); \ + memset(dst_uv_c, 3, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_a_opt, 101, kWidth* kHeight); \ + memset(dst_y_opt, 102, kWidth* kHeight); \ + memset(dst_uv_opt, 103, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kStride; ++j) \ + src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \ + kStrideUV * 2, dst_uv_c + kStrideUV, kStrideUV * 2, \ + dst_a_c, kWidth, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \ + dst_uv_opt, kStrideUV * 2, dst_uv_opt + kStrideUV, \ + kStrideUV * 2, dst_a_opt, kWidth, kWidth, \ + NEG kHeight); \ + } \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + EXPECT_EQ(dst_a_c[i * kWidth + j], dst_a_opt[i * kWidth + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \ + for (int j = 0; j < kStrideUV; ++j) { \ + EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_a_c); \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_a_opt); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_argb); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#else +#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#endif + +TESTATOPLANARA(ARGB, 4, 1, I420Alpha, 2, 2) + +#define TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + W1280, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ const int kWidth = W1280; \ const int kHeight = benchmark_height_; \ @@ -1301,25 +1401,25 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1) free_aligned_buffer_page_end(src_argb); \ } -#define TESTATOBIPLANAR(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ + 1, _Any, +, 0) \ - TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 2) \ - TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0) \ - TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0) - -TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2) -TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2) -TESTATOBIPLANAR(ABGR, 1, 4, NV12, 2, 2) -TESTATOBIPLANAR(ABGR, 1, 4, NV21, 2, 2) -TESTATOBIPLANAR(RAW, 1, 3, JNV21, 2, 2) -TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2) -TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2) -TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2) -TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2) +#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) + +TESTATOBP(ARGB, 1, 4, NV12, 2, 2) +TESTATOBP(ARGB, 1, 4, NV21, 2, 2) +TESTATOBP(ABGR, 1, 4, NV12, 2, 2) +TESTATOBP(ABGR, 1, 4, NV21, 2, 2) +TESTATOBP(RAW, 1, 3, JNV21, 2, 2) +TESTATOBP(YUY2, 2, 4, NV12, 2, 2) +TESTATOBP(UYVY, 2, 4, NV12, 2, 2) +TESTATOBP(AYUV, 1, 4, NV12, 2, 2) +TESTATOBP(AYUV, 1, 4, NV21, 2, 2) #define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \ @@ -1440,6 +1540,7 @@ TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1) TESTATOB(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1) TESTATOB(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1) TESTATOB(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) +TESTATOB(ABGR, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) TESTATOB(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) TESTATOB(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1) @@ -1450,7 +1551,7 @@ TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1) #endif TESTATOB(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1) TESTATOB(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1) -TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) // 4 +TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) TESTATOB(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) TESTATOB(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) TESTATOB(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) @@ -1484,6 +1585,127 @@ TESTATOB(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) TESTATOB(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) +// in place test +#define TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ + EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ + const int kStrideA = \ + (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + const int kStrideB = \ + (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ + align_buffer_page_end(src_argb, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + align_buffer_page_end(dst_argb_c, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + align_buffer_page_end(dst_argb_opt, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ + src_argb[i + OFF] = (fastrand() & 0xff); \ + } \ + memcpy(dst_argb_c + OFF, src_argb, \ + kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ + memcpy(dst_argb_opt + OFF, src_argb, \ + kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_B((TYPE_A*)(dst_argb_c /* src */ + OFF), kStrideA, \ + (TYPE_B*)dst_argb_c, kStrideB, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \ + (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \ + } \ + memcpy(dst_argb_opt + OFF, src_argb, \ + kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ + FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \ + (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \ + for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#define TESTATOA(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ + EPP_B, STRIDE_B, HEIGHT_B) \ + TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ + STRIDE_B, HEIGHT_B, benchmark_width_, _Inplace, +, 0) + +TESTATOA(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +TESTATOA(AB30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(ABGR, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) +#endif +TESTATOA(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1) +#endif +TESTATOA(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) +TESTATOA(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#endif +TESTATOA(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(ARGB, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) +#endif +TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1) +// TODO(fbarchard): Support in place for mirror. +// TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) +TESTATOA(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1) +TESTATOA(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) +TESTATOA(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1) +#endif +TESTATOA(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) +// TODO(fbarchard): Support in place for conversions that increase bpp. +// TESTATOA(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) +// TESTATOA(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +// TESTATOA(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 1, 1) +// TESTATOA(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1) +// TESTATOA(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1) +// TESTATOA(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1) +// TESTATOA(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1) +TESTATOA(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1) +// TESTATOA(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1) +// TESTATOA(RGB24, uint8_t, 3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1) +TESTATOA(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +// TESTATOA(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) +#endif +TESTATOA(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +// TESTATOA(UYVY, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1) +// TESTATOA(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1) +// TESTATOA(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) +// TESTATOA(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) +// TESTATOA(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) +// TESTATOA(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) +TESTATOA(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(AR64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +TESTATOA(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +TESTATOA(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) +TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) + #define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ HEIGHT_B, W1280, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither##N) { \ @@ -2065,6 +2287,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420) { int half_height = (height + 1) / 2; int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } align_buffer_page_end(dst_y, width * height); align_buffer_page_end(dst_u, half_width * half_height); @@ -2099,6 +2324,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) { int half_height = (height + 1) / 2; int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } // Convert to NV21 align_buffer_page_end(dst_y, width * height); @@ -2158,6 +2386,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420_NV12) { int half_height = (height + 1) / 2; int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } // Convert to NV12 align_buffer_page_end(dst_y, width * height); @@ -2217,6 +2448,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) { int half_height = (height + 1) / 2; int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } align_buffer_page_end(dst_y, width * height); align_buffer_page_end(dst_uv, half_width * half_height * 2); @@ -2247,6 +2481,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) { int half_height = (height + 1) / 2; int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } align_buffer_page_end(dst_y, width * height); align_buffer_page_end(dst_uv, half_width * half_height * 2); @@ -2282,6 +2519,9 @@ TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) { int half_height = (height + 1) / 2; int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } align_buffer_page_end(dst_y, width * height); align_buffer_page_end(dst_uv, half_width * half_height * 2); @@ -2312,6 +2552,9 @@ TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) { int half_height = (height + 1) / 2; int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } align_buffer_page_end(dst_y, width * height); align_buffer_page_end(dst_uv, half_width * half_height * 2); @@ -2346,6 +2589,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) { int half_height = (height + 1) / 2; int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } align_buffer_page_end(dst_y, width * height); align_buffer_page_end(dst_uv, half_width * half_height * 2); @@ -2376,6 +2622,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_400) { int half_height = (height + 1) / 2; int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } align_buffer_page_end(dst_y, width * height); align_buffer_page_end(dst_uv, half_width * half_height * 2); @@ -2410,6 +2659,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) { int half_height = (height + 1) / 2; int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } align_buffer_page_end(dst_y, width * height); align_buffer_page_end(dst_uv, half_width * half_height * 2); @@ -2440,6 +2692,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_444) { int half_height = (height + 1) / 2; int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } align_buffer_page_end(dst_y, width * height); align_buffer_page_end(dst_uv, half_width * half_height * 2); @@ -2472,6 +2727,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToARGB) { int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } align_buffer_page_end(dst_argb, width * height * 4); for (int times = 0; times < benchmark_iterations; ++times) { @@ -2921,6 +3179,51 @@ TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4) TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12) TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12) +TEST_F(LibYUVConvertTest, MM21ToYUY2) { + const int kWidth = (benchmark_width_ + 15) & (~15); + const int kHeight = (benchmark_height_ + 31) & (~31); + + align_buffer_page_end(orig_y, kWidth * kHeight); + align_buffer_page_end(orig_uv, + 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); + + align_buffer_page_end(tmp_y, kWidth * kHeight); + align_buffer_page_end(tmp_u, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); + align_buffer_page_end(tmp_v, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); + + align_buffer_page_end(dst_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight); + align_buffer_page_end(golden_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight); + + MemRandomize(orig_y, kWidth * kHeight); + MemRandomize(orig_uv, 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); + + /* Convert MM21 to YUY2 in 2 steps for reference */ + libyuv::MM21ToI420(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2), tmp_y, + kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v, + SUBSAMPLE(kWidth, 2), kWidth, kHeight); + libyuv::I420ToYUY2(tmp_y, kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v, + SUBSAMPLE(kWidth, 2), golden_yuyv, + 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); + + /* Convert to NV12 */ + for (int i = 0; i < benchmark_iterations_; ++i) { + libyuv::MM21ToYUY2(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2), + dst_yuyv, 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); + } + + for (int i = 0; i < 4 * SUBSAMPLE(kWidth, 2) * kHeight; ++i) { + EXPECT_EQ(dst_yuyv[i], golden_yuyv[i]); + } + + free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(orig_uv); + free_aligned_buffer_page_end(tmp_y); + free_aligned_buffer_page_end(tmp_u); + free_aligned_buffer_page_end(tmp_v); + free_aligned_buffer_page_end(dst_yuyv); + free_aligned_buffer_page_end(golden_yuyv); +} + // Transitive test. A to B to C is same as A to C. // Benchmarks A To B to C for comparison to 1 step, benchmarked elsewhere. #define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ @@ -3353,6 +3656,8 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) { I012ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) #define I012ToAR30(a, b, c, d, e, f, g, h, i, j) \ I012ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) +#define I012ToAB30(a, b, c, d, e, f, g, h, i, j) \ + I012ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) #define I410ToARGB(a, b, c, d, e, f, g, h, i, j) \ I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) @@ -3495,6 +3800,7 @@ TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AB30, 4, 4, 1) TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AR30, 4, 4, 1) TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AB30, 4, 4, 1) TESTPLANAR16TOB(I012, 2, 2, 0xfff, AR30, 4, 4, 1) +TESTPLANAR16TOB(I012, 2, 2, 0xfff, AB30, 4, 4, 1) TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30Filter, 4, 4, 1) TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30Filter, 4, 4, 1) #endif // LITTLE_ENDIAN_ONLY_TEST @@ -3733,8 +4039,8 @@ TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10) TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10) #endif // DISABLE_SLOW_TESTS -#define TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \ +#define TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ const int kWidth = W1280; \ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ @@ -3777,16 +4083,16 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10) free_aligned_buffer_page_end(dst_argb_opt); \ } -#define TESTBIPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - ALIGN, YALIGN, S_DEPTH) \ - TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \ - TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \ - TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \ - TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH) +#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Opt, +, 0, 0, S_DEPTH) #define P010ToARGB(a, b, c, d, e, f, g, h) \ P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) @@ -3829,23 +4135,23 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10) kFilterBilinear) #if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) -TESTBIPLANAR16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12) -TESTBIPLANAR16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12) -TESTBIPLANAR16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16) -TESTBIPLANAR16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16) -TESTBIPLANAR16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10) +TESTBP16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10) +TESTBP16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12) +TESTBP16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12) +TESTBP16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16) +TESTBP16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16) +TESTBP16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10) #ifdef LITTLE_ENDIAN_ONLY_TEST -TESTBIPLANAR16TOB(P010, 2, 2, AR30, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P210, 2, 1, AR30, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P012, 2, 2, AR30, 4, 4, 1, 12) -TESTBIPLANAR16TOB(P212, 2, 1, AR30, 4, 4, 1, 12) -TESTBIPLANAR16TOB(P016, 2, 2, AR30, 4, 4, 1, 16) -TESTBIPLANAR16TOB(P216, 2, 1, AR30, 4, 4, 1, 16) -TESTBIPLANAR16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10) +TESTBP16TOB(P010, 2, 2, AR30, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, AR30, 4, 4, 1, 10) +TESTBP16TOB(P012, 2, 2, AR30, 4, 4, 1, 12) +TESTBP16TOB(P212, 2, 1, AR30, 4, 4, 1, 12) +TESTBP16TOB(P016, 2, 2, AR30, 4, 4, 1, 16) +TESTBP16TOB(P216, 2, 1, AR30, 4, 4, 1, 16) +TESTBP16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10) #endif // LITTLE_ENDIAN_ONLY_TEST #endif // DISABLE_SLOW_TESTS diff --git a/files/unit_test/cpu_test.cc b/files/unit_test/cpu_test.cc index 080778f5..93867fa7 100644 --- a/files/unit_test/cpu_test.cc +++ b/files/unit_test/cpu_test.cc @@ -20,13 +20,23 @@ namespace libyuv { TEST_F(LibYUVBaseTest, TestCpuHas) { int cpu_flags = TestCpuFlag(-1); - printf("Cpu Flags %d\n", cpu_flags); + printf("Cpu Flags 0x%x\n", cpu_flags); #if defined(__arm__) || defined(__aarch64__) int has_arm = TestCpuFlag(kCpuHasARM); - printf("Has ARM %d\n", has_arm); + printf("Has ARM 0x%x\n", has_arm); int has_neon = TestCpuFlag(kCpuHasNEON); - printf("Has NEON %d\n", has_neon); + printf("Has NEON 0x%x\n", has_neon); #endif +#if defined(__riscv) && defined(__linux__) + int has_riscv = TestCpuFlag(kCpuHasRISCV); + printf("Has RISCV 0x%x\n", has_riscv); + int has_rvv = TestCpuFlag(kCpuHasRVV); + printf("Has RVV 0x%x\n", has_rvv); + int has_rvvzvfh = TestCpuFlag(kCpuHasRVVZVFH); + printf("Has RVVZVFH 0x%x\n", has_rvvzvfh); +#endif +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \ + defined(_M_X64) int has_x86 = TestCpuFlag(kCpuHasX86); int has_sse2 = TestCpuFlag(kCpuHasSSE2); int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); @@ -45,39 +55,38 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2); int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG); int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ); - printf("Has X86 %d\n", has_x86); - printf("Has SSE2 %d\n", has_sse2); - printf("Has SSSE3 %d\n", has_ssse3); - printf("Has SSE41 %d\n", has_sse41); - printf("Has SSE42 %d\n", has_sse42); - printf("Has AVX %d\n", has_avx); - printf("Has AVX2 %d\n", has_avx2); - printf("Has ERMS %d\n", has_erms); - printf("Has FMA3 %d\n", has_fma3); - printf("Has F16C %d\n", has_f16c); - printf("Has GFNI %d\n", has_gfni); - printf("Has AVX512BW %d\n", has_avx512bw); - printf("Has AVX512VL %d\n", has_avx512vl); - printf("Has AVX512VNNI %d\n", has_avx512vnni); - printf("Has AVX512VBMI %d\n", has_avx512vbmi); - printf("Has AVX512VBMI2 %d\n", has_avx512vbmi2); - printf("Has AVX512VBITALG %d\n", has_avx512vbitalg); - printf("Has AVX512VPOPCNTDQ %d\n", has_avx512vpopcntdq); - + printf("Has X86 0x%x\n", has_x86); + printf("Has SSE2 0x%x\n", has_sse2); + printf("Has SSSE3 0x%x\n", has_ssse3); + printf("Has SSE41 0x%x\n", has_sse41); + printf("Has SSE42 0x%x\n", has_sse42); + printf("Has AVX 0x%x\n", has_avx); + printf("Has AVX2 0x%x\n", has_avx2); + printf("Has ERMS 0x%x\n", has_erms); + printf("Has FMA3 0x%x\n", has_fma3); + printf("Has F16C 0x%x\n", has_f16c); + printf("Has GFNI 0x%x\n", has_gfni); + printf("Has AVX512BW 0x%x\n", has_avx512bw); + printf("Has AVX512VL 0x%x\n", has_avx512vl); + printf("Has AVX512VNNI 0x%x\n", has_avx512vnni); + printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi); + printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2); + printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg); + printf("Has AVX512VPOPCNTDQ 0x%x\n", has_avx512vpopcntdq); +#endif #if defined(__mips__) int has_mips = TestCpuFlag(kCpuHasMIPS); - printf("Has MIPS %d\n", has_mips); + printf("Has MIPS 0x%x\n", has_mips); int has_msa = TestCpuFlag(kCpuHasMSA); - printf("Has MSA %d\n", has_msa); + printf("Has MSA 0x%x\n", has_msa); #endif - #if defined(__loongarch__) int has_loongarch = TestCpuFlag(kCpuHasLOONGARCH); - printf("Has LOONGARCH %d\n", has_loongarch); + printf("Has LOONGARCH 0x%x\n", has_loongarch); int has_lsx = TestCpuFlag(kCpuHasLSX); - printf("Has LSX %d\n", has_lsx); + printf("Has LSX 0x%x\n", has_lsx); int has_lasx = TestCpuFlag(kCpuHasLASX); - printf("Has LASX %d\n", has_lasx); + printf("Has LASX 0x%x\n", has_lasx); #endif } @@ -104,27 +113,33 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) { #ifdef __i386__ printf("__i386__ %d\n", __i386__); #endif -#ifdef __mips - printf("__mips %d\n", __mips); -#endif -#ifdef __mips_isa_rev - printf("__mips_isa_rev %d\n", __mips_isa_rev); -#endif #ifdef __x86_64__ printf("__x86_64__ %d\n", __x86_64__); #endif +#ifdef _M_IX86 + printf("_M_IX86 %d\n", _M_IX86); +#endif +#ifdef _M_X64 + printf("_M_X64 %d\n", _M_X64); +#endif #ifdef _MSC_VER printf("_MSC_VER %d\n", _MSC_VER); #endif #ifdef __aarch64__ printf("__aarch64__ %d\n", __aarch64__); #endif -#ifdef __APPLE__ - printf("__APPLE__ %d\n", __APPLE__); -#endif #ifdef __arm__ printf("__arm__ %d\n", __arm__); #endif +#ifdef __riscv + printf("__riscv %d\n", __riscv); +#endif +#ifdef __riscv_vector + printf("__riscv_vector %d\n", __riscv_vector); +#endif +#ifdef __APPLE__ + printf("__APPLE__ %d\n", __APPLE__); +#endif #ifdef __clang__ printf("__clang__ %d\n", __clang__); #endif @@ -140,20 +155,11 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) { #ifdef __mips_msa printf("__mips_msa %d\n", __mips_msa); #endif -#ifdef __native_client__ - printf("__native_client__ %d\n", __native_client__); -#endif -#ifdef __pic__ - printf("__pic__ %d\n", __pic__); -#endif -#ifdef __pnacl__ - printf("__pnacl__ %d\n", __pnacl__); -#endif -#ifdef _M_IX86 - printf("_M_IX86 %d\n", _M_IX86); +#ifdef __mips + printf("__mips %d\n", __mips); #endif -#ifdef _M_X64 - printf("_M_X64 %d\n", _M_X64); +#ifdef __mips_isa_rev + printf("__mips_isa_rev %d\n", __mips_isa_rev); #endif #ifdef _MIPS_ARCH_LOONGSON3A printf("_MIPS_ARCH_LOONGSON3A %d\n", _MIPS_ARCH_LOONGSON3A); @@ -164,6 +170,15 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) { #ifdef _WIN32 printf("_WIN32 %d\n", _WIN32); #endif +#ifdef __native_client__ + printf("__native_client__ %d\n", __native_client__); +#endif +#ifdef __pic__ + printf("__pic__ %d\n", __pic__); +#endif +#ifdef __pnacl__ + printf("__pnacl__ %d\n", __pnacl__); +#endif #ifdef GG_LONGLONG printf("GG_LONGLONG %d\n", GG_LONGLONG); #endif @@ -200,8 +215,9 @@ TEST_F(LibYUVBaseTest, TestCpuId) { cpu_info[0] = cpu_info[1]; // Reorder output cpu_info[1] = cpu_info[3]; cpu_info[3] = 0; - printf("Cpu Vendor: %s %x %x %x\n", reinterpret_cast<char*>(&cpu_info[0]), - cpu_info[0], cpu_info[1], cpu_info[2]); + printf("Cpu Vendor: %s 0x%x 0x%x 0x%x\n", + reinterpret_cast<char*>(&cpu_info[0]), cpu_info[0], cpu_info[1], + cpu_info[2]); EXPECT_EQ(12u, strlen(reinterpret_cast<char*>(&cpu_info[0]))); // CPU Family and Model @@ -264,6 +280,32 @@ TEST_F(LibYUVBaseTest, TestLinuxMipsMsa) { } } +TEST_F(LibYUVBaseTest, TestLinuxRVV) { + if (FileExists("../../unit_test/testdata/riscv64.txt")) { + printf("Note: testing to load \"../../unit_test/testdata/riscv64.txt\"\n"); + + EXPECT_EQ(0, RiscvCpuCaps("../../unit_test/testdata/riscv64.txt")); + EXPECT_EQ(kCpuHasRVV, + RiscvCpuCaps("../../unit_test/testdata/riscv64_rvv.txt")); + EXPECT_EQ(kCpuHasRVV | kCpuHasRVVZVFH, + RiscvCpuCaps("../../unit_test/testdata/riscv64_rvv_zvfh.txt")); + } else { + printf( + "WARNING: unable to load " + "\"../../unit_test/testdata/riscv64.txt\"\n"); + } +#if defined(__linux__) && defined(__riscv) + if (FileExists("/proc/cpuinfo")) { + if (!(kCpuHasRVV & RiscvCpuCaps("/proc/cpuinfo"))) { + // This can happen on RVV emulator but /proc/cpuinfo is from host. + printf("WARNING: RVV build enabled but CPU does not have RVV\n"); + } + } else { + printf("WARNING: unable to load \"/proc/cpuinfo\"\n"); + } +#endif +} + // TODO(fbarchard): Fix clangcl test of cpuflags. #ifdef _MSC_VER TEST_F(LibYUVBaseTest, DISABLED_TestSetCpuFlags) { diff --git a/files/unit_test/planar_test.cc b/files/unit_test/planar_test.cc index 3a8c470b..ad97b87e 100644 --- a/files/unit_test/planar_test.cc +++ b/files/unit_test/planar_test.cc @@ -1638,29 +1638,29 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) { int i, j; // orig is tiled. Allocate enough memory for tiles. - int orig_width = (benchmark_width_ + 15) & ~15; - int orig_height = (benchmark_height_ + 15) & ~15; - int orig_plane_size = orig_width * orig_height; + int tile_width = (benchmark_width_ + 15) & ~15; + int tile_height = (benchmark_height_ + 15) & ~15; + int tile_plane_size = tile_width * tile_height; int y_plane_size = benchmark_width_ * benchmark_height_; - align_buffer_page_end(orig_y, orig_plane_size); + align_buffer_page_end(tile_y, tile_plane_size); align_buffer_page_end(dst_c, y_plane_size); align_buffer_page_end(dst_opt, y_plane_size); - MemRandomize(orig_y, orig_plane_size); + MemRandomize(tile_y, tile_plane_size); memset(dst_c, 0, y_plane_size); memset(dst_opt, 0, y_plane_size); // Disable all optimizations. MaskCpuFlags(disable_cpu_flags_); for (j = 0; j < benchmark_iterations_; j++) { - DetilePlane(orig_y, orig_width, dst_c, benchmark_width_, benchmark_width_, + DetilePlane(tile_y, tile_width, dst_c, benchmark_width_, benchmark_width_, benchmark_height_, 16); } // Enable optimizations. MaskCpuFlags(benchmark_cpu_info_); for (j = 0; j < benchmark_iterations_; j++) { - DetilePlane(orig_y, orig_width, dst_opt, benchmark_width_, benchmark_width_, + DetilePlane(tile_y, tile_width, dst_opt, benchmark_width_, benchmark_width_, benchmark_height_, 16); } @@ -1668,7 +1668,46 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) { EXPECT_EQ(dst_c[i], dst_opt[i]); } - free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(tile_y); + free_aligned_buffer_page_end(dst_c); + free_aligned_buffer_page_end(dst_opt); +} + +TEST_F(LibYUVPlanarTest, TestDetilePlane_16) { + int i, j; + + // orig is tiled. Allocate enough memory for tiles. + int tile_width = (benchmark_width_ + 15) & ~15; + int tile_height = (benchmark_height_ + 15) & ~15; + int tile_plane_size = tile_width * tile_height * 2; + int y_plane_size = benchmark_width_ * benchmark_height_ * 2; + align_buffer_page_end(tile_y, tile_plane_size); + align_buffer_page_end(dst_c, y_plane_size); + align_buffer_page_end(dst_opt, y_plane_size); + + MemRandomize(tile_y, tile_plane_size); + memset(dst_c, 0, y_plane_size); + memset(dst_opt, 0, y_plane_size); + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags_); + for (j = 0; j < benchmark_iterations_; j++) { + DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_c, + benchmark_width_, benchmark_width_, benchmark_height_, 16); + } + + // Enable optimizations. + MaskCpuFlags(benchmark_cpu_info_); + for (j = 0; j < benchmark_iterations_; j++) { + DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_opt, + benchmark_width_, benchmark_width_, benchmark_height_, 16); + } + + for (i = 0; i < y_plane_size; ++i) { + EXPECT_EQ(dst_c[i], dst_opt[i]); + } + + free_aligned_buffer_page_end(tile_y); free_aligned_buffer_page_end(dst_c); free_aligned_buffer_page_end(dst_opt); } @@ -1678,33 +1717,33 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) { int i, j; // orig is tiled. Allocate enough memory for tiles. - int orig_width = (benchmark_width_ + 15) & ~15; - int orig_height = (benchmark_height_ + 15) & ~15; - int orig_plane_size = orig_width * orig_height; + int tile_width = (benchmark_width_ + 15) & ~15; + int tile_height = (benchmark_height_ + 15) & ~15; + int tile_plane_size = tile_width * tile_height; int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_; - align_buffer_page_end(orig_uv, orig_plane_size); - align_buffer_page_end(detiled_uv, orig_plane_size); + align_buffer_page_end(tile_uv, tile_plane_size); + align_buffer_page_end(detiled_uv, tile_plane_size); align_buffer_page_end(dst_u_two_stage, uv_plane_size); align_buffer_page_end(dst_u_opt, uv_plane_size); align_buffer_page_end(dst_v_two_stage, uv_plane_size); align_buffer_page_end(dst_v_opt, uv_plane_size); - MemRandomize(orig_uv, orig_plane_size); - memset(detiled_uv, 0, orig_plane_size); + MemRandomize(tile_uv, tile_plane_size); + memset(detiled_uv, 0, tile_plane_size); memset(dst_u_two_stage, 0, uv_plane_size); memset(dst_u_opt, 0, uv_plane_size); memset(dst_v_two_stage, 0, uv_plane_size); memset(dst_v_opt, 0, uv_plane_size); - DetileSplitUVPlane(orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, + DetileSplitUVPlane(tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16); // Benchmark 2 step conversion for comparison. for (j = 0; j < benchmark_iterations_; j++) { - DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_, + DetilePlane(tile_uv, tile_width, detiled_uv, benchmark_width_, benchmark_width_, benchmark_height_, 16); - SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage, + SplitUVPlane(detiled_uv, tile_width, dst_u_two_stage, (benchmark_width_ + 1) / 2, dst_v_two_stage, (benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2, benchmark_height_); @@ -1715,7 +1754,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) { EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]); } - free_aligned_buffer_page_end(orig_uv); + free_aligned_buffer_page_end(tile_uv); free_aligned_buffer_page_end(detiled_uv); free_aligned_buffer_page_end(dst_u_two_stage); free_aligned_buffer_page_end(dst_u_opt); @@ -1727,17 +1766,17 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { int i, j; // orig is tiled. Allocate enough memory for tiles. - int orig_width = (benchmark_width_ + 15) & ~15; - int orig_height = (benchmark_height_ + 15) & ~15; - int orig_plane_size = orig_width * orig_height; + int tile_width = (benchmark_width_ + 15) & ~15; + int tile_height = (benchmark_height_ + 15) & ~15; + int tile_plane_size = tile_width * tile_height; int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_; - align_buffer_page_end(orig_uv, orig_plane_size); + align_buffer_page_end(tile_uv, tile_plane_size); align_buffer_page_end(dst_u_c, uv_plane_size); align_buffer_page_end(dst_u_opt, uv_plane_size); align_buffer_page_end(dst_v_c, uv_plane_size); align_buffer_page_end(dst_v_opt, uv_plane_size); - MemRandomize(orig_uv, orig_plane_size); + MemRandomize(tile_uv, tile_plane_size); memset(dst_u_c, 0, uv_plane_size); memset(dst_u_opt, 0, uv_plane_size); memset(dst_v_c, 0, uv_plane_size); @@ -1746,7 +1785,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { // Disable all optimizations. MaskCpuFlags(disable_cpu_flags_); - DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2, + DetileSplitUVPlane(tile_uv, tile_width, dst_u_c, (benchmark_width_ + 1) / 2, dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16); @@ -1755,7 +1794,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { for (j = 0; j < benchmark_iterations_; j++) { DetileSplitUVPlane( - orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt, + tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16); } @@ -1764,7 +1803,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); } - free_aligned_buffer_page_end(orig_uv); + free_aligned_buffer_page_end(tile_uv); free_aligned_buffer_page_end(dst_u_c); free_aligned_buffer_page_end(dst_u_opt); free_aligned_buffer_page_end(dst_v_c); @@ -3495,8 +3534,8 @@ TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16) // TODO(fbarchard): improve test for platforms and cpu detect #ifdef HAS_MERGEUVROW_16_AVX2 TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) { - // Round count up to multiple of 16 - const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15; + // Round count up to multiple of 8 + const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7; align_buffer_page_end(src_pixels_u, kPixels * 2); align_buffer_page_end(src_pixels_v, kPixels * 2); diff --git a/files/unit_test/rotate_argb_test.cc b/files/unit_test/rotate_argb_test.cc index 01ed69ca..74952c4e 100644 --- a/files/unit_test/rotate_argb_test.cc +++ b/files/unit_test/rotate_argb_test.cc @@ -225,4 +225,110 @@ TEST_F(LibYUVRotateTest, RotatePlane90_TestStride) { free_aligned_buffer_page_end(src_argb); } +static void TestRotatePlane_16(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height < 1) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_stride = src_width; + int src_plane_size = src_stride * abs(src_height); + align_buffer_page_end_16(src, src_plane_size); + for (int i = 0; i < src_plane_size; ++i) { + src[i] = fastrand() & 0xff; + } + + int dst_stride = dst_width; + int dst_plane_size = dst_stride * dst_height; + align_buffer_page_end_16(dst_c, dst_plane_size); + align_buffer_page_end_16(dst_opt, dst_plane_size); + memset(dst_c, 2, dst_plane_size); + memset(dst_opt, 3, dst_plane_size); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + RotatePlane_16(src, src_stride, dst_c, dst_stride, src_width, src_height, + mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + RotatePlane_16(src, src_stride, dst_opt, dst_stride, src_width, src_height, + mode); + } + + // Rotation should be exact. + for (int i = 0; i < dst_plane_size; ++i) { + EXPECT_EQ(dst_c[i], dst_opt[i]); + } + + free_aligned_buffer_page_end_16(dst_c); + free_aligned_buffer_page_end_16(dst_opt); + free_aligned_buffer_page_end_16(src); +} + +TEST_F(LibYUVRotateTest, RotatePlane0_16_Opt) { + TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane90_16_Opt) { + TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane180_16_Opt) { + TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane270_16_Opt) { + TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane0_16_Odd) { + TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_width_ + 1, benchmark_height_ + 1, kRotate0, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane90_16_Odd) { + TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_height_ + 1, benchmark_width_ + 1, kRotate90, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane180_16_Odd) { + TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_width_ + 1, benchmark_height_ + 1, kRotate180, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane270_16_Odd) { + TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_height_ + 1, benchmark_width_ + 1, kRotate270, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + } // namespace libyuv diff --git a/files/unit_test/rotate_test.cc b/files/unit_test/rotate_test.cc index d3887414..abc08efa 100644 --- a/files/unit_test/rotate_test.cc +++ b/files/unit_test/rotate_test.cc @@ -14,6 +14,10 @@ #include "libyuv/cpu_id.h" #include "libyuv/rotate.h" +#ifdef ENABLE_ROW_TESTS +#include "libyuv/rotate_row.h" +#endif + namespace libyuv { #define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) @@ -596,4 +600,363 @@ TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2) #undef TESTAPLANARTOP #undef TESTAPLANARTOPI +static void I010TestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i010_y_size = src_width * Abs(src_height); + int src_i010_uv_size = ((src_width + 1) / 2) * ((Abs(src_height) + 1) / 2); + int src_i010_size = src_i010_y_size + src_i010_uv_size * 2; + align_buffer_page_end_16(src_i010, src_i010_size); + for (int i = 0; i < src_i010_size; ++i) { + src_i010[i] = fastrand() & 0x3ff; + } + + int dst_i010_y_size = dst_width * dst_height; + int dst_i010_uv_size = ((dst_width + 1) / 2) * ((dst_height + 1) / 2); + int dst_i010_size = dst_i010_y_size + dst_i010_uv_size * 2; + align_buffer_page_end_16(dst_i010_c, dst_i010_size); + align_buffer_page_end_16(dst_i010_opt, dst_i010_size); + memset(dst_i010_c, 2, dst_i010_size * 2); + memset(dst_i010_opt, 3, dst_i010_size * 2); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I010Rotate(src_i010, src_width, src_i010 + src_i010_y_size, + (src_width + 1) / 2, src_i010 + src_i010_y_size + src_i010_uv_size, + (src_width + 1) / 2, dst_i010_c, dst_width, + dst_i010_c + dst_i010_y_size, (dst_width + 1) / 2, + dst_i010_c + dst_i010_y_size + dst_i010_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + I010Rotate( + src_i010, src_width, src_i010 + src_i010_y_size, (src_width + 1) / 2, + src_i010 + src_i010_y_size + src_i010_uv_size, (src_width + 1) / 2, + dst_i010_opt, dst_width, dst_i010_opt + dst_i010_y_size, + (dst_width + 1) / 2, dst_i010_opt + dst_i010_y_size + dst_i010_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + } + + // Rotation should be exact. + for (int i = 0; i < dst_i010_size; ++i) { + EXPECT_EQ(dst_i010_c[i], dst_i010_opt[i]); + } + + free_aligned_buffer_page_end_16(dst_i010_c); + free_aligned_buffer_page_end_16(dst_i010_opt); + free_aligned_buffer_page_end_16(src_i010); +} + +TEST_F(LibYUVRotateTest, I010Rotate0_Opt) { + I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I010Rotate90_Opt) { + I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I010Rotate180_Opt) { + I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I010Rotate270_Opt) { + I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +static void I210TestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i210_y_size = src_width * Abs(src_height); + int src_i210_uv_size = ((src_width + 1) / 2) * Abs(src_height); + int src_i210_size = src_i210_y_size + src_i210_uv_size * 2; + align_buffer_page_end_16(src_i210, src_i210_size); + for (int i = 0; i < src_i210_size; ++i) { + src_i210[i] = fastrand() & 0x3ff; + } + + int dst_i210_y_size = dst_width * dst_height; + int dst_i210_uv_size = ((dst_width + 1) / 2) * dst_height; + int dst_i210_size = dst_i210_y_size + dst_i210_uv_size * 2; + align_buffer_page_end_16(dst_i210_c, dst_i210_size); + align_buffer_page_end_16(dst_i210_opt, dst_i210_size); + memset(dst_i210_c, 2, dst_i210_size * 2); + memset(dst_i210_opt, 3, dst_i210_size * 2); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I210Rotate(src_i210, src_width, src_i210 + src_i210_y_size, + (src_width + 1) / 2, src_i210 + src_i210_y_size + src_i210_uv_size, + (src_width + 1) / 2, dst_i210_c, dst_width, + dst_i210_c + dst_i210_y_size, (dst_width + 1) / 2, + dst_i210_c + dst_i210_y_size + dst_i210_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + I210Rotate( + src_i210, src_width, src_i210 + src_i210_y_size, (src_width + 1) / 2, + src_i210 + src_i210_y_size + src_i210_uv_size, (src_width + 1) / 2, + dst_i210_opt, dst_width, dst_i210_opt + dst_i210_y_size, + (dst_width + 1) / 2, dst_i210_opt + dst_i210_y_size + dst_i210_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + } + + // Rotation should be exact. + for (int i = 0; i < dst_i210_size; ++i) { + EXPECT_EQ(dst_i210_c[i], dst_i210_opt[i]); + } + + free_aligned_buffer_page_end_16(dst_i210_c); + free_aligned_buffer_page_end_16(dst_i210_opt); + free_aligned_buffer_page_end_16(src_i210); +} + +TEST_F(LibYUVRotateTest, I210Rotate0_Opt) { + I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I210Rotate90_Opt) { + I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I210Rotate180_Opt) { + I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I210Rotate270_Opt) { + I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +static void I410TestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i410_y_size = src_width * Abs(src_height); + int src_i410_uv_size = src_width * Abs(src_height); + int src_i410_size = src_i410_y_size + src_i410_uv_size * 2; + align_buffer_page_end_16(src_i410, src_i410_size); + for (int i = 0; i < src_i410_size; ++i) { + src_i410[i] = fastrand() & 0x3ff; + } + + int dst_i410_y_size = dst_width * dst_height; + int dst_i410_uv_size = dst_width * dst_height; + int dst_i410_size = dst_i410_y_size + dst_i410_uv_size * 2; + align_buffer_page_end_16(dst_i410_c, dst_i410_size); + align_buffer_page_end_16(dst_i410_opt, dst_i410_size); + memset(dst_i410_c, 2, dst_i410_size * 2); + memset(dst_i410_opt, 3, dst_i410_size * 2); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width, + src_i410 + src_i410_y_size + src_i410_uv_size, src_width, + dst_i410_c, dst_width, dst_i410_c + dst_i410_y_size, dst_width, + dst_i410_c + dst_i410_y_size + dst_i410_uv_size, dst_width, + src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width, + src_i410 + src_i410_y_size + src_i410_uv_size, src_width, + dst_i410_opt, dst_width, dst_i410_opt + dst_i410_y_size, + dst_width, dst_i410_opt + dst_i410_y_size + dst_i410_uv_size, + dst_width, src_width, src_height, mode); + } + + // Rotation should be exact. + for (int i = 0; i < dst_i410_size; ++i) { + EXPECT_EQ(dst_i410_c[i], dst_i410_opt[i]); + } + + free_aligned_buffer_page_end_16(dst_i410_c); + free_aligned_buffer_page_end_16(dst_i410_opt); + free_aligned_buffer_page_end_16(src_i410); +} + +TEST_F(LibYUVRotateTest, I410Rotate0_Opt) { + I410TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I410Rotate90_Opt) { + I410TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I410Rotate180_Opt) { + I410TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I410Rotate270_Opt) { + I410TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +#if defined(ENABLE_ROW_TESTS) + +TEST_F(LibYUVRotateTest, Transpose4x4_Test) { + // dst width and height + const int width = 4; + const int height = 4; + int src_pixels[4][4]; + int dst_pixels_c[4][4]; + int dst_pixels_opt[4][4]; + + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 4; ++j) { + src_pixels[i][j] = i * 10 + j; + } + } + memset(dst_pixels_c, 1, width * height * 4); + memset(dst_pixels_opt, 2, width * height * 4); + + Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_c, width * 4, width); + + const int benchmark_iterations = + (benchmark_iterations_ * benchmark_width_ * benchmark_height_ + 15) / + (4 * 4); + for (int i = 0; i < benchmark_iterations; ++i) { +#if defined(HAS_TRANSPOSE4X4_32_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + Transpose4x4_32_NEON((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } else +#elif defined(HAS_TRANSPOSE4X4_32_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + Transpose4x4_32_SSE2((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } else +#endif + { + Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } + } + + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 4; ++j) { + EXPECT_EQ(dst_pixels_c[i][j], src_pixels[j][i]); + EXPECT_EQ(dst_pixels_c[i][j], dst_pixels_opt[i][j]); + } + } +} + +TEST_F(LibYUVRotateTest, Transpose4x4_Opt) { + // dst width and height + const int width = ((benchmark_width_ * benchmark_height_ + 3) / 4 + 3) & ~3; + const int height = 4; + align_buffer_page_end(src_pixels, height * width * 4); + align_buffer_page_end(dst_pixels_c, width * height * 4); + align_buffer_page_end(dst_pixels_opt, width * height * 4); + + MemRandomize(src_pixels, height * width * 4); + memset(dst_pixels_c, 1, width * height * 4); + memset(dst_pixels_opt, 2, width * height * 4); + + Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_c, width * 4, width); + + for (int i = 0; i < benchmark_iterations_; ++i) { +#if defined(HAS_TRANSPOSE4X4_32_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + Transpose4x4_32_NEON((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } else +#elif defined(HAS_TRANSPOSE4X4_32_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + Transpose4x4_32_AVX2((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } else if (TestCpuFlag(kCpuHasSSE2)) { + Transpose4x4_32_SSE2((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } else +#endif + { + Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } + } + + for (int i = 0; i < width * height; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(dst_pixels_c); + free_aligned_buffer_page_end(dst_pixels_opt); +} + +#endif // ENABLE_ROW_TESTS + } // namespace libyuv diff --git a/files/unit_test/scale_uv_test.cc b/files/unit_test/scale_uv_test.cc index 3d524bef..dab217c9 100644 --- a/files/unit_test/scale_uv_test.cc +++ b/files/unit_test/scale_uv_test.cc @@ -39,55 +39,35 @@ static int UVTestFilter(int src_width, return 0; } - int i, j; - const int b = 0; // 128 to test for padding/stride. - int64_t src_uv_plane_size = - (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 2LL; - int src_stride_uv = (b * 2 + Abs(src_width)) * 2; + int i; + int64_t src_uv_plane_size = Abs(src_width) * Abs(src_height) * 2LL; + int src_stride_uv = Abs(src_width) * 2; + int64_t dst_uv_plane_size = dst_width * dst_height * 2LL; + int dst_stride_uv = dst_width * 2; align_buffer_page_end(src_uv, src_uv_plane_size); - if (!src_uv) { - printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); - return 0; - } - MemRandomize(src_uv, src_uv_plane_size); - - int64_t dst_uv_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 2LL; - int dst_stride_uv = (b * 2 + dst_width) * 2; - align_buffer_page_end(dst_uv_c, dst_uv_plane_size); align_buffer_page_end(dst_uv_opt, dst_uv_plane_size); - if (!dst_uv_c || !dst_uv_opt) { + + if (!src_uv || !dst_uv_c || !dst_uv_opt) { printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); return 0; } + MemRandomize(src_uv, src_uv_plane_size); memset(dst_uv_c, 2, dst_uv_plane_size); - memset(dst_uv_opt, 3, dst_uv_plane_size); - - // Warm up both versions for consistent benchmarks. - MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. - UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width, - src_height, dst_uv_c + (dst_stride_uv * b) + b * 2, dst_stride_uv, - dst_width, dst_height, f); - MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. - UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width, - src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv, - dst_width, dst_height, f); + memset(dst_uv_opt, 123, dst_uv_plane_size); MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. double c_time = get_time(); - UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width, - src_height, dst_uv_c + (dst_stride_uv * b) + b * 2, dst_stride_uv, + UVScale(src_uv, src_stride_uv, src_width, src_height, dst_uv_c, dst_stride_uv, dst_width, dst_height, f); - c_time = (get_time() - c_time); MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. double opt_time = get_time(); for (i = 0; i < benchmark_iterations; ++i) { - UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width, - src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv, - dst_width, dst_height, f); + UVScale(src_uv, src_stride_uv, src_width, src_height, dst_uv_opt, + dst_stride_uv, dst_width, dst_height, f); } opt_time = (get_time() - opt_time) / benchmark_iterations; @@ -95,18 +75,11 @@ static int UVTestFilter(int src_width, printf("filter %d - %8d us C - %8d us OPT\n", f, static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); - // C version may be a little off from the optimized. Order of - // operations may introduce rounding somewhere. So do a difference - // of the buffers and look to see that the max difference isn't - // over 2. int max_diff = 0; - for (i = b; i < (dst_height + b); ++i) { - for (j = b * 2; j < (dst_width + b) * 2; ++j) { - int abs_diff = Abs(dst_uv_c[(i * dst_stride_uv) + j] - - dst_uv_opt[(i * dst_stride_uv) + j]); - if (abs_diff > max_diff) { - max_diff = abs_diff; - } + for (i = 0; i < dst_uv_plane_size; ++i) { + int abs_diff = Abs(dst_uv_c[i] - dst_uv_opt[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; } } @@ -121,28 +94,26 @@ static int UVTestFilter(int src_width, #define DX(x, nom, denom) static_cast<int>((Abs(x) / nom) * nom) #define SX(x, nom, denom) static_cast<int>((x / nom) * denom) -#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \ +#define TEST_FACTOR1(name, filter, nom, denom) \ TEST_F(LibYUVScaleTest, UVScaleDownBy##name##_##filter) { \ int diff = UVTestFilter( \ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - EXPECT_LE(diff, max_diff); \ + EXPECT_EQ(0, diff); \ } #if defined(ENABLE_FULL_TESTS) -// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but -// filtering is different fixed point implementations for SSSE3, Neon and C. -#define TEST_FACTOR(name, nom, denom) \ - TEST_FACTOR1(name, None, nom, denom, 0) \ - TEST_FACTOR1(name, Linear, nom, denom, 3) \ - TEST_FACTOR1(name, Bilinear, nom, denom, 3) \ - TEST_FACTOR1(name, Box, nom, denom, 3) +// Test a scale factor with all 4 filters. Expect exact for SIMD vs C. +#define TEST_FACTOR(name, nom, denom) \ + TEST_FACTOR1(name, None, nom, denom) \ + TEST_FACTOR1(name, Linear, nom, denom) \ + TEST_FACTOR1(name, Bilinear, nom, denom) \ + TEST_FACTOR1(name, Box, nom, denom) #else // Test a scale factor with Bilinear. -#define TEST_FACTOR(name, nom, denom) \ - TEST_FACTOR1(name, Bilinear, nom, denom, 3) +#define TEST_FACTOR(name, nom, denom) TEST_FACTOR1(name, Bilinear, nom, denom) #endif TEST_FACTOR(2, 1, 2) diff --git a/files/unit_test/testdata/riscv64.txt b/files/unit_test/testdata/riscv64.txt new file mode 100644 index 00000000..fbb4200f --- /dev/null +++ b/files/unit_test/testdata/riscv64.txt @@ -0,0 +1,4 @@ +processor : 0 +hart : 1 +isa : rv64imac +mmu : sv48
\ No newline at end of file diff --git a/files/unit_test/testdata/riscv64_rvv.txt b/files/unit_test/testdata/riscv64_rvv.txt new file mode 100644 index 00000000..af1b3f36 --- /dev/null +++ b/files/unit_test/testdata/riscv64_rvv.txt @@ -0,0 +1,4 @@ +processor : 0 +hart : 1 +isa : rv64imafdcv +mmu : sv48
\ No newline at end of file diff --git a/files/unit_test/testdata/riscv64_rvv_zvfh.txt b/files/unit_test/testdata/riscv64_rvv_zvfh.txt new file mode 100644 index 00000000..c416c1af --- /dev/null +++ b/files/unit_test/testdata/riscv64_rvv_zvfh.txt @@ -0,0 +1,4 @@ +processor : 0 +hart : 1 +isa : rv64imafdcv_zfh_zvfh +mmu : sv48
\ No newline at end of file diff --git a/files/unit_test/unit_test.cc b/files/unit_test/unit_test.cc index 61145a46..b66ebfab 100644 --- a/files/unit_test/unit_test.cc +++ b/files/unit_test/unit_test.cc @@ -88,6 +88,11 @@ int TestCpuEnv(int cpu_info) { cpu_info &= ~libyuv::kCpuHasLASX; } #endif +#if defined(__riscv) && defined(__linux__) + if (TestEnv("LIBYUV_DISABLE_RVV")) { + cpu_info &= ~libyuv::kCpuHasRVV; + } +#endif #if !defined(__pnacl__) && !defined(__CLR_VER) && \ (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \ defined(_M_IX86)) diff --git a/files/unit_test/unit_test.h b/files/unit_test/unit_test.h index 0a8df4d2..99cc8d19 100644 --- a/files/unit_test/unit_test.h +++ b/files/unit_test/unit_test.h @@ -11,10 +11,10 @@ #ifndef UNIT_TEST_UNIT_TEST_H_ // NOLINT #define UNIT_TEST_UNIT_TEST_H_ +#include <stddef.h> // For NULL #ifdef _WIN32 #include <windows.h> #else -#include <sys/resource.h> #include <sys/time.h> #endif @@ -77,7 +77,18 @@ static inline bool SizeValid(int src_width, #define free_aligned_buffer_page_end(var) \ free(var##_mem); \ - var = 0 + var = NULL + +#define align_buffer_page_end_16(var, size) \ + uint8_t* var##_mem = \ + reinterpret_cast<uint8_t*>(malloc(((size)*2 + 4095 + 63) & ~4095)); \ + uint16_t* var = reinterpret_cast<uint16_t*>( \ + (intptr_t)(var##_mem + (((size)*2 + 4095 + 63) & ~4095) - (size)*2) & \ + ~63) + +#define free_aligned_buffer_page_end_16(var) \ + free(var##_mem); \ + var = NULL #ifdef WIN32 static inline double get_time() { |