about | summary | refs | log | tree | commit | diff
path: root/files/unit_test
diff options
context:
space:
mode:
author Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2023-07-31 15:23:43 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com> 2023-07-31 15:23:43 +0000
commit de35ce683c1fd73fd043f2d43bca7c35200b7a93 (patch)
tree 303e8eabb435a80ebec36a19c23ca85744d58fb5 /files/unit_test
parent 06d64850caa240513108c6540a89fc6d78505596 (diff)
parent cd56a504b7d42276155d9b59ea0bcdad88b5d7e6 (diff)
download libyuv-de35ce683c1fd73fd043f2d43bca7c35200b7a93.tar.gz
Merge "Snap for 10586204 from 1f9deebc6ecf78b637dff50d62772b48332ea5ea to androidx-core-release" into androidx-core-release
Diffstat (limited to 'files/unit_test')
-rw-r--r-- files/unit_test/convert_test.cc 762
-rw-r--r-- files/unit_test/cpu_test.cc 146
-rw-r--r-- files/unit_test/planar_test.cc 97
-rw-r--r-- files/unit_test/rotate_argb_test.cc 106
-rw-r--r-- files/unit_test/rotate_test.cc 363
-rw-r--r-- files/unit_test/scale_uv_test.cc 79
-rw-r--r-- files/unit_test/testdata/riscv64.txt 4
-rw-r--r-- files/unit_test/testdata/riscv64_rvv.txt 4
-rw-r--r-- files/unit_test/testdata/riscv64_rvv_zvfh.txt 4
-rw-r--r-- files/unit_test/unit_test.cc 5
-rw-r--r-- files/unit_test/unit_test.h 15
11 files changed, 1220 insertions, 365 deletions
diff --git a/files/unit_test/convert_test.cc b/files/unit_test/convert_test.cc
index 1f975825..1f1896b0 100644
--- a/files/unit_test/convert_test.cc
+++ b/files/unit_test/convert_test.cc
@@ -48,6 +48,7 @@ namespace libyuv {
#define AR30ToAR30 ARGBCopy
#define ABGRToABGR ARGBCopy
+// subsample amount uses a divide.
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
// Planar test
@@ -180,9 +181,12 @@ TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12)
TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10)
TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10)
TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10)
+TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 10)
TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10)
TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12)
+TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 12)
TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 12)
+TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 12)
TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12)
// Test Android 420 to I420
@@ -417,131 +421,136 @@ TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10)
TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12)
TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
-#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
- DOY, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
- TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
- static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
- static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
- static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
- "SRC_SUBSAMP_X unsupported"); \
- static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
- "SRC_SUBSAMP_Y unsupported"); \
- static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
- "DST_SUBSAMP_X unsupported"); \
- static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
- "DST_SUBSAMP_Y unsupported"); \
- const int kWidth = W1280; \
- const int kHeight = benchmark_height_; \
- const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
- const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
- const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
- const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
- const int kPaddedHeight = \
- (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
- const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
- const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
- align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
- align_buffer_page_end( \
- src_uv, \
- 2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \
- align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
- align_buffer_page_end(dst_uv_c, \
- 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
- align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
- align_buffer_page_end(dst_uv_opt, \
- 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
- SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
- SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
- for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \
- src_y_p[i] = \
- (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
- } \
- for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \
- src_uv_p[i] = \
- (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
- } \
- memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
- memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
- memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
- memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
- MaskCpuFlags(disable_cpu_flags_); \
- SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \
- DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \
- reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \
- NEG kHeight); \
- MaskCpuFlags(benchmark_cpu_info_); \
- for (int i = 0; i < benchmark_iterations_; ++i) { \
- SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \
- DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \
- reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \
- NEG kHeight); \
- } \
- if (DOY) { \
- for (int i = 0; i < kHeight; ++i) { \
- for (int j = 0; j < kWidth; ++j) { \
- EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
- } \
- } \
- } \
- for (int i = 0; i < kDstHalfHeight; ++i) { \
- for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \
- EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \
- dst_uv_opt[i * 2 * kDstHalfWidth + j]); \
- } \
- } \
- free_aligned_buffer_page_end(dst_y_c); \
- free_aligned_buffer_page_end(dst_uv_c); \
- free_aligned_buffer_page_end(dst_y_opt); \
- free_aligned_buffer_page_end(dst_uv_opt); \
- free_aligned_buffer_page_end(src_y); \
- free_aligned_buffer_page_end(src_uv); \
+#define TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, W1280, N, NEG, OFF, DOY, SRC_DEPTH, \
+ TILE_WIDTH, TILE_HEIGHT) \
+ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
+ static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
+ static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
+ "SRC_SUBSAMP_X unsupported"); \
+ static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
+ "SRC_SUBSAMP_Y unsupported"); \
+ static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
+ "DST_SUBSAMP_X unsupported"); \
+ static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
+ "DST_SUBSAMP_Y unsupported"); \
+ const int kWidth = W1280; \
+ const int kHeight = benchmark_height_; \
+ const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
+ const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
+ const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
+ const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
+ const int kPaddedHeight = \
+ (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
+ const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
+ const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
+ align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
+ align_buffer_page_end( \
+ src_uv, \
+ 2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \
+ align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_uv_c, \
+ 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
+ align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_uv_opt, \
+ 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
+ SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
+ SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
+ for (int i = 0; \
+ i < kPaddedWidth * kPaddedHeight * SRC_BPC / (int)sizeof(SRC_T); \
+ ++i) { \
+ src_y_p[i] = \
+ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
+ } \
+ for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2 * \
+ SRC_BPC / (int)sizeof(SRC_T); \
+ ++i) { \
+ src_uv_p[i] = \
+ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
+ } \
+ memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
+ memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
+ memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
+ memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ SRC_FMT_PLANAR##To##FMT_PLANAR( \
+ src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \
+ 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \
+ DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \
+ reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \
+ NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ SRC_FMT_PLANAR##To##FMT_PLANAR( \
+ src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \
+ 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \
+ DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \
+ reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \
+ NEG kHeight); \
+ } \
+ if (DOY) { \
+ for (int i = 0; i < kHeight; ++i) { \
+ for (int j = 0; j < kWidth; ++j) { \
+ EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
+ } \
+ } \
+ } \
+ for (int i = 0; i < kDstHalfHeight; ++i) { \
+ for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \
+ EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \
+ dst_uv_opt[i * 2 * kDstHalfWidth + j]); \
+ } \
+ } \
+ free_aligned_buffer_page_end(dst_y_c); \
+ free_aligned_buffer_page_end(dst_uv_c); \
+ free_aligned_buffer_page_end(dst_y_opt); \
+ free_aligned_buffer_page_end(dst_uv_opt); \
+ free_aligned_buffer_page_end(src_y); \
+ free_aligned_buffer_page_end(src_uv); \
}
-#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \
- TILE_HEIGHT) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, 1, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, 1, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, \
- TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
-
-TESTBIPLANARTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
-TESTBIPLANARTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
-TESTBIPLANARTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
-TESTBIPLANARTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
-TESTBIPLANARTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
-TESTBIPLANARTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
-TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
-TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
-TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
-
-#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
+#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
+ TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_ + 1, _Any, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Unaligned, +, 2, 1, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Invert, -, 0, 1, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT)
+
+TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
+TESTBPTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
+TESTBPTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
+TESTBPTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
+TESTBPTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
+TESTBPTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
+TESTBPTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
+TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
+TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
+TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32)
+
+#define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, W1280, N, NEG, OFF, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
@@ -621,30 +630,30 @@ TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
free_aligned_buffer_page_end(src_uv); \
}
-#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \
- TILE_HEIGHT) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, \
- TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH, \
- TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH, \
- TILE_WIDTH, TILE_HEIGHT)
-
-TESTBIPLANARTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBIPLANARTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
+#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
+ TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Unaligned, +, 2, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Invert, -, 0, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
+
+TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
+TESTBPTOP(P010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10, 1, 1)
+TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1)
// Provide matrix wrappers for full range bt.709
#define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \
@@ -680,6 +689,12 @@ TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
#define I422ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \
I422ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
kFilterBilinear)
+#define I420ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \
+ I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+ kFilterBilinear)
+#define I422ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \
+ I422ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+ kFilterBilinear)
#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN))
@@ -792,8 +807,12 @@ TESTPLANARTOB(V422, 2, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(V422, 2, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1)
+TESTPLANARTOB(I422, 1, 1, RGB24, 3, 3, 1)
+TESTPLANARTOB(I422, 1, 1, RAW, 3, 3, 1)
TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(I444, 1, 1, RGB24, 3, 3, 1)
+TESTPLANARTOB(I444, 1, 1, RAW, 3, 3, 1)
TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(J444, 1, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(H444, 1, 1, ARGB, 4, 4, 1)
@@ -816,6 +835,8 @@ TESTPLANARTOB(H420, 2, 2, AB30, 4, 4, 1)
#endif
TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1)
+TESTPLANARTOB(I422, 2, 1, RGB24Filter, 3, 3, 1)
#else
TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1)
TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1)
@@ -832,14 +853,15 @@ TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1)
TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1)
TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1)
TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1)
-TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1)
TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1)
+TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1)
TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
#endif
@@ -1056,8 +1078,8 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
#endif
-#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \
- BPP_B, W1280, N, NEG, OFF) \
+#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
@@ -1110,15 +1132,15 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
free_aligned_buffer_page_end(dst_argb32_opt); \
}
-#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
- TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
- benchmark_width_ + 1, _Any, +, 0) \
- TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
- benchmark_width_, _Unaligned, +, 2) \
- TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
- benchmark_width_, _Invert, -, 0) \
- TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
- benchmark_width_, _Opt, +, 0)
+#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
+ TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ benchmark_width_ + 1, _Any, +, 0) \
+ TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ benchmark_width_, _Unaligned, +, 2) \
+ TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ benchmark_width_, _Invert, -, 0) \
+ TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ benchmark_width_, _Opt, +, 0)
#define JNV12ToARGB(a, b, c, d, e, f, g, h) \
NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
@@ -1139,29 +1161,29 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
#define JNV12ToRGB565(a, b, c, d, e, f, g, h) \
NV12ToRGB565Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
-TESTBIPLANARTOB(JNV12, 2, 2, ARGB, ARGB, 4)
-TESTBIPLANARTOB(JNV21, 2, 2, ARGB, ARGB, 4)
-TESTBIPLANARTOB(JNV12, 2, 2, ABGR, ABGR, 4)
-TESTBIPLANARTOB(JNV21, 2, 2, ABGR, ABGR, 4)
-TESTBIPLANARTOB(JNV12, 2, 2, RGB24, RGB24, 3)
-TESTBIPLANARTOB(JNV21, 2, 2, RGB24, RGB24, 3)
-TESTBIPLANARTOB(JNV12, 2, 2, RAW, RAW, 3)
-TESTBIPLANARTOB(JNV21, 2, 2, RAW, RAW, 3)
+TESTBPTOB(JNV12, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(JNV21, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(JNV12, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(JNV21, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(JNV12, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(JNV21, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(JNV12, 2, 2, RAW, RAW, 3)
+TESTBPTOB(JNV21, 2, 2, RAW, RAW, 3)
#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTBIPLANARTOB(JNV12, 2, 2, RGB565, RGB565, 2)
+TESTBPTOB(JNV12, 2, 2, RGB565, RGB565, 2)
#endif
-TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4)
-TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4)
-TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4)
-TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4)
-TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3)
-TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3)
-TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3)
-TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3)
-TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3)
+TESTBPTOB(NV12, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(NV21, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(NV12, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(NV21, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(NV12, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(NV21, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(NV12, 2, 2, RAW, RAW, 3)
+TESTBPTOB(NV21, 2, 2, RAW, RAW, 3)
+TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3)
#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2)
+TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2)
#endif
#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
@@ -1236,6 +1258,8 @@ TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1)
TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1)
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2)
TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1)
+TESTATOPLANAR(ABGR, 4, 1, J420, 2, 2)
+TESTATOPLANAR(ABGR, 4, 1, J422, 2, 1)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2)
TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2)
@@ -1254,8 +1278,84 @@ TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1)
TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2)
TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
-#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \
- SUBSAMP_Y, W1280, N, NEG, OFF) \
+#define TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, \
+ SUBSAMP_Y, W1280, N, NEG, OFF) \
+ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
+ const int kWidth = W1280; \
+ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
+ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
+ const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \
+ align_buffer_page_end(src_argb, kStride* kHeight + OFF); \
+ align_buffer_page_end(dst_a_c, kWidth* kHeight); \
+ align_buffer_page_end(dst_y_c, kWidth* kHeight); \
+ align_buffer_page_end(dst_uv_c, \
+ kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end(dst_a_opt, kWidth* kHeight); \
+ align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
+ align_buffer_page_end(dst_uv_opt, \
+ kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ memset(dst_a_c, 1, kWidth* kHeight); \
+ memset(dst_y_c, 2, kWidth* kHeight); \
+ memset(dst_uv_c, 3, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ memset(dst_a_opt, 101, kWidth* kHeight); \
+ memset(dst_y_opt, 102, kWidth* kHeight); \
+ memset(dst_uv_opt, 103, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ for (int i = 0; i < kHeight; ++i) \
+ for (int j = 0; j < kStride; ++j) \
+ src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \
+ kStrideUV * 2, dst_uv_c + kStrideUV, kStrideUV * 2, \
+ dst_a_c, kWidth, kWidth, NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \
+ dst_uv_opt, kStrideUV * 2, dst_uv_opt + kStrideUV, \
+ kStrideUV * 2, dst_a_opt, kWidth, kWidth, \
+ NEG kHeight); \
+ } \
+ for (int i = 0; i < kHeight; ++i) { \
+ for (int j = 0; j < kWidth; ++j) { \
+ EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
+ EXPECT_EQ(dst_a_c[i * kWidth + j], dst_a_opt[i * kWidth + j]); \
+ } \
+ } \
+ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \
+ for (int j = 0; j < kStrideUV; ++j) { \
+ EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \
+ } \
+ } \
+ free_aligned_buffer_page_end(dst_a_c); \
+ free_aligned_buffer_page_end(dst_y_c); \
+ free_aligned_buffer_page_end(dst_uv_c); \
+ free_aligned_buffer_page_end(dst_a_opt); \
+ free_aligned_buffer_page_end(dst_y_opt); \
+ free_aligned_buffer_page_end(dst_uv_opt); \
+ free_aligned_buffer_page_end(src_argb); \
+ }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_ + 1, _Any, +, 0) \
+ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Unaligned, +, 2) \
+ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Invert, -, 0) \
+ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0)
+#else
+#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_ + 1, _Any, +, 0) \
+ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0)
+#endif
+
+TESTATOPLANARA(ARGB, 4, 1, I420Alpha, 2, 2)
+
+#define TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
@@ -1301,25 +1401,25 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
free_aligned_buffer_page_end(src_argb); \
}
-#define TESTATOBIPLANAR(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
- TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_ + 1, _Any, +, 0) \
- TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_, _Unaligned, +, 2) \
- TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_, _Invert, -, 0) \
- TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_, _Opt, +, 0)
-
-TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2)
-TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2)
-TESTATOBIPLANAR(ABGR, 1, 4, NV12, 2, 2)
-TESTATOBIPLANAR(ABGR, 1, 4, NV21, 2, 2)
-TESTATOBIPLANAR(RAW, 1, 3, JNV21, 2, 2)
-TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2)
-TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
-TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2)
-TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
+#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+ TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_ + 1, _Any, +, 0) \
+ TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Unaligned, +, 2) \
+ TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Invert, -, 0) \
+ TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0)
+
+TESTATOBP(ARGB, 1, 4, NV12, 2, 2)
+TESTATOBP(ARGB, 1, 4, NV21, 2, 2)
+TESTATOBP(ABGR, 1, 4, NV12, 2, 2)
+TESTATOBP(ABGR, 1, 4, NV21, 2, 2)
+TESTATOBP(RAW, 1, 3, JNV21, 2, 2)
+TESTATOBP(YUY2, 2, 4, NV12, 2, 2)
+TESTATOBP(UYVY, 2, 4, NV12, 2, 2)
+TESTATOBP(AYUV, 1, 4, NV12, 2, 2)
+TESTATOBP(AYUV, 1, 4, NV21, 2, 2)
#define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \
@@ -1440,6 +1540,7 @@ TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
+TESTATOB(ABGR, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
TESTATOB(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
@@ -1450,7 +1551,7 @@ TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1)
#endif
TESTATOB(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1)
-TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) // 4
+TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1)
TESTATOB(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
@@ -1484,6 +1585,127 @@ TESTATOB(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
TESTATOB(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+// In-place test: src and dst share one buffer; compares C and Opt output.
+#define TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
+ EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \
+ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \
+ const int kWidth = W1280; \
+ const int kHeight = benchmark_height_; \
+ const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
+ const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
+ const int kStrideA = \
+ (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
+ const int kStrideB = \
+ (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
+ align_buffer_page_end(src_argb, \
+ kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
+ align_buffer_page_end(dst_argb_c, \
+ kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
+ align_buffer_page_end(dst_argb_opt, \
+ kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
+ for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
+ src_argb[i + OFF] = (fastrand() & 0xff); \
+ } \
+ memcpy(dst_argb_c + OFF, src_argb, \
+ kStrideA * kHeightA * (int)sizeof(TYPE_A)); \
+ memcpy(dst_argb_opt + OFF, src_argb, \
+ kStrideA * kHeightA * (int)sizeof(TYPE_A)); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FMT_A##To##FMT_B((TYPE_A*)(dst_argb_c /* src */ + OFF), kStrideA, \
+ (TYPE_B*)dst_argb_c, kStrideB, kWidth, NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \
+ (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \
+ } \
+ memcpy(dst_argb_opt + OFF, src_argb, \
+ kStrideA * kHeightA * (int)sizeof(TYPE_A)); \
+ FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \
+ (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \
+ for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \
+ EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
+ } \
+ free_aligned_buffer_page_end(src_argb); \
+ free_aligned_buffer_page_end(dst_argb_c); \
+ free_aligned_buffer_page_end(dst_argb_opt); \
+ }
+
+#define TESTATOA(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
+ EPP_B, STRIDE_B, HEIGHT_B) \
+ TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
+ STRIDE_B, HEIGHT_B, benchmark_width_, _Inplace, +, 0)
+
+TESTATOA(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOA(AB30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(ABGR, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+TESTATOA(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(ARGB, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1)
+// TODO(fbarchard): Support in place for mirror.
+// TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
+TESTATOA(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
+TESTATOA(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
+TESTATOA(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1)
+#endif
+TESTATOA(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1)
+// TODO(fbarchard): Support in place for conversions that increase bpp.
+// TESTATOA(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 1, 1)
+// TESTATOA(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1)
+// TESTATOA(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1)
+// TESTATOA(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1)
+TESTATOA(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1)
+// TESTATOA(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
+// TESTATOA(RGB24, uint8_t, 3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1)
+TESTATOA(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+// TESTATOA(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(UYVY, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1)
+// TESTATOA(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+// TESTATOA(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+// TESTATOA(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+// TESTATOA(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+TESTATOA(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(AR64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOA(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOA(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+
#define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B, W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither##N) { \
@@ -2065,6 +2287,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_u, half_width * half_height);
@@ -2099,6 +2324,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
// Convert to NV21
align_buffer_page_end(dst_y, width * height);
@@ -2158,6 +2386,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420_NV12) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
// Convert to NV12
align_buffer_page_end(dst_y, width * height);
@@ -2217,6 +2448,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_uv, half_width * half_height * 2);
@@ -2247,6 +2481,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_uv, half_width * half_height * 2);
@@ -2282,6 +2519,9 @@ TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_uv, half_width * half_height * 2);
@@ -2312,6 +2552,9 @@ TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_uv, half_width * half_height * 2);
@@ -2346,6 +2589,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_uv, half_width * half_height * 2);
@@ -2376,6 +2622,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_400) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_uv, half_width * half_height * 2);
@@ -2410,6 +2659,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_uv, half_width * half_height * 2);
@@ -2440,6 +2692,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_444) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_uv, half_width * half_height * 2);
@@ -2472,6 +2727,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToARGB) {
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_argb, width * height * 4);
for (int times = 0; times < benchmark_iterations; ++times) {
@@ -2921,6 +3179,51 @@ TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4)
TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12)
TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12)
+TEST_F(LibYUVConvertTest, MM21ToYUY2) {
+ const int kWidth = (benchmark_width_ + 15) & (~15);
+ const int kHeight = (benchmark_height_ + 31) & (~31);
+
+ align_buffer_page_end(orig_y, kWidth * kHeight);
+ align_buffer_page_end(orig_uv,
+ 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
+
+ align_buffer_page_end(tmp_y, kWidth * kHeight);
+ align_buffer_page_end(tmp_u, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
+ align_buffer_page_end(tmp_v, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
+
+ align_buffer_page_end(dst_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight);
+ align_buffer_page_end(golden_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight);
+
+ MemRandomize(orig_y, kWidth * kHeight);
+ MemRandomize(orig_uv, 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
+
+ /* Convert MM21 to YUY2 in 2 steps (via I420) to build the golden reference */
+ libyuv::MM21ToI420(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2), tmp_y,
+ kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v,
+ SUBSAMPLE(kWidth, 2), kWidth, kHeight);
+ libyuv::I420ToYUY2(tmp_y, kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v,
+ SUBSAMPLE(kWidth, 2), golden_yuyv,
+ 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight);
+
+ /* Convert MM21 to YUY2 directly; repeated benchmark_iterations_ times */
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ libyuv::MM21ToYUY2(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2),
+ dst_yuyv, 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight);
+ }
+
+ for (int i = 0; i < 4 * SUBSAMPLE(kWidth, 2) * kHeight; ++i) {
+ EXPECT_EQ(dst_yuyv[i], golden_yuyv[i]);
+ }
+
+ free_aligned_buffer_page_end(orig_y);
+ free_aligned_buffer_page_end(orig_uv);
+ free_aligned_buffer_page_end(tmp_y);
+ free_aligned_buffer_page_end(tmp_u);
+ free_aligned_buffer_page_end(tmp_v);
+ free_aligned_buffer_page_end(dst_yuyv);
+ free_aligned_buffer_page_end(golden_yuyv);
+}
+
// Transitive test. A to B to C is same as A to C.
// Benchmarks A To B to C for comparison to 1 step, benchmarked elsewhere.
#define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
@@ -3353,6 +3656,8 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
I012ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
#define I012ToAR30(a, b, c, d, e, f, g, h, i, j) \
I012ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
+#define I012ToAB30(a, b, c, d, e, f, g, h, i, j) \
+ I012ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
#define I410ToARGB(a, b, c, d, e, f, g, h, i, j) \
I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
@@ -3495,6 +3800,7 @@ TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AB30, 4, 4, 1)
TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AR30, 4, 4, 1)
TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AB30, 4, 4, 1)
TESTPLANAR16TOB(I012, 2, 2, 0xfff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(I012, 2, 2, 0xfff, AB30, 4, 4, 1)
TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30Filter, 4, 4, 1)
TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30Filter, 4, 4, 1)
#endif // LITTLE_ENDIAN_ONLY_TEST
@@ -3733,8 +4039,8 @@ TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10)
TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
#endif // DISABLE_SLOW_TESTS
-#define TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
- ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
+#define TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
@@ -3777,16 +4083,16 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
free_aligned_buffer_page_end(dst_argb_opt); \
}
-#define TESTBIPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
- ALIGN, YALIGN, S_DEPTH) \
- TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \
- TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \
- TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \
- TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
+#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, S_DEPTH) \
+ TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+ benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \
+ TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+ benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \
+ TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+ benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \
+ TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+ benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
#define P010ToARGB(a, b, c, d, e, f, g, h) \
P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
@@ -3829,23 +4135,23 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
kFilterBilinear)
#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
-TESTBIPLANAR16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
-TESTBIPLANAR16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
-TESTBIPLANAR16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
-TESTBIPLANAR16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
-TESTBIPLANAR16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
+TESTBP16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
+TESTBP16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
+TESTBP16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
+TESTBP16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
+TESTBP16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
+TESTBP16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTBIPLANAR16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
-TESTBIPLANAR16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
-TESTBIPLANAR16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
-TESTBIPLANAR16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
-TESTBIPLANAR16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
+TESTBP16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
+TESTBP16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
+TESTBP16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
+TESTBP16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
+TESTBP16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
+TESTBP16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
#endif // LITTLE_ENDIAN_ONLY_TEST
#endif // DISABLE_SLOW_TESTS
diff --git a/files/unit_test/cpu_test.cc b/files/unit_test/cpu_test.cc
index 080778f5..93867fa7 100644
--- a/files/unit_test/cpu_test.cc
+++ b/files/unit_test/cpu_test.cc
@@ -20,13 +20,23 @@ namespace libyuv {
TEST_F(LibYUVBaseTest, TestCpuHas) {
int cpu_flags = TestCpuFlag(-1);
- printf("Cpu Flags %d\n", cpu_flags);
+ printf("Cpu Flags 0x%x\n", cpu_flags);
#if defined(__arm__) || defined(__aarch64__)
int has_arm = TestCpuFlag(kCpuHasARM);
- printf("Has ARM %d\n", has_arm);
+ printf("Has ARM 0x%x\n", has_arm);
int has_neon = TestCpuFlag(kCpuHasNEON);
- printf("Has NEON %d\n", has_neon);
+ printf("Has NEON 0x%x\n", has_neon);
#endif
+#if defined(__riscv) && defined(__linux__)
+ int has_riscv = TestCpuFlag(kCpuHasRISCV);
+ printf("Has RISCV 0x%x\n", has_riscv);
+ int has_rvv = TestCpuFlag(kCpuHasRVV);
+ printf("Has RVV 0x%x\n", has_rvv);
+ int has_rvvzvfh = TestCpuFlag(kCpuHasRVVZVFH);
+ printf("Has RVVZVFH 0x%x\n", has_rvvzvfh);
+#endif
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
+ defined(_M_X64)
int has_x86 = TestCpuFlag(kCpuHasX86);
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
@@ -45,39 +55,38 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2);
int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG);
int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ);
- printf("Has X86 %d\n", has_x86);
- printf("Has SSE2 %d\n", has_sse2);
- printf("Has SSSE3 %d\n", has_ssse3);
- printf("Has SSE41 %d\n", has_sse41);
- printf("Has SSE42 %d\n", has_sse42);
- printf("Has AVX %d\n", has_avx);
- printf("Has AVX2 %d\n", has_avx2);
- printf("Has ERMS %d\n", has_erms);
- printf("Has FMA3 %d\n", has_fma3);
- printf("Has F16C %d\n", has_f16c);
- printf("Has GFNI %d\n", has_gfni);
- printf("Has AVX512BW %d\n", has_avx512bw);
- printf("Has AVX512VL %d\n", has_avx512vl);
- printf("Has AVX512VNNI %d\n", has_avx512vnni);
- printf("Has AVX512VBMI %d\n", has_avx512vbmi);
- printf("Has AVX512VBMI2 %d\n", has_avx512vbmi2);
- printf("Has AVX512VBITALG %d\n", has_avx512vbitalg);
- printf("Has AVX512VPOPCNTDQ %d\n", has_avx512vpopcntdq);
-
+ printf("Has X86 0x%x\n", has_x86);
+ printf("Has SSE2 0x%x\n", has_sse2);
+ printf("Has SSSE3 0x%x\n", has_ssse3);
+ printf("Has SSE41 0x%x\n", has_sse41);
+ printf("Has SSE42 0x%x\n", has_sse42);
+ printf("Has AVX 0x%x\n", has_avx);
+ printf("Has AVX2 0x%x\n", has_avx2);
+ printf("Has ERMS 0x%x\n", has_erms);
+ printf("Has FMA3 0x%x\n", has_fma3);
+ printf("Has F16C 0x%x\n", has_f16c);
+ printf("Has GFNI 0x%x\n", has_gfni);
+ printf("Has AVX512BW 0x%x\n", has_avx512bw);
+ printf("Has AVX512VL 0x%x\n", has_avx512vl);
+ printf("Has AVX512VNNI 0x%x\n", has_avx512vnni);
+ printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi);
+ printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2);
+ printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg);
+ printf("Has AVX512VPOPCNTDQ 0x%x\n", has_avx512vpopcntdq);
+#endif
#if defined(__mips__)
int has_mips = TestCpuFlag(kCpuHasMIPS);
- printf("Has MIPS %d\n", has_mips);
+ printf("Has MIPS 0x%x\n", has_mips);
int has_msa = TestCpuFlag(kCpuHasMSA);
- printf("Has MSA %d\n", has_msa);
+ printf("Has MSA 0x%x\n", has_msa);
#endif
-
#if defined(__loongarch__)
int has_loongarch = TestCpuFlag(kCpuHasLOONGARCH);
- printf("Has LOONGARCH %d\n", has_loongarch);
+ printf("Has LOONGARCH 0x%x\n", has_loongarch);
int has_lsx = TestCpuFlag(kCpuHasLSX);
- printf("Has LSX %d\n", has_lsx);
+ printf("Has LSX 0x%x\n", has_lsx);
int has_lasx = TestCpuFlag(kCpuHasLASX);
- printf("Has LASX %d\n", has_lasx);
+ printf("Has LASX 0x%x\n", has_lasx);
#endif
}
@@ -104,27 +113,33 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) {
#ifdef __i386__
printf("__i386__ %d\n", __i386__);
#endif
-#ifdef __mips
- printf("__mips %d\n", __mips);
-#endif
-#ifdef __mips_isa_rev
- printf("__mips_isa_rev %d\n", __mips_isa_rev);
-#endif
#ifdef __x86_64__
printf("__x86_64__ %d\n", __x86_64__);
#endif
+#ifdef _M_IX86
+ printf("_M_IX86 %d\n", _M_IX86);
+#endif
+#ifdef _M_X64
+ printf("_M_X64 %d\n", _M_X64);
+#endif
#ifdef _MSC_VER
printf("_MSC_VER %d\n", _MSC_VER);
#endif
#ifdef __aarch64__
printf("__aarch64__ %d\n", __aarch64__);
#endif
-#ifdef __APPLE__
- printf("__APPLE__ %d\n", __APPLE__);
-#endif
#ifdef __arm__
printf("__arm__ %d\n", __arm__);
#endif
+#ifdef __riscv
+ printf("__riscv %d\n", __riscv);
+#endif
+#ifdef __riscv_vector
+ printf("__riscv_vector %d\n", __riscv_vector);
+#endif
+#ifdef __APPLE__
+ printf("__APPLE__ %d\n", __APPLE__);
+#endif
#ifdef __clang__
printf("__clang__ %d\n", __clang__);
#endif
@@ -140,20 +155,11 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) {
#ifdef __mips_msa
printf("__mips_msa %d\n", __mips_msa);
#endif
-#ifdef __native_client__
- printf("__native_client__ %d\n", __native_client__);
-#endif
-#ifdef __pic__
- printf("__pic__ %d\n", __pic__);
-#endif
-#ifdef __pnacl__
- printf("__pnacl__ %d\n", __pnacl__);
-#endif
-#ifdef _M_IX86
- printf("_M_IX86 %d\n", _M_IX86);
+#ifdef __mips
+ printf("__mips %d\n", __mips);
#endif
-#ifdef _M_X64
- printf("_M_X64 %d\n", _M_X64);
+#ifdef __mips_isa_rev
+ printf("__mips_isa_rev %d\n", __mips_isa_rev);
#endif
#ifdef _MIPS_ARCH_LOONGSON3A
printf("_MIPS_ARCH_LOONGSON3A %d\n", _MIPS_ARCH_LOONGSON3A);
@@ -164,6 +170,15 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) {
#ifdef _WIN32
printf("_WIN32 %d\n", _WIN32);
#endif
+#ifdef __native_client__
+ printf("__native_client__ %d\n", __native_client__);
+#endif
+#ifdef __pic__
+ printf("__pic__ %d\n", __pic__);
+#endif
+#ifdef __pnacl__
+ printf("__pnacl__ %d\n", __pnacl__);
+#endif
#ifdef GG_LONGLONG
printf("GG_LONGLONG %d\n", GG_LONGLONG);
#endif
@@ -200,8 +215,9 @@ TEST_F(LibYUVBaseTest, TestCpuId) {
cpu_info[0] = cpu_info[1]; // Reorder output
cpu_info[1] = cpu_info[3];
cpu_info[3] = 0;
- printf("Cpu Vendor: %s %x %x %x\n", reinterpret_cast<char*>(&cpu_info[0]),
- cpu_info[0], cpu_info[1], cpu_info[2]);
+ printf("Cpu Vendor: %s 0x%x 0x%x 0x%x\n",
+ reinterpret_cast<char*>(&cpu_info[0]), cpu_info[0], cpu_info[1],
+ cpu_info[2]);
EXPECT_EQ(12u, strlen(reinterpret_cast<char*>(&cpu_info[0])));
// CPU Family and Model
@@ -264,6 +280,32 @@ TEST_F(LibYUVBaseTest, TestLinuxMipsMsa) {
}
}
+TEST_F(LibYUVBaseTest, TestLinuxRVV) {
+ if (FileExists("../../unit_test/testdata/riscv64.txt")) {
+ printf("Note: testing to load \"../../unit_test/testdata/riscv64.txt\"\n");
+
+ EXPECT_EQ(0, RiscvCpuCaps("../../unit_test/testdata/riscv64.txt"));
+ EXPECT_EQ(kCpuHasRVV,
+ RiscvCpuCaps("../../unit_test/testdata/riscv64_rvv.txt"));
+ EXPECT_EQ(kCpuHasRVV | kCpuHasRVVZVFH,
+ RiscvCpuCaps("../../unit_test/testdata/riscv64_rvv_zvfh.txt"));
+ } else {
+ printf(
+ "WARNING: unable to load "
+ "\"../../unit_test/testdata/riscv64.txt\"\n");
+ }
+#if defined(__linux__) && defined(__riscv)
+ if (FileExists("/proc/cpuinfo")) {
+ if (!(kCpuHasRVV & RiscvCpuCaps("/proc/cpuinfo"))) {
+ // This can happen on an RVV emulator, where /proc/cpuinfo is the host's.
+ printf("WARNING: RVV build enabled but CPU does not have RVV\n");
+ }
+ } else {
+ printf("WARNING: unable to load \"/proc/cpuinfo\"\n");
+ }
+#endif
+}
+
// TODO(fbarchard): Fix clangcl test of cpuflags.
#ifdef _MSC_VER
TEST_F(LibYUVBaseTest, DISABLED_TestSetCpuFlags) {
diff --git a/files/unit_test/planar_test.cc b/files/unit_test/planar_test.cc
index 3a8c470b..ad97b87e 100644
--- a/files/unit_test/planar_test.cc
+++ b/files/unit_test/planar_test.cc
@@ -1638,29 +1638,29 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
- int orig_width = (benchmark_width_ + 15) & ~15;
- int orig_height = (benchmark_height_ + 15) & ~15;
- int orig_plane_size = orig_width * orig_height;
+ int tile_width = (benchmark_width_ + 15) & ~15;
+ int tile_height = (benchmark_height_ + 15) & ~15;
+ int tile_plane_size = tile_width * tile_height;
int y_plane_size = benchmark_width_ * benchmark_height_;
- align_buffer_page_end(orig_y, orig_plane_size);
+ align_buffer_page_end(tile_y, tile_plane_size);
align_buffer_page_end(dst_c, y_plane_size);
align_buffer_page_end(dst_opt, y_plane_size);
- MemRandomize(orig_y, orig_plane_size);
+ MemRandomize(tile_y, tile_plane_size);
memset(dst_c, 0, y_plane_size);
memset(dst_opt, 0, y_plane_size);
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
for (j = 0; j < benchmark_iterations_; j++) {
- DetilePlane(orig_y, orig_width, dst_c, benchmark_width_, benchmark_width_,
+ DetilePlane(tile_y, tile_width, dst_c, benchmark_width_, benchmark_width_,
benchmark_height_, 16);
}
// Enable optimizations.
MaskCpuFlags(benchmark_cpu_info_);
for (j = 0; j < benchmark_iterations_; j++) {
- DetilePlane(orig_y, orig_width, dst_opt, benchmark_width_, benchmark_width_,
+ DetilePlane(tile_y, tile_width, dst_opt, benchmark_width_, benchmark_width_,
benchmark_height_, 16);
}
@@ -1668,7 +1668,46 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) {
EXPECT_EQ(dst_c[i], dst_opt[i]);
}
- free_aligned_buffer_page_end(orig_y);
+ free_aligned_buffer_page_end(tile_y);
+ free_aligned_buffer_page_end(dst_c);
+ free_aligned_buffer_page_end(dst_opt);
+}
+
+TEST_F(LibYUVPlanarTest, TestDetilePlane_16) {
+ int i, j;
+
+ // tile_y is tiled. Sizes are in bytes (2 per sample), rounded up to tiles.
+ int tile_width = (benchmark_width_ + 15) & ~15;
+ int tile_height = (benchmark_height_ + 15) & ~15;
+ int tile_plane_size = tile_width * tile_height * 2;
+ int y_plane_size = benchmark_width_ * benchmark_height_ * 2;
+ align_buffer_page_end(tile_y, tile_plane_size);
+ align_buffer_page_end(dst_c, y_plane_size);
+ align_buffer_page_end(dst_opt, y_plane_size);
+
+ MemRandomize(tile_y, tile_plane_size);
+ memset(dst_c, 0, y_plane_size);
+ memset(dst_opt, 0, y_plane_size);
+
+ // Disable all optimizations.
+ MaskCpuFlags(disable_cpu_flags_);
+ for (j = 0; j < benchmark_iterations_; j++) {
+ DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_c,
+ benchmark_width_, benchmark_width_, benchmark_height_, 16);
+ }
+
+ // Enable optimizations.
+ MaskCpuFlags(benchmark_cpu_info_);
+ for (j = 0; j < benchmark_iterations_; j++) {
+ DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_opt,
+ benchmark_width_, benchmark_width_, benchmark_height_, 16);
+ }
+
+ for (i = 0; i < y_plane_size; ++i) {
+ EXPECT_EQ(dst_c[i], dst_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(tile_y);
+ free_aligned_buffer_page_end(dst_c);
+ free_aligned_buffer_page_end(dst_opt);
+}
@@ -1678,33 +1717,33 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
- int orig_width = (benchmark_width_ + 15) & ~15;
- int orig_height = (benchmark_height_ + 15) & ~15;
- int orig_plane_size = orig_width * orig_height;
+ int tile_width = (benchmark_width_ + 15) & ~15;
+ int tile_height = (benchmark_height_ + 15) & ~15;
+ int tile_plane_size = tile_width * tile_height;
int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
- align_buffer_page_end(orig_uv, orig_plane_size);
- align_buffer_page_end(detiled_uv, orig_plane_size);
+ align_buffer_page_end(tile_uv, tile_plane_size);
+ align_buffer_page_end(detiled_uv, tile_plane_size);
align_buffer_page_end(dst_u_two_stage, uv_plane_size);
align_buffer_page_end(dst_u_opt, uv_plane_size);
align_buffer_page_end(dst_v_two_stage, uv_plane_size);
align_buffer_page_end(dst_v_opt, uv_plane_size);
- MemRandomize(orig_uv, orig_plane_size);
- memset(detiled_uv, 0, orig_plane_size);
+ MemRandomize(tile_uv, tile_plane_size);
+ memset(detiled_uv, 0, tile_plane_size);
memset(dst_u_two_stage, 0, uv_plane_size);
memset(dst_u_opt, 0, uv_plane_size);
memset(dst_v_two_stage, 0, uv_plane_size);
memset(dst_v_opt, 0, uv_plane_size);
- DetileSplitUVPlane(orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2,
+ DetileSplitUVPlane(tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2,
dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_,
benchmark_height_, 16);
// Benchmark 2 step conversion for comparison.
for (j = 0; j < benchmark_iterations_; j++) {
- DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_,
+ DetilePlane(tile_uv, tile_width, detiled_uv, benchmark_width_,
benchmark_width_, benchmark_height_, 16);
- SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage,
+ SplitUVPlane(detiled_uv, tile_width, dst_u_two_stage,
(benchmark_width_ + 1) / 2, dst_v_two_stage,
(benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2,
benchmark_height_);
@@ -1715,7 +1754,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
}
- free_aligned_buffer_page_end(orig_uv);
+ free_aligned_buffer_page_end(tile_uv);
free_aligned_buffer_page_end(detiled_uv);
free_aligned_buffer_page_end(dst_u_two_stage);
free_aligned_buffer_page_end(dst_u_opt);
@@ -1727,17 +1766,17 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
- int orig_width = (benchmark_width_ + 15) & ~15;
- int orig_height = (benchmark_height_ + 15) & ~15;
- int orig_plane_size = orig_width * orig_height;
+ int tile_width = (benchmark_width_ + 15) & ~15;
+ int tile_height = (benchmark_height_ + 15) & ~15;
+ int tile_plane_size = tile_width * tile_height;
int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
- align_buffer_page_end(orig_uv, orig_plane_size);
+ align_buffer_page_end(tile_uv, tile_plane_size);
align_buffer_page_end(dst_u_c, uv_plane_size);
align_buffer_page_end(dst_u_opt, uv_plane_size);
align_buffer_page_end(dst_v_c, uv_plane_size);
align_buffer_page_end(dst_v_opt, uv_plane_size);
- MemRandomize(orig_uv, orig_plane_size);
+ MemRandomize(tile_uv, tile_plane_size);
memset(dst_u_c, 0, uv_plane_size);
memset(dst_u_opt, 0, uv_plane_size);
memset(dst_v_c, 0, uv_plane_size);
@@ -1746,7 +1785,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
- DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2,
+ DetileSplitUVPlane(tile_uv, tile_width, dst_u_c, (benchmark_width_ + 1) / 2,
dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_,
benchmark_height_, 16);
@@ -1755,7 +1794,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
for (j = 0; j < benchmark_iterations_; j++) {
DetileSplitUVPlane(
- orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
+ tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
(benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16);
}
@@ -1764,7 +1803,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
}
- free_aligned_buffer_page_end(orig_uv);
+ free_aligned_buffer_page_end(tile_uv);
free_aligned_buffer_page_end(dst_u_c);
free_aligned_buffer_page_end(dst_u_opt);
free_aligned_buffer_page_end(dst_v_c);
@@ -3495,8 +3534,8 @@ TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16)
// TODO(fbarchard): improve test for platforms and cpu detect
#ifdef HAS_MERGEUVROW_16_AVX2
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
- // Round count up to multiple of 16
- const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
+ // Round count up to multiple of 8
+ const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7;
align_buffer_page_end(src_pixels_u, kPixels * 2);
align_buffer_page_end(src_pixels_v, kPixels * 2);
diff --git a/files/unit_test/rotate_argb_test.cc b/files/unit_test/rotate_argb_test.cc
index 01ed69ca..74952c4e 100644
--- a/files/unit_test/rotate_argb_test.cc
+++ b/files/unit_test/rotate_argb_test.cc
@@ -225,4 +225,110 @@ TEST_F(LibYUVRotateTest, RotatePlane90_TestStride) {
free_aligned_buffer_page_end(src_argb);
}
+static void TestRotatePlane_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ libyuv::RotationMode mode,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (src_width < 1) {
+ src_width = 1;
+ }
+ if (src_height < 1) {
+ src_height = 1;
+ }
+ if (dst_width < 1) {
+ dst_width = 1;
+ }
+ if (dst_height < 1) {
+ dst_height = 1;
+ }
+ int src_stride = src_width;
+ int src_plane_size = src_stride * abs(src_height);
+ align_buffer_page_end_16(src, src_plane_size);
+ for (int i = 0; i < src_plane_size; ++i) {
+ src[i] = fastrand() & 0xff;
+ }
+
+ int dst_stride = dst_width;
+ int dst_plane_size = dst_stride * dst_height;
+ align_buffer_page_end_16(dst_c, dst_plane_size);
+ align_buffer_page_end_16(dst_opt, dst_plane_size);
+ memset(dst_c, 2, dst_plane_size);
+ memset(dst_opt, 3, dst_plane_size);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ RotatePlane_16(src, src_stride, dst_c, dst_stride, src_width, src_height,
+ mode);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (int i = 0; i < benchmark_iterations; ++i) {
+ RotatePlane_16(src, src_stride, dst_opt, dst_stride, src_width, src_height,
+ mode);
+ }
+
+ // Rotation should be exact.
+ for (int i = 0; i < dst_plane_size; ++i) {
+ EXPECT_EQ(dst_c[i], dst_opt[i]);
+ }
+
+ free_aligned_buffer_page_end_16(dst_c);
+ free_aligned_buffer_page_end_16(dst_opt);
+ free_aligned_buffer_page_end_16(src);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane0_16_Opt) {
+ TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate0, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane90_16_Opt) {
+ TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate90, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane180_16_Opt) {
+ TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate180, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane270_16_Opt) {
+ TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate270, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane0_16_Odd) {
+ TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_width_ + 1, benchmark_height_ + 1, kRotate0,
+ benchmark_iterations_, disable_cpu_flags_,
+ benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane90_16_Odd) {
+ TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_height_ + 1, benchmark_width_ + 1, kRotate90,
+ benchmark_iterations_, disable_cpu_flags_,
+ benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane180_16_Odd) {
+ TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_width_ + 1, benchmark_height_ + 1, kRotate180,
+ benchmark_iterations_, disable_cpu_flags_,
+ benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane270_16_Odd) {
+ TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_height_ + 1, benchmark_width_ + 1, kRotate270,
+ benchmark_iterations_, disable_cpu_flags_,
+ benchmark_cpu_info_);
+}
+
} // namespace libyuv
diff --git a/files/unit_test/rotate_test.cc b/files/unit_test/rotate_test.cc
index d3887414..abc08efa 100644
--- a/files/unit_test/rotate_test.cc
+++ b/files/unit_test/rotate_test.cc
@@ -14,6 +14,10 @@
#include "libyuv/cpu_id.h"
#include "libyuv/rotate.h"
+#ifdef ENABLE_ROW_TESTS
+#include "libyuv/rotate_row.h"
+#endif
+
namespace libyuv {
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
@@ -596,4 +600,363 @@ TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)
#undef TESTAPLANARTOP
#undef TESTAPLANARTOPI
+static void I010TestRotate(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ libyuv::RotationMode mode,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (src_width < 1) {
+ src_width = 1;
+ }
+ if (src_height == 0) {
+ src_height = 1;
+ }
+ if (dst_width < 1) {
+ dst_width = 1;
+ }
+ if (dst_height < 1) {
+ dst_height = 1;
+ }
+ int src_i010_y_size = src_width * Abs(src_height);
+ int src_i010_uv_size = ((src_width + 1) / 2) * ((Abs(src_height) + 1) / 2);
+ int src_i010_size = src_i010_y_size + src_i010_uv_size * 2;
+ align_buffer_page_end_16(src_i010, src_i010_size);
+ for (int i = 0; i < src_i010_size; ++i) {
+ src_i010[i] = fastrand() & 0x3ff;
+ }
+
+ int dst_i010_y_size = dst_width * dst_height;
+ int dst_i010_uv_size = ((dst_width + 1) / 2) * ((dst_height + 1) / 2);
+ int dst_i010_size = dst_i010_y_size + dst_i010_uv_size * 2;
+ align_buffer_page_end_16(dst_i010_c, dst_i010_size);
+ align_buffer_page_end_16(dst_i010_opt, dst_i010_size);
+ memset(dst_i010_c, 2, dst_i010_size * 2);
+ memset(dst_i010_opt, 3, dst_i010_size * 2);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ I010Rotate(src_i010, src_width, src_i010 + src_i010_y_size,
+ (src_width + 1) / 2, src_i010 + src_i010_y_size + src_i010_uv_size,
+ (src_width + 1) / 2, dst_i010_c, dst_width,
+ dst_i010_c + dst_i010_y_size, (dst_width + 1) / 2,
+ dst_i010_c + dst_i010_y_size + dst_i010_uv_size,
+ (dst_width + 1) / 2, src_width, src_height, mode);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (int i = 0; i < benchmark_iterations; ++i) {
+ I010Rotate(
+ src_i010, src_width, src_i010 + src_i010_y_size, (src_width + 1) / 2,
+ src_i010 + src_i010_y_size + src_i010_uv_size, (src_width + 1) / 2,
+ dst_i010_opt, dst_width, dst_i010_opt + dst_i010_y_size,
+ (dst_width + 1) / 2, dst_i010_opt + dst_i010_y_size + dst_i010_uv_size,
+ (dst_width + 1) / 2, src_width, src_height, mode);
+ }
+
+ // Rotation should be exact.
+ for (int i = 0; i < dst_i010_size; ++i) {
+ EXPECT_EQ(dst_i010_c[i], dst_i010_opt[i]);
+ }
+
+ free_aligned_buffer_page_end_16(dst_i010_c);
+ free_aligned_buffer_page_end_16(dst_i010_opt);
+ free_aligned_buffer_page_end_16(src_i010);
+}
+
+TEST_F(LibYUVRotateTest, I010Rotate0_Opt) {
+ I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate0, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I010Rotate90_Opt) {
+ I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate90, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I010Rotate180_Opt) {
+ I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate180, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I010Rotate270_Opt) {
+ I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate270, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+static void I210TestRotate(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ libyuv::RotationMode mode,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (src_width < 1) {
+ src_width = 1;
+ }
+ if (src_height == 0) {
+ src_height = 1;
+ }
+ if (dst_width < 1) {
+ dst_width = 1;
+ }
+ if (dst_height < 1) {
+ dst_height = 1;
+ }
+ int src_i210_y_size = src_width * Abs(src_height);
+ int src_i210_uv_size = ((src_width + 1) / 2) * Abs(src_height);
+ int src_i210_size = src_i210_y_size + src_i210_uv_size * 2;
+ align_buffer_page_end_16(src_i210, src_i210_size);
+ for (int i = 0; i < src_i210_size; ++i) {
+ src_i210[i] = fastrand() & 0x3ff;
+ }
+
+ int dst_i210_y_size = dst_width * dst_height;
+ int dst_i210_uv_size = ((dst_width + 1) / 2) * dst_height;
+ int dst_i210_size = dst_i210_y_size + dst_i210_uv_size * 2;
+ align_buffer_page_end_16(dst_i210_c, dst_i210_size);
+ align_buffer_page_end_16(dst_i210_opt, dst_i210_size);
+ memset(dst_i210_c, 2, dst_i210_size * 2);
+ memset(dst_i210_opt, 3, dst_i210_size * 2);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ I210Rotate(src_i210, src_width, src_i210 + src_i210_y_size,
+ (src_width + 1) / 2, src_i210 + src_i210_y_size + src_i210_uv_size,
+ (src_width + 1) / 2, dst_i210_c, dst_width,
+ dst_i210_c + dst_i210_y_size, (dst_width + 1) / 2,
+ dst_i210_c + dst_i210_y_size + dst_i210_uv_size,
+ (dst_width + 1) / 2, src_width, src_height, mode);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (int i = 0; i < benchmark_iterations; ++i) {
+ I210Rotate(
+ src_i210, src_width, src_i210 + src_i210_y_size, (src_width + 1) / 2,
+ src_i210 + src_i210_y_size + src_i210_uv_size, (src_width + 1) / 2,
+ dst_i210_opt, dst_width, dst_i210_opt + dst_i210_y_size,
+ (dst_width + 1) / 2, dst_i210_opt + dst_i210_y_size + dst_i210_uv_size,
+ (dst_width + 1) / 2, src_width, src_height, mode);
+ }
+
+ // Rotation should be exact.
+ for (int i = 0; i < dst_i210_size; ++i) {
+ EXPECT_EQ(dst_i210_c[i], dst_i210_opt[i]);
+ }
+
+ free_aligned_buffer_page_end_16(dst_i210_c);
+ free_aligned_buffer_page_end_16(dst_i210_opt);
+ free_aligned_buffer_page_end_16(src_i210);
+}
+
+TEST_F(LibYUVRotateTest, I210Rotate0_Opt) {
+ I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate0, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I210Rotate90_Opt) {
+ I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate90, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I210Rotate180_Opt) {
+ I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate180, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I210Rotate270_Opt) {
+ I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate270, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+static void I410TestRotate(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ libyuv::RotationMode mode,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (src_width < 1) {
+ src_width = 1;
+ }
+ if (src_height == 0) {
+ src_height = 1;
+ }
+ if (dst_width < 1) {
+ dst_width = 1;
+ }
+ if (dst_height < 1) {
+ dst_height = 1;
+ }
+ int src_i410_y_size = src_width * Abs(src_height);
+ int src_i410_uv_size = src_width * Abs(src_height);
+ int src_i410_size = src_i410_y_size + src_i410_uv_size * 2;
+ align_buffer_page_end_16(src_i410, src_i410_size);
+ for (int i = 0; i < src_i410_size; ++i) {
+ src_i410[i] = fastrand() & 0x3ff;
+ }
+
+ int dst_i410_y_size = dst_width * dst_height;
+ int dst_i410_uv_size = dst_width * dst_height;
+ int dst_i410_size = dst_i410_y_size + dst_i410_uv_size * 2;
+ align_buffer_page_end_16(dst_i410_c, dst_i410_size);
+ align_buffer_page_end_16(dst_i410_opt, dst_i410_size);
+ memset(dst_i410_c, 2, dst_i410_size * 2);
+ memset(dst_i410_opt, 3, dst_i410_size * 2);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width,
+ src_i410 + src_i410_y_size + src_i410_uv_size, src_width,
+ dst_i410_c, dst_width, dst_i410_c + dst_i410_y_size, dst_width,
+ dst_i410_c + dst_i410_y_size + dst_i410_uv_size, dst_width,
+ src_width, src_height, mode);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (int i = 0; i < benchmark_iterations; ++i) {
+ I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width,
+ src_i410 + src_i410_y_size + src_i410_uv_size, src_width,
+ dst_i410_opt, dst_width, dst_i410_opt + dst_i410_y_size,
+ dst_width, dst_i410_opt + dst_i410_y_size + dst_i410_uv_size,
+ dst_width, src_width, src_height, mode);
+ }
+
+ // Rotation should be exact.
+ for (int i = 0; i < dst_i410_size; ++i) {
+ EXPECT_EQ(dst_i410_c[i], dst_i410_opt[i]);
+ }
+
+ free_aligned_buffer_page_end_16(dst_i410_c);
+ free_aligned_buffer_page_end_16(dst_i410_opt);
+ free_aligned_buffer_page_end_16(src_i410);
+}
+
+TEST_F(LibYUVRotateTest, I410Rotate0_Opt) {
+ I410TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate0, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I410Rotate90_Opt) {
+ I410TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate90, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I410Rotate180_Opt) {
+ I410TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate180, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I410Rotate270_Opt) {
+ I410TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate270, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+#if defined(ENABLE_ROW_TESTS)
+
+TEST_F(LibYUVRotateTest, Transpose4x4_Test) {
+ // dst width and height
+ const int width = 4;
+ const int height = 4;
+ int src_pixels[4][4];
+ int dst_pixels_c[4][4];
+ int dst_pixels_opt[4][4];
+
+ for (int i = 0; i < 4; ++i) {
+ for (int j = 0; j < 4; ++j) {
+ src_pixels[i][j] = i * 10 + j;
+ }
+ }
+ memset(dst_pixels_c, 1, width * height * 4);
+ memset(dst_pixels_opt, 2, width * height * 4);
+
+ Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_c, width * 4, width);
+
+ const int benchmark_iterations =
+ (benchmark_iterations_ * benchmark_width_ * benchmark_height_ + 15) /
+ (4 * 4);
+ for (int i = 0; i < benchmark_iterations; ++i) {
+#if defined(HAS_TRANSPOSE4X4_32_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ Transpose4x4_32_NEON((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else
+#elif defined(HAS_TRANSPOSE4X4_32_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ Transpose4x4_32_SSE2((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else
+#endif
+ {
+ Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ }
+ }
+
+ for (int i = 0; i < 4; ++i) {
+ for (int j = 0; j < 4; ++j) {
+ EXPECT_EQ(dst_pixels_c[i][j], src_pixels[j][i]);
+ EXPECT_EQ(dst_pixels_c[i][j], dst_pixels_opt[i][j]);
+ }
+ }
+}
+
+TEST_F(LibYUVRotateTest, Transpose4x4_Opt) {
+ // dst width and height
+ const int width = ((benchmark_width_ * benchmark_height_ + 3) / 4 + 3) & ~3;
+ const int height = 4;
+ align_buffer_page_end(src_pixels, height * width * 4);
+ align_buffer_page_end(dst_pixels_c, width * height * 4);
+ align_buffer_page_end(dst_pixels_opt, width * height * 4);
+
+ MemRandomize(src_pixels, height * width * 4);
+ memset(dst_pixels_c, 1, width * height * 4);
+ memset(dst_pixels_opt, 2, width * height * 4);
+
+ Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_c, width * 4, width);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+#if defined(HAS_TRANSPOSE4X4_32_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ Transpose4x4_32_NEON((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else
+#elif defined(HAS_TRANSPOSE4X4_32_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ Transpose4x4_32_AVX2((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else if (TestCpuFlag(kCpuHasSSE2)) {
+ Transpose4x4_32_SSE2((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else
+#endif
+ {
+ Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ }
+ }
+
+ for (int i = 0; i < width * height; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(dst_pixels_c);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+}
+
+#endif // ENABLE_ROW_TESTS
+
} // namespace libyuv
diff --git a/files/unit_test/scale_uv_test.cc b/files/unit_test/scale_uv_test.cc
index 3d524bef..dab217c9 100644
--- a/files/unit_test/scale_uv_test.cc
+++ b/files/unit_test/scale_uv_test.cc
@@ -39,55 +39,35 @@ static int UVTestFilter(int src_width,
return 0;
}
- int i, j;
- const int b = 0; // 128 to test for padding/stride.
- int64_t src_uv_plane_size =
- (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 2LL;
- int src_stride_uv = (b * 2 + Abs(src_width)) * 2;
+ int i;
+ int64_t src_uv_plane_size = Abs(src_width) * Abs(src_height) * 2LL;
+ int src_stride_uv = Abs(src_width) * 2;
+ int64_t dst_uv_plane_size = dst_width * dst_height * 2LL;
+ int dst_stride_uv = dst_width * 2;
align_buffer_page_end(src_uv, src_uv_plane_size);
- if (!src_uv) {
- printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
- return 0;
- }
- MemRandomize(src_uv, src_uv_plane_size);
-
- int64_t dst_uv_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 2LL;
- int dst_stride_uv = (b * 2 + dst_width) * 2;
-
align_buffer_page_end(dst_uv_c, dst_uv_plane_size);
align_buffer_page_end(dst_uv_opt, dst_uv_plane_size);
- if (!dst_uv_c || !dst_uv_opt) {
+
+ if (!src_uv || !dst_uv_c || !dst_uv_opt) {
printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
return 0;
}
+ MemRandomize(src_uv, src_uv_plane_size);
memset(dst_uv_c, 2, dst_uv_plane_size);
- memset(dst_uv_opt, 3, dst_uv_plane_size);
-
- // Warm up both versions for consistent benchmarks.
- MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
- UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
- src_height, dst_uv_c + (dst_stride_uv * b) + b * 2, dst_stride_uv,
- dst_width, dst_height, f);
- MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
- UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
- src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv,
- dst_width, dst_height, f);
+ memset(dst_uv_opt, 123, dst_uv_plane_size);
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
double c_time = get_time();
- UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
- src_height, dst_uv_c + (dst_stride_uv * b) + b * 2, dst_stride_uv,
+ UVScale(src_uv, src_stride_uv, src_width, src_height, dst_uv_c, dst_stride_uv,
dst_width, dst_height, f);
-
c_time = (get_time() - c_time);
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
double opt_time = get_time();
for (i = 0; i < benchmark_iterations; ++i) {
- UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
- src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv,
- dst_width, dst_height, f);
+ UVScale(src_uv, src_stride_uv, src_width, src_height, dst_uv_opt,
+ dst_stride_uv, dst_width, dst_height, f);
}
opt_time = (get_time() - opt_time) / benchmark_iterations;
@@ -95,18 +75,11 @@ static int UVTestFilter(int src_width,
printf("filter %d - %8d us C - %8d us OPT\n", f,
static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
- // C version may be a little off from the optimized. Order of
- // operations may introduce rounding somewhere. So do a difference
- // of the buffers and look to see that the max difference isn't
- // over 2.
int max_diff = 0;
- for (i = b; i < (dst_height + b); ++i) {
- for (j = b * 2; j < (dst_width + b) * 2; ++j) {
- int abs_diff = Abs(dst_uv_c[(i * dst_stride_uv) + j] -
- dst_uv_opt[(i * dst_stride_uv) + j]);
- if (abs_diff > max_diff) {
- max_diff = abs_diff;
- }
+ for (i = 0; i < dst_uv_plane_size; ++i) {
+ int abs_diff = Abs(dst_uv_c[i] - dst_uv_opt[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
}
}
@@ -121,28 +94,26 @@ static int UVTestFilter(int src_width,
#define DX(x, nom, denom) static_cast<int>((Abs(x) / nom) * nom)
#define SX(x, nom, denom) static_cast<int>((x / nom) * denom)
-#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
+#define TEST_FACTOR1(name, filter, nom, denom) \
TEST_F(LibYUVScaleTest, UVScaleDownBy##name##_##filter) { \
int diff = UVTestFilter( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
- EXPECT_LE(diff, max_diff); \
+ EXPECT_EQ(0, diff); \
}
#if defined(ENABLE_FULL_TESTS)
-// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
-// filtering is different fixed point implementations for SSSE3, Neon and C.
-#define TEST_FACTOR(name, nom, denom) \
- TEST_FACTOR1(name, None, nom, denom, 0) \
- TEST_FACTOR1(name, Linear, nom, denom, 3) \
- TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
- TEST_FACTOR1(name, Box, nom, denom, 3)
+// Test a scale factor with all 4 filters. Expect exact for SIMD vs C.
+#define TEST_FACTOR(name, nom, denom) \
+ TEST_FACTOR1(name, None, nom, denom) \
+ TEST_FACTOR1(name, Linear, nom, denom) \
+ TEST_FACTOR1(name, Bilinear, nom, denom) \
+ TEST_FACTOR1(name, Box, nom, denom)
#else
// Test a scale factor with Bilinear.
-#define TEST_FACTOR(name, nom, denom) \
- TEST_FACTOR1(name, Bilinear, nom, denom, 3)
+#define TEST_FACTOR(name, nom, denom) TEST_FACTOR1(name, Bilinear, nom, denom)
#endif
TEST_FACTOR(2, 1, 2)
diff --git a/files/unit_test/testdata/riscv64.txt b/files/unit_test/testdata/riscv64.txt
new file mode 100644
index 00000000..fbb4200f
--- /dev/null
+++ b/files/unit_test/testdata/riscv64.txt
@@ -0,0 +1,4 @@
+processor : 0
+hart : 1
+isa : rv64imac
+mmu : sv48 \ No newline at end of file
diff --git a/files/unit_test/testdata/riscv64_rvv.txt b/files/unit_test/testdata/riscv64_rvv.txt
new file mode 100644
index 00000000..af1b3f36
--- /dev/null
+++ b/files/unit_test/testdata/riscv64_rvv.txt
@@ -0,0 +1,4 @@
+processor : 0
+hart : 1
+isa : rv64imafdcv
+mmu : sv48 \ No newline at end of file
diff --git a/files/unit_test/testdata/riscv64_rvv_zvfh.txt b/files/unit_test/testdata/riscv64_rvv_zvfh.txt
new file mode 100644
index 00000000..c416c1af
--- /dev/null
+++ b/files/unit_test/testdata/riscv64_rvv_zvfh.txt
@@ -0,0 +1,4 @@
+processor : 0
+hart : 1
+isa : rv64imafdcv_zfh_zvfh
+mmu : sv48 \ No newline at end of file
diff --git a/files/unit_test/unit_test.cc b/files/unit_test/unit_test.cc
index 61145a46..b66ebfab 100644
--- a/files/unit_test/unit_test.cc
+++ b/files/unit_test/unit_test.cc
@@ -88,6 +88,11 @@ int TestCpuEnv(int cpu_info) {
cpu_info &= ~libyuv::kCpuHasLASX;
}
#endif
+#if defined(__riscv) && defined(__linux__)
+ if (TestEnv("LIBYUV_DISABLE_RVV")) {
+ cpu_info &= ~libyuv::kCpuHasRVV;
+ }
+#endif
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
(defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
defined(_M_IX86))
diff --git a/files/unit_test/unit_test.h b/files/unit_test/unit_test.h
index 0a8df4d2..99cc8d19 100644
--- a/files/unit_test/unit_test.h
+++ b/files/unit_test/unit_test.h
@@ -11,10 +11,10 @@
#ifndef UNIT_TEST_UNIT_TEST_H_ // NOLINT
#define UNIT_TEST_UNIT_TEST_H_
+#include <stddef.h> // For NULL
#ifdef _WIN32
#include <windows.h>
#else
-#include <sys/resource.h>
#include <sys/time.h>
#endif
@@ -77,7 +77,18 @@ static inline bool SizeValid(int src_width,
#define free_aligned_buffer_page_end(var) \
free(var##_mem); \
- var = 0
+ var = NULL
+
+#define align_buffer_page_end_16(var, size) \
+ uint8_t* var##_mem = \
+ reinterpret_cast<uint8_t*>(malloc(((size)*2 + 4095 + 63) & ~4095)); \
+ uint16_t* var = reinterpret_cast<uint16_t*>( \
+ (intptr_t)(var##_mem + (((size)*2 + 4095 + 63) & ~4095) - (size)*2) & \
+ ~63)
+
+#define free_aligned_buffer_page_end_16(var) \
+ free(var##_mem); \
+ var = NULL
#ifdef WIN32
static inline double get_time() {