aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGrant Jensen <grantjensen@google.com>2022-08-24 11:37:13 -0700
committerXNNPACK Team <xnnpack-github-robot@google.com>2022-08-24 11:38:01 -0700
commit84e5af66f53cd9b2b57dff1ef3aacf2b988824ff (patch)
treecfc4c881e9b313a7c6cb33ac1a8a78c72cbee2ef
parentedfac6301d77feb42f4f0562d02251fd02fe4825 (diff)
downloadXNNPACK-84e5af66f53cd9b2b57dff1ef3aacf2b988824ff.tar.gz
Add x8 & x16 eager API call for transpose.
PiperOrigin-RevId: 469782719
-rw-r--r--include/xnnpack.h18
-rw-r--r--src/operators/transpose-nd.c43
-rw-r--r--test/transpose-nd.cc126
-rw-r--r--test/transpose-operator-tester.h58
4 files changed, 245 insertions, 0 deletions
diff --git a/include/xnnpack.h b/include/xnnpack.h
index b183d32a9..5414d8ffa 100644
--- a/include/xnnpack.h
+++ b/include/xnnpack.h
@@ -2876,6 +2876,15 @@ enum xnn_status xnn_setup_transpose_nd_x16(
const size_t* output_perm,
pthreadpool_t threadpool);
+enum xnn_status xnn_run_transpose_nd_x16(
+ uint32_t flags,
+ const void* input,
+ void* output,
+ const size_t num_dims,
+ const size_t* input_shape,
+ const size_t* output_perm,
+ pthreadpool_t threadpool);
+
#endif // XNN_NO_X16_OPERATORS
#ifndef XNN_NO_QC8_OPERATORS
@@ -3728,6 +3737,15 @@ enum xnn_status xnn_setup_transpose_nd_x8(
const size_t* output_perm,
pthreadpool_t threadpool);
+enum xnn_status xnn_run_transpose_nd_x8(
+ uint32_t flags,
+ const void* input,
+ void* output,
+ const size_t num_dims,
+ const size_t* input_shape,
+ const size_t* output_perm,
+ pthreadpool_t threadpool);
+
#endif // XNN_NO_X8_OPERATORS
#ifndef XNN_NO_CVT_OPERATORS
diff --git a/src/operators/transpose-nd.c b/src/operators/transpose-nd.c
index 2d57791f7..454b8036f 100644
--- a/src/operators/transpose-nd.c
+++ b/src/operators/transpose-nd.c
@@ -486,6 +486,49 @@ enum xnn_status xnn_run_transpose_nd_x32(
threadpool);
}
+// Eager (single-call) transpose of 16-bit elements: delegates to
+// run_transpose_nd with sizeof(uint16_t) element size and the x16
+// init flag / operator type. Mirrors xnn_run_transpose_nd_x32 above.
+enum xnn_status xnn_run_transpose_nd_x16(
+    uint32_t flags,
+    const void* input,
+    void* output,
+    const size_t num_dims,
+    const size_t* input_shape,
+    const size_t* output_perm,
+    pthreadpool_t threadpool) {
+
+  return run_transpose_nd(
+    flags,
+    input,
+    output,
+    num_dims,
+    input_shape,
+    output_perm,
+    sizeof(uint16_t),
+    XNN_INIT_FLAG_X16,
+    xnn_operator_type_transpose_nd_x16,
+    threadpool);
+}
+
+// Eager (single-call) transpose of 8-bit elements: delegates to
+// run_transpose_nd with sizeof(uint8_t) element size and the x8
+// init flag / operator type. Mirrors xnn_run_transpose_nd_x32 above.
+enum xnn_status xnn_run_transpose_nd_x8(
+    uint32_t flags,
+    const void* input,
+    void* output,
+    const size_t num_dims,
+    const size_t* input_shape,
+    const size_t* output_perm,
+    pthreadpool_t threadpool) {
+
+  return run_transpose_nd(
+    flags,
+    input,
+    output,
+    num_dims,
+    input_shape,
+    output_perm,
+    sizeof(uint8_t),
+    XNN_INIT_FLAG_X8,
+    xnn_operator_type_transpose_nd_x8,
+    threadpool);
+}
enum xnn_status xnn_create_depth_to_space_nchw2nhwc_x32(
size_t output_channels,
diff --git a/test/transpose-nd.cc b/test/transpose-nd.cc
index da1b37e42..1dd8dd8e8 100644
--- a/test/transpose-nd.cc
+++ b/test/transpose-nd.cc
@@ -20,6 +20,14 @@ TEST(TRANSPOSE_ND_X8, 1D) {
.TestX8();
}
+TEST(TRANSPOSE_ND_X8, Run1D) {
+ TransposeOperatorTester()
+ .num_dims(1)
+ .shape({713})
+ .perm({0})
+ .TestRunX8();
+}
+
TEST(TRANSPOSE_ND_X8, 2D) {
std::vector<size_t> perm{0,1};
do {
@@ -31,6 +39,17 @@ TEST(TRANSPOSE_ND_X8, 2D) {
} while (std::next_permutation(perm.begin(), perm.end()));
}
+TEST(TRANSPOSE_ND_X8, Run2D) {
+ std::vector<size_t> perm{0,1};
+ do {
+ TransposeOperatorTester()
+ .num_dims(2)
+ .shape({37, 113})
+ .perm(perm)
+ .TestRunX8();
+ } while (std::next_permutation(perm.begin(), perm.end()));
+}
+
TEST(TRANSPOSE_ND_X8, 3D) {
std::vector<size_t> perm{0,1,2};
do {
@@ -42,6 +61,17 @@ TEST(TRANSPOSE_ND_X8, 3D) {
} while (std::next_permutation(perm.begin(), perm.end()));
}
+TEST(TRANSPOSE_ND_X8, Run3D) {
+ std::vector<size_t> perm{0,1,2};
+ do {
+ TransposeOperatorTester()
+ .num_dims(3)
+ .shape({5, 7, 11})
+ .perm(perm)
+ .TestRunX8();
+ } while (std::next_permutation(perm.begin(), perm.end()));
+}
+
TEST(TRANSPOSE_ND_X32, 4D_copy) {
TransposeOperatorTester()
.num_dims(4)
@@ -61,6 +91,17 @@ TEST(TRANSPOSE_ND_X8, 4D) {
} while (std::next_permutation(perm.begin(), perm.end()));
}
+TEST(TRANSPOSE_ND_X8, Run4D) {
+ std::vector<size_t> perm{0,1,2,3};
+ do {
+ TransposeOperatorTester()
+ .num_dims(4)
+ .shape({5,7,11,13})
+ .perm(perm)
+ .TestRunX8();
+ } while (std::next_permutation(perm.begin(), perm.end()));
+}
+
TEST(TRANSPOSE_ND_X8, 5D) {
std::vector<size_t> perm{0,1,2,3,4};
do {
@@ -72,6 +113,17 @@ TEST(TRANSPOSE_ND_X8, 5D) {
} while (std::next_permutation(perm.begin(), perm.end()));
}
+TEST(TRANSPOSE_ND_X8, Run5D) {
+ std::vector<size_t> perm{0,1,2,3,4};
+ do {
+ TransposeOperatorTester()
+ .num_dims(5)
+ .shape({3,5,7,11,13})
+ .perm(perm)
+ .TestRunX8();
+ } while (std::next_permutation(perm.begin(), perm.end()));
+}
+
TEST(TRANSPOSE_ND_X8, 6D) {
std::vector<size_t> perm{0,1,2,3,4,5};
do {
@@ -83,6 +135,17 @@ TEST(TRANSPOSE_ND_X8, 6D) {
} while (std::next_permutation(perm.begin(), perm.end()));
}
+TEST(TRANSPOSE_ND_X8, Run6D) {
+ std::vector<size_t> perm{0,1,2,3,4,5};
+ do {
+ TransposeOperatorTester()
+ .num_dims(6)
+ .shape({2,3,5,7,11,13})
+ .perm(perm)
+ .TestRunX8();
+ } while (std::next_permutation(perm.begin(), perm.end()));
+}
+
TEST(TRANSPOSE_ND_X16, 1D) {
TransposeOperatorTester()
.num_dims(1)
@@ -91,6 +154,14 @@ TEST(TRANSPOSE_ND_X16, 1D) {
.TestX16();
}
+TEST(TRANSPOSE_ND_X16, Run1D) {
+ TransposeOperatorTester()
+ .num_dims(1)
+ .shape({713})
+ .perm({0})
+ .TestRunX16();
+}
+
TEST(TRANSPOSE_ND_X16, 2D) {
std::vector<size_t> perm{0,1};
do {
@@ -102,6 +173,17 @@ TEST(TRANSPOSE_ND_X16, 2D) {
} while (std::next_permutation(perm.begin(), perm.end()));
}
+TEST(TRANSPOSE_ND_X16, Run2D) {
+ std::vector<size_t> perm{0,1};
+ do {
+ TransposeOperatorTester()
+ .num_dims(2)
+ .shape({37, 113})
+ .perm(perm)
+ .TestRunX16();
+ } while (std::next_permutation(perm.begin(), perm.end()));
+}
+
TEST(TRANSPOSE_ND_X16, 3D) {
std::vector<size_t> perm{0,1,2};
do {
@@ -113,6 +195,17 @@ TEST(TRANSPOSE_ND_X16, 3D) {
} while (std::next_permutation(perm.begin(), perm.end()));
}
+TEST(TRANSPOSE_ND_X16, Run3D) {
+ std::vector<size_t> perm{0,1,2};
+ do {
+ TransposeOperatorTester()
+ .num_dims(3)
+ .shape({5, 7, 11})
+ .perm(perm)
+ .TestRunX16();
+ } while (std::next_permutation(perm.begin(), perm.end()));
+}
+
TEST(TRANSPOSE_ND_X16, 4D) {
std::vector<size_t> perm{0,1,2,3};
do {
@@ -124,6 +217,17 @@ TEST(TRANSPOSE_ND_X16, 4D) {
} while (std::next_permutation(perm.begin(), perm.end()));
}
+TEST(TRANSPOSE_ND_X16, Run4D) {
+ std::vector<size_t> perm{0,1,2,3};
+ do {
+ TransposeOperatorTester()
+ .num_dims(4)
+ .shape({5,7,11,13})
+ .perm(perm)
+ .TestRunX16();
+ } while (std::next_permutation(perm.begin(), perm.end()));
+}
+
TEST(TRANSPOSE_ND_X16, 5D) {
std::vector<size_t> perm{0,1,2,3,4};
do {
@@ -135,6 +239,17 @@ TEST(TRANSPOSE_ND_X16, 5D) {
} while (std::next_permutation(perm.begin(), perm.end()));
}
+// Exercises the eager x16 API for all 5-D permutations.
+// Fixed: must call TestRunX16() (eager xnn_run_transpose_nd_x16 path),
+// not TestX16() (create/setup operator path) — otherwise the new eager
+// 5-D x16 code is never covered, unlike every sibling RunND test.
+TEST(TRANSPOSE_ND_X16, Run5D) {
+  std::vector<size_t> perm{0,1,2,3,4};
+  do {
+    TransposeOperatorTester()
+      .num_dims(5)
+      .shape({3,5,7,11,13})
+      .perm(perm)
+      .TestRunX16();
+  } while (std::next_permutation(perm.begin(), perm.end()));
+}
+
TEST(TRANSPOSE_ND_X16, 6D) {
std::vector<size_t> perm{0,1,2,3,4,5};
do {
@@ -146,6 +261,17 @@ TEST(TRANSPOSE_ND_X16, 6D) {
} while (std::next_permutation(perm.begin(), perm.end()));
}
+TEST(TRANSPOSE_ND_X16, Run6D) {
+ std::vector<size_t> perm{0,1,2,3,4,5};
+ do {
+ TransposeOperatorTester()
+ .num_dims(6)
+ .shape({2,3,5,7,11,13})
+ .perm(perm)
+ .TestRunX16();
+ } while (std::next_permutation(perm.begin(), perm.end()));
+}
+
TEST(TRANSPOSE_ND_X32, 1D) {
TransposeOperatorTester()
.num_dims(1)
diff --git a/test/transpose-operator-tester.h b/test/transpose-operator-tester.h
index a5d37fcde..6e067754a 100644
--- a/test/transpose-operator-tester.h
+++ b/test/transpose-operator-tester.h
@@ -99,6 +99,35 @@ class TransposeOperatorTester {
}
}
+ void TestRunX8() const {
+ const size_t count = std::accumulate(dims().cbegin(), dims().cend(), 1, std::multiplies<size_t>());
+ std::vector<uint8_t> input(count + XNN_EXTRA_BYTES / sizeof(uint8_t));
+ std::vector<uint8_t> output(count);
+ std::vector<size_t> input_stride(input.size(), 1);
+ std::vector<size_t> output_stride(input.size(), 1);
+ for (size_t i = num_dims() - 1; i > 0; --i) {
+ input_stride[i - 1] = input_stride[i] * shape_[i];
+ output_stride[i - 1] = output_stride[i] * shape_[perm()[i]];
+ }
+ ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
+ std::iota(input.begin(), input.end(), 0);
+ std::fill(output.begin(), output.end(), UINT8_C(0xA5));
+
+ // Call transpose eager API
+ ASSERT_EQ(xnn_status_success,
+ xnn_run_transpose_nd_x8(
+ 0 /* flags */,
+ input.data(), output.data(),
+ num_dims(), shape_.data(), perm_.data(),
+ nullptr /* thread pool */));
+
+ // Verify results.
+ for (size_t i = 0; i < count; ++i) {
+ const size_t in_idx = reference_index(input_stride.data(), output_stride.data(), perm_.data(), num_dims(), i);
+ ASSERT_EQ(input[in_idx], output[i]);
+ }
+ }
+
void TestX16() const {
size_t count = std::accumulate(dims().cbegin(), dims().cend(), 1, std::multiplies<size_t>());
std::vector<uint16_t> input(count + XNN_EXTRA_BYTES / sizeof(uint16_t));
@@ -139,6 +168,35 @@ class TransposeOperatorTester {
}
}
+  // Tests the eager xnn_run_transpose_nd_x16 API against reference_index.
+  void TestRunX16() const {
+    const size_t count = std::accumulate(dims().cbegin(), dims().cend(), 1, std::multiplies<size_t>());
+    std::vector<uint16_t> input(count + XNN_EXTRA_BYTES / sizeof(uint16_t));
+    std::vector<uint16_t> output(count);
+    std::vector<size_t> input_stride(input.size(), 1);
+    std::vector<size_t> output_stride(input.size(), 1);
+    for (size_t i = num_dims() - 1; i > 0; --i) {
+      input_stride[i - 1] = input_stride[i] * shape_[i];
+      output_stride[i - 1] = output_stride[i] * shape_[perm()[i]];
+    }
+    ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
+    std::iota(input.begin(), input.end(), 0);
+    // Fixed: 0xDEADBEEF does not fit in uint16_t (UINT16_C requires a value
+    // representable in uint_least16_t); it silently truncated to 0xBEEF and
+    // caused narrowing warnings. Use the in-range sentinel directly.
+    std::fill(output.begin(), output.end(), UINT16_C(0xBEEF));
+
+    // Call transpose eager API
+    ASSERT_EQ(xnn_status_success,
+              xnn_run_transpose_nd_x16(
+                  0 /* flags */,
+                  input.data(), output.data(),
+                  num_dims(), shape_.data(), perm_.data(),
+                  nullptr /* thread pool */));
+
+    // Verify results.
+    for (size_t i = 0; i < count; ++i) {
+      const size_t in_idx = reference_index(input_stride.data(), output_stride.data(), perm_.data(), num_dims(), i);
+      ASSERT_EQ(input[in_idx], output[i]);
+    }
+  }
+
void TestX32() const {
size_t count = std::accumulate(dims().cbegin(), dims().cend(), 1, std::multiplies<size_t>());
std::vector<uint32_t> input(count + XNN_EXTRA_BYTES / sizeof(uint32_t));