aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarat Dukhan <maratek@google.com>2022-02-04 03:34:32 -0800
committerXNNPACK Team <xnnpack-github-robot@google.com>2022-02-04 03:35:29 -0800
commit2bd2bd2413a23903b5e34621a2c69ea0fd5b51b2 (patch)
tree66bc554005566beeccff096cee67a2932b448858
parent670826b21afece38d90b03501bfea74378bfd7dd (diff)
downloadXNNPACK-2bd2bd2413a23903b5e34621a2c69ea0fd5b51b2.tar.gz
X8 & X16 Copy NC operators
PiperOrigin-RevId: 426361456
-rw-r--r--include/xnnpack.h28
-rw-r--r--src/operator-strings.c4
-rw-r--r--src/operators/unary-elementwise-nc.c78
-rw-r--r--src/xnnpack/operator.h2
-rw-r--r--test/copy-nc.cc120
-rw-r--r--test/copy-operator-tester.h110
6 files changed, 336 insertions, 6 deletions
diff --git a/include/xnnpack.h b/include/xnnpack.h
index f8a581c09..88d20c7c0 100644
--- a/include/xnnpack.h
+++ b/include/xnnpack.h
@@ -2075,6 +2075,20 @@ enum xnn_status xnn_setup_constant_pad_nd_x16(
void* output,
pthreadpool_t threadpool);
+enum xnn_status xnn_create_copy_nc_x16(
+ size_t channels,
+ size_t input_stride,
+ size_t output_stride,
+ uint32_t flags,
+ xnn_operator_t* copy_op_out);
+
+enum xnn_status xnn_setup_copy_nc_x16(
+ xnn_operator_t copy_op,
+ size_t batch_size,
+ const void* input,
+ void* output,
+ pthreadpool_t threadpool);
+
#endif // XNN_NO_X16_OPERATORS
#ifndef XNN_NO_QC8_OPERATORS
@@ -2812,6 +2826,20 @@ enum xnn_status xnn_setup_resize_bilinear2d_nhwc_u8(
#ifndef XNN_NO_X8_OPERATORS
+enum xnn_status xnn_create_copy_nc_x8(
+ size_t channels,
+ size_t input_stride,
+ size_t output_stride,
+ uint32_t flags,
+ xnn_operator_t* copy_op_out);
+
+enum xnn_status xnn_setup_copy_nc_x8(
+ xnn_operator_t copy_op,
+ size_t batch_size,
+ const void* input,
+ void* output,
+ pthreadpool_t threadpool);
+
enum xnn_status xnn_create_channel_shuffle_nc_x8(
size_t groups,
size_t group_channels,
diff --git a/src/operator-strings.c b/src/operator-strings.c
index dce9c4671..fd8a3ccef 100644
--- a/src/operator-strings.c
+++ b/src/operator-strings.c
@@ -78,6 +78,10 @@ const char* xnn_operator_type_to_string(enum xnn_operator_type type) {
return "Convolution (NHWC, QU8)";
case xnn_operator_type_convolution_nchw_f32:
return "Convolution (NCHW, F32)";
+ case xnn_operator_type_copy_nc_x8:
+ return "Copy (NC, X8)";
+ case xnn_operator_type_copy_nc_x16:
+ return "Copy (NC, X16)";
case xnn_operator_type_copy_nc_x32:
return "Copy (NC, X32)";
case xnn_operator_type_deconvolution_nhwc_f32:
diff --git a/src/operators/unary-elementwise-nc.c b/src/operators/unary-elementwise-nc.c
index 67170710a..19e24c456 100644
--- a/src/operators/unary-elementwise-nc.c
+++ b/src/operators/unary-elementwise-nc.c
@@ -479,6 +479,36 @@ enum xnn_status xnn_create_convert_nc_qu8_f32(
convert_op_out);
}
+enum xnn_status xnn_create_copy_nc_x8(
+ size_t channels,
+ size_t input_stride,
+ size_t output_stride,
+ uint32_t flags,
+ xnn_operator_t* copy_op_out)
+{
+ return create_unary_elementwise_nc(
+ channels, input_stride, output_stride, flags,
+ NULL, 0,
+ xnn_operator_type_copy_nc_x8,
+ xnn_params.xx.copy,
+ copy_op_out);
+}
+
+enum xnn_status xnn_create_copy_nc_x16(
+ size_t channels,
+ size_t input_stride,
+ size_t output_stride,
+ uint32_t flags,
+ xnn_operator_t* copy_op_out)
+{
+ return create_unary_elementwise_nc(
+ channels, input_stride, output_stride, flags,
+ NULL, 0,
+ xnn_operator_type_copy_nc_x16,
+ xnn_params.xx.copy,
+ copy_op_out);
+}
+
enum xnn_status xnn_create_copy_nc_x32(
size_t channels,
size_t input_stride,
@@ -1001,6 +1031,54 @@ enum xnn_status xnn_setup_convert_nc_qu8_f32(
pthreadpool_get_threads_count(threadpool));
}
+enum xnn_status xnn_setup_copy_nc_x8(
+ xnn_operator_t copy_op,
+ size_t batch_size,
+ const void* input,
+ void* output,
+ pthreadpool_t threadpool)
+{
+ if (copy_op->type != xnn_operator_type_copy_nc_x8) {
+ xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
+ xnn_operator_type_to_string(xnn_operator_type_copy_nc_x8),
+ xnn_operator_type_to_string(copy_op->type));
+ return xnn_status_invalid_parameter;
+ }
+ copy_op->state = xnn_run_state_invalid;
+
+ return setup_unary_elementwise_nc(
+ copy_op,
+ batch_size, input, output,
+    0 /* log2(sizeof(uint8_t)) */,
+    0 /* log2(sizeof(uint8_t)) */,
+ NULL, 0,
+ pthreadpool_get_threads_count(threadpool));
+}
+
+enum xnn_status xnn_setup_copy_nc_x16(
+ xnn_operator_t copy_op,
+ size_t batch_size,
+ const void* input,
+ void* output,
+ pthreadpool_t threadpool)
+{
+ if (copy_op->type != xnn_operator_type_copy_nc_x16) {
+ xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
+ xnn_operator_type_to_string(xnn_operator_type_copy_nc_x16),
+ xnn_operator_type_to_string(copy_op->type));
+ return xnn_status_invalid_parameter;
+ }
+ copy_op->state = xnn_run_state_invalid;
+
+ return setup_unary_elementwise_nc(
+ copy_op,
+ batch_size, input, output,
+ 1 /* log2(sizeof(uint16_t)) */,
+ 1 /* log2(sizeof(uint16_t)) */,
+ NULL, 0,
+ pthreadpool_get_threads_count(threadpool));
+}
+
enum xnn_status xnn_setup_copy_nc_x32(
xnn_operator_t copy_op,
size_t batch_size,
diff --git a/src/xnnpack/operator.h b/src/xnnpack/operator.h
index 7f4429f3f..637098947 100644
--- a/src/xnnpack/operator.h
+++ b/src/xnnpack/operator.h
@@ -63,6 +63,8 @@ enum xnn_operator_type {
xnn_operator_type_convolution_nhwc_qc8,
xnn_operator_type_convolution_nhwc_qs8,
xnn_operator_type_convolution_nhwc_qu8,
+ xnn_operator_type_copy_nc_x8,
+ xnn_operator_type_copy_nc_x16,
xnn_operator_type_copy_nc_x32,
xnn_operator_type_deconvolution_nhwc_f32,
xnn_operator_type_deconvolution_nhwc_qs8,
diff --git a/test/copy-nc.cc b/test/copy-nc.cc
index 0c70b1a6f..74767f997 100644
--- a/test/copy-nc.cc
+++ b/test/copy-nc.cc
@@ -8,7 +8,117 @@
#include "copy-operator-tester.h"
-TEST(CLAMP_NC_X32, unit_batch) {
+TEST(COPY_NC_X8, unit_batch) {
+ for (size_t channels = 1; channels < 100; channels++) {
+ CopyOperatorTester()
+ .batch_size(1)
+ .channels(channels)
+ .iterations(3)
+ .TestX8();
+ }
+}
+
+TEST(COPY_NC_X8, small_batch) {
+ for (size_t channels = 1; channels < 100; channels++) {
+ CopyOperatorTester()
+ .batch_size(3)
+ .channels(channels)
+ .iterations(3)
+ .TestX8();
+ }
+}
+
+TEST(COPY_NC_X8, small_batch_with_input_stride) {
+ for (size_t channels = 1; channels < 100; channels += 15) {
+ CopyOperatorTester()
+ .batch_size(3)
+ .channels(channels)
+ .input_stride(129)
+ .iterations(3)
+ .TestX8();
+ }
+}
+
+TEST(COPY_NC_X8, small_batch_with_output_stride) {
+ for (size_t channels = 1; channels < 100; channels += 15) {
+ CopyOperatorTester()
+ .batch_size(3)
+ .channels(channels)
+ .output_stride(117)
+ .iterations(3)
+ .TestX8();
+ }
+}
+
+TEST(COPY_NC_X8, small_batch_with_input_and_output_stride) {
+ for (size_t channels = 1; channels < 100; channels += 15) {
+ CopyOperatorTester()
+ .batch_size(3)
+ .channels(channels)
+ .input_stride(129)
+ .output_stride(117)
+ .iterations(3)
+ .TestX8();
+ }
+}
+
+
+TEST(COPY_NC_X16, unit_batch) {
+ for (size_t channels = 1; channels < 100; channels++) {
+ CopyOperatorTester()
+ .batch_size(1)
+ .channels(channels)
+ .iterations(3)
+ .TestX16();
+ }
+}
+
+TEST(COPY_NC_X16, small_batch) {
+ for (size_t channels = 1; channels < 100; channels++) {
+ CopyOperatorTester()
+ .batch_size(3)
+ .channels(channels)
+ .iterations(3)
+ .TestX16();
+ }
+}
+
+TEST(COPY_NC_X16, small_batch_with_input_stride) {
+ for (size_t channels = 1; channels < 100; channels += 15) {
+ CopyOperatorTester()
+ .batch_size(3)
+ .channels(channels)
+ .input_stride(129)
+ .iterations(3)
+ .TestX16();
+ }
+}
+
+TEST(COPY_NC_X16, small_batch_with_output_stride) {
+ for (size_t channels = 1; channels < 100; channels += 15) {
+ CopyOperatorTester()
+ .batch_size(3)
+ .channels(channels)
+ .output_stride(117)
+ .iterations(3)
+ .TestX16();
+ }
+}
+
+TEST(COPY_NC_X16, small_batch_with_input_and_output_stride) {
+ for (size_t channels = 1; channels < 100; channels += 15) {
+ CopyOperatorTester()
+ .batch_size(3)
+ .channels(channels)
+ .input_stride(129)
+ .output_stride(117)
+ .iterations(3)
+ .TestX16();
+ }
+}
+
+
+TEST(COPY_NC_X32, unit_batch) {
for (size_t channels = 1; channels < 100; channels++) {
CopyOperatorTester()
.batch_size(1)
@@ -18,7 +128,7 @@ TEST(CLAMP_NC_X32, unit_batch) {
}
}
-TEST(CLAMP_NC_X32, small_batch) {
+TEST(COPY_NC_X32, small_batch) {
for (size_t channels = 1; channels < 100; channels++) {
CopyOperatorTester()
.batch_size(3)
@@ -28,7 +138,7 @@ TEST(CLAMP_NC_X32, small_batch) {
}
}
-TEST(CLAMP_NC_X32, small_batch_with_input_stride) {
+TEST(COPY_NC_X32, small_batch_with_input_stride) {
for (size_t channels = 1; channels < 100; channels += 15) {
CopyOperatorTester()
.batch_size(3)
@@ -39,7 +149,7 @@ TEST(CLAMP_NC_X32, small_batch_with_input_stride) {
}
}
-TEST(CLAMP_NC_X32, small_batch_with_output_stride) {
+TEST(COPY_NC_X32, small_batch_with_output_stride) {
for (size_t channels = 1; channels < 100; channels += 15) {
CopyOperatorTester()
.batch_size(3)
@@ -50,7 +160,7 @@ TEST(CLAMP_NC_X32, small_batch_with_output_stride) {
}
}
-TEST(CLAMP_NC_X32, small_batch_with_input_and_output_stride) {
+TEST(COPY_NC_X32, small_batch_with_input_and_output_stride) {
for (size_t channels = 1; channels < 100; channels += 15) {
CopyOperatorTester()
.batch_size(3)
diff --git a/test/copy-operator-tester.h b/test/copy-operator-tester.h
index aee4fe910..a23ab409c 100644
--- a/test/copy-operator-tester.h
+++ b/test/copy-operator-tester.h
@@ -80,12 +80,120 @@ class CopyOperatorTester {
return this->iterations_;
}
+ void TestX8() const {
+ std::random_device random_device;
+ auto rng = std::mt19937(random_device());
+ auto u8rng = std::bind(
+ std::uniform_int_distribution<uint32_t>( std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max()),
+ rng);
+
+ std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
+ (batch_size() - 1) * input_stride() + channels());
+ std::vector<uint8_t> output((batch_size() - 1) * output_stride() + channels());
+ std::vector<uint8_t> output_ref(batch_size() * channels());
+ for (size_t iteration = 0; iteration < iterations(); iteration++) {
+ std::generate(input.begin(), input.end(), std::ref(u8rng));
+      std::fill(output.begin(), output.end(), UINT8_C(0xFA));
+
+ // Compute reference results.
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t c = 0; c < channels(); c++) {
+ output_ref[i * channels() + c] = input[i * input_stride() + c];
+ }
+ }
+
+ // Create, setup, run, and destroy Copy operator.
+ ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
+ xnn_operator_t copy_op = nullptr;
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_create_copy_nc_x8(
+ channels(), input_stride(), output_stride(),
+ 0, &copy_op));
+ ASSERT_NE(nullptr, copy_op);
+
+ // Smart pointer to automatically delete copy_op.
+ std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_copy_op(copy_op, xnn_delete_operator);
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_setup_copy_nc_x8(
+ copy_op,
+ batch_size(),
+ input.data(), output.data(),
+ nullptr /* thread pool */));
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_run_operator(copy_op, nullptr /* thread pool */));
+
+ // Verify results.
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t c = 0; c < channels(); c++) {
+ ASSERT_EQ(output_ref[i * channels() + c], output[i * output_stride() + c])
+ << "at batch " << i << " / " << batch_size() << ", channel = " << c << " / " << channels();
+ }
+ }
+ }
+ }
+
+ void TestX16() const {
+ std::random_device random_device;
+ auto rng = std::mt19937(random_device());
+ auto u16rng = std::bind(std::uniform_int_distribution<uint16_t>(), rng);
+
+ std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) +
+ (batch_size() - 1) * input_stride() + channels());
+ std::vector<uint16_t> output((batch_size() - 1) * output_stride() + channels());
+ std::vector<uint16_t> output_ref(batch_size() * channels());
+ for (size_t iteration = 0; iteration < iterations(); iteration++) {
+ std::generate(input.begin(), input.end(), std::ref(u16rng));
+ std::fill(output.begin(), output.end(), UINT16_C(0xDEAD));
+
+ // Compute reference results.
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t c = 0; c < channels(); c++) {
+ output_ref[i * channels() + c] = input[i * input_stride() + c];
+ }
+ }
+
+ // Create, setup, run, and destroy Copy operator.
+ ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
+ xnn_operator_t copy_op = nullptr;
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_create_copy_nc_x16(
+ channels(), input_stride(), output_stride(),
+ 0, &copy_op));
+ ASSERT_NE(nullptr, copy_op);
+
+ // Smart pointer to automatically delete copy_op.
+ std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_copy_op(copy_op, xnn_delete_operator);
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_setup_copy_nc_x16(
+ copy_op,
+ batch_size(),
+ input.data(), output.data(),
+ nullptr /* thread pool */));
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_run_operator(copy_op, nullptr /* thread pool */));
+
+ // Verify results.
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t c = 0; c < channels(); c++) {
+ ASSERT_EQ(output_ref[i * channels() + c], output[i * output_stride() + c])
+ << "at batch " << i << " / " << batch_size() << ", channel = " << c << " / " << channels();
+ }
+ }
+ }
+ }
+
void TestX32() const {
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto u32rng = std::bind(std::uniform_int_distribution<uint32_t>(), rng);
- std::vector<uint32_t> input(XNN_EXTRA_BYTES / sizeof(float) +
+ std::vector<uint32_t> input(XNN_EXTRA_BYTES / sizeof(uint32_t) +
(batch_size() - 1) * input_stride() + channels());
std::vector<uint32_t> output((batch_size() - 1) * output_stride() + channels());
std::vector<uint32_t> output_ref(batch_size() * channels());