diff options
author | Marat Dukhan <maratek@google.com> | 2022-02-04 03:34:32 -0800 |
---|---|---|
committer | XNNPACK Team <xnnpack-github-robot@google.com> | 2022-02-04 03:35:29 -0800 |
commit | 2bd2bd2413a23903b5e34621a2c69ea0fd5b51b2 (patch) | |
tree | 66bc554005566beeccff096cee67a2932b448858 | |
parent | 670826b21afece38d90b03501bfea74378bfd7dd (diff) | |
download | XNNPACK-2bd2bd2413a23903b5e34621a2c69ea0fd5b51b2.tar.gz |
X8 & X16 Copy NC operators
PiperOrigin-RevId: 426361456
-rw-r--r-- | include/xnnpack.h | 28 | ||||
-rw-r--r-- | src/operator-strings.c | 4 | ||||
-rw-r--r-- | src/operators/unary-elementwise-nc.c | 78 | ||||
-rw-r--r-- | src/xnnpack/operator.h | 2 | ||||
-rw-r--r-- | test/copy-nc.cc | 120 | ||||
-rw-r--r-- | test/copy-operator-tester.h | 110 |
6 files changed, 336 insertions, 6 deletions
diff --git a/include/xnnpack.h b/include/xnnpack.h index f8a581c09..88d20c7c0 100644 --- a/include/xnnpack.h +++ b/include/xnnpack.h @@ -2075,6 +2075,20 @@ enum xnn_status xnn_setup_constant_pad_nd_x16( void* output, pthreadpool_t threadpool); +enum xnn_status xnn_create_copy_nc_x16( + size_t channels, + size_t input_stride, + size_t output_stride, + uint32_t flags, + xnn_operator_t* copy_op_out); + +enum xnn_status xnn_setup_copy_nc_x16( + xnn_operator_t copy_op, + size_t batch_size, + const void* input, + void* output, + pthreadpool_t threadpool); + #endif // XNN_NO_X16_OPERATORS #ifndef XNN_NO_QC8_OPERATORS @@ -2812,6 +2826,20 @@ enum xnn_status xnn_setup_resize_bilinear2d_nhwc_u8( #ifndef XNN_NO_X8_OPERATORS +enum xnn_status xnn_create_copy_nc_x8( + size_t channels, + size_t input_stride, + size_t output_stride, + uint32_t flags, + xnn_operator_t* copy_op_out); + +enum xnn_status xnn_setup_copy_nc_x8( + xnn_operator_t copy_op, + size_t batch_size, + const void* input, + void* output, + pthreadpool_t threadpool); + enum xnn_status xnn_create_channel_shuffle_nc_x8( size_t groups, size_t group_channels, diff --git a/src/operator-strings.c b/src/operator-strings.c index dce9c4671..fd8a3ccef 100644 --- a/src/operator-strings.c +++ b/src/operator-strings.c @@ -78,6 +78,10 @@ const char* xnn_operator_type_to_string(enum xnn_operator_type type) { return "Convolution (NHWC, QU8)"; case xnn_operator_type_convolution_nchw_f32: return "Convolution (NCHW, F32)"; + case xnn_operator_type_copy_nc_x8: + return "Copy (NC, X8)"; + case xnn_operator_type_copy_nc_x16: + return "Copy (NC, X16)"; case xnn_operator_type_copy_nc_x32: return "Copy (NC, X32)"; case xnn_operator_type_deconvolution_nhwc_f32: diff --git a/src/operators/unary-elementwise-nc.c b/src/operators/unary-elementwise-nc.c index 67170710a..19e24c456 100644 --- a/src/operators/unary-elementwise-nc.c +++ b/src/operators/unary-elementwise-nc.c @@ -479,6 +479,36 @@ enum xnn_status xnn_create_convert_nc_qu8_f32( 
convert_op_out); } +enum xnn_status xnn_create_copy_nc_x8( + size_t channels, + size_t input_stride, + size_t output_stride, + uint32_t flags, + xnn_operator_t* copy_op_out) +{ + return create_unary_elementwise_nc( + channels, input_stride, output_stride, flags, + NULL, 0, + xnn_operator_type_copy_nc_x8, + xnn_params.xx.copy, + copy_op_out); +} + +enum xnn_status xnn_create_copy_nc_x16( + size_t channels, + size_t input_stride, + size_t output_stride, + uint32_t flags, + xnn_operator_t* copy_op_out) +{ + return create_unary_elementwise_nc( + channels, input_stride, output_stride, flags, + NULL, 0, + xnn_operator_type_copy_nc_x16, + xnn_params.xx.copy, + copy_op_out); +} + enum xnn_status xnn_create_copy_nc_x32( size_t channels, size_t input_stride, @@ -1001,6 +1031,54 @@ enum xnn_status xnn_setup_convert_nc_qu8_f32( pthreadpool_get_threads_count(threadpool)); } +enum xnn_status xnn_setup_copy_nc_x8( + xnn_operator_t copy_op, + size_t batch_size, + const void* input, + void* output, + pthreadpool_t threadpool) +{ + if (copy_op->type != xnn_operator_type_copy_nc_x8) { + xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)", + xnn_operator_type_to_string(xnn_operator_type_copy_nc_x8), + xnn_operator_type_to_string(copy_op->type)); + return xnn_status_invalid_parameter; + } + copy_op->state = xnn_run_state_invalid; + + return setup_unary_elementwise_nc( + copy_op, + batch_size, input, output, + 0 /* log2(sizeof(uint16_t)) */, + 0 /* log2(sizeof(uint16_t)) */, + NULL, 0, + pthreadpool_get_threads_count(threadpool)); +} + +enum xnn_status xnn_setup_copy_nc_x16( + xnn_operator_t copy_op, + size_t batch_size, + const void* input, + void* output, + pthreadpool_t threadpool) +{ + if (copy_op->type != xnn_operator_type_copy_nc_x16) { + xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)", + xnn_operator_type_to_string(xnn_operator_type_copy_nc_x16), + xnn_operator_type_to_string(copy_op->type)); + return 
xnn_status_invalid_parameter; + } + copy_op->state = xnn_run_state_invalid; + + return setup_unary_elementwise_nc( + copy_op, + batch_size, input, output, + 1 /* log2(sizeof(uint16_t)) */, + 1 /* log2(sizeof(uint16_t)) */, + NULL, 0, + pthreadpool_get_threads_count(threadpool)); +} + enum xnn_status xnn_setup_copy_nc_x32( xnn_operator_t copy_op, size_t batch_size, diff --git a/src/xnnpack/operator.h b/src/xnnpack/operator.h index 7f4429f3f..637098947 100644 --- a/src/xnnpack/operator.h +++ b/src/xnnpack/operator.h @@ -63,6 +63,8 @@ enum xnn_operator_type { xnn_operator_type_convolution_nhwc_qc8, xnn_operator_type_convolution_nhwc_qs8, xnn_operator_type_convolution_nhwc_qu8, + xnn_operator_type_copy_nc_x8, + xnn_operator_type_copy_nc_x16, xnn_operator_type_copy_nc_x32, xnn_operator_type_deconvolution_nhwc_f32, xnn_operator_type_deconvolution_nhwc_qs8, diff --git a/test/copy-nc.cc b/test/copy-nc.cc index 0c70b1a6f..74767f997 100644 --- a/test/copy-nc.cc +++ b/test/copy-nc.cc @@ -8,7 +8,117 @@ #include "copy-operator-tester.h" -TEST(CLAMP_NC_X32, unit_batch) { +TEST(COPY_NC_X8, unit_batch) { + for (size_t channels = 1; channels < 100; channels++) { + CopyOperatorTester() + .batch_size(1) + .channels(channels) + .iterations(3) + .TestX8(); + } +} + +TEST(COPY_NC_X8, small_batch) { + for (size_t channels = 1; channels < 100; channels++) { + CopyOperatorTester() + .batch_size(3) + .channels(channels) + .iterations(3) + .TestX8(); + } +} + +TEST(COPY_NC_X8, small_batch_with_input_stride) { + for (size_t channels = 1; channels < 100; channels += 15) { + CopyOperatorTester() + .batch_size(3) + .channels(channels) + .input_stride(129) + .iterations(3) + .TestX8(); + } +} + +TEST(COPY_NC_X8, small_batch_with_output_stride) { + for (size_t channels = 1; channels < 100; channels += 15) { + CopyOperatorTester() + .batch_size(3) + .channels(channels) + .output_stride(117) + .iterations(3) + .TestX8(); + } +} + +TEST(COPY_NC_X8, small_batch_with_input_and_output_stride) { + for 
(size_t channels = 1; channels < 100; channels += 15) { + CopyOperatorTester() + .batch_size(3) + .channels(channels) + .input_stride(129) + .output_stride(117) + .iterations(3) + .TestX8(); + } +} + + +TEST(COPY_NC_X16, unit_batch) { + for (size_t channels = 1; channels < 100; channels++) { + CopyOperatorTester() + .batch_size(1) + .channels(channels) + .iterations(3) + .TestX16(); + } +} + +TEST(COPY_NC_X16, small_batch) { + for (size_t channels = 1; channels < 100; channels++) { + CopyOperatorTester() + .batch_size(3) + .channels(channels) + .iterations(3) + .TestX16(); + } +} + +TEST(COPY_NC_X16, small_batch_with_input_stride) { + for (size_t channels = 1; channels < 100; channels += 15) { + CopyOperatorTester() + .batch_size(3) + .channels(channels) + .input_stride(129) + .iterations(3) + .TestX16(); + } +} + +TEST(COPY_NC_X16, small_batch_with_output_stride) { + for (size_t channels = 1; channels < 100; channels += 15) { + CopyOperatorTester() + .batch_size(3) + .channels(channels) + .output_stride(117) + .iterations(3) + .TestX16(); + } +} + +TEST(COPY_NC_X16, small_batch_with_input_and_output_stride) { + for (size_t channels = 1; channels < 100; channels += 15) { + CopyOperatorTester() + .batch_size(3) + .channels(channels) + .input_stride(129) + .output_stride(117) + .iterations(3) + .TestX16(); + } +} + + +TEST(COPY_NC_X32, unit_batch) { for (size_t channels = 1; channels < 100; channels++) { CopyOperatorTester() .batch_size(1) @@ -18,7 +128,7 @@ TEST(CLAMP_NC_X32, unit_batch) { } } -TEST(CLAMP_NC_X32, small_batch) { +TEST(COPY_NC_X32, small_batch) { for (size_t channels = 1; channels < 100; channels++) { CopyOperatorTester() .batch_size(3) @@ -28,7 +138,7 @@ TEST(CLAMP_NC_X32, small_batch) { } } -TEST(CLAMP_NC_X32, small_batch_with_input_stride) { +TEST(COPY_NC_X32, small_batch_with_input_stride) { for (size_t channels = 1; channels < 100; channels += 15) { CopyOperatorTester() .batch_size(3) @@ -39,7 +149,7 @@ TEST(CLAMP_NC_X32, 
small_batch_with_input_stride) { } } -TEST(CLAMP_NC_X32, small_batch_with_output_stride) { +TEST(COPY_NC_X32, small_batch_with_output_stride) { for (size_t channels = 1; channels < 100; channels += 15) { CopyOperatorTester() .batch_size(3) @@ -50,7 +160,7 @@ TEST(CLAMP_NC_X32, small_batch_with_output_stride) { } } -TEST(CLAMP_NC_X32, small_batch_with_input_and_output_stride) { +TEST(COPY_NC_X32, small_batch_with_input_and_output_stride) { for (size_t channels = 1; channels < 100; channels += 15) { CopyOperatorTester() .batch_size(3) diff --git a/test/copy-operator-tester.h b/test/copy-operator-tester.h index aee4fe910..a23ab409c 100644 --- a/test/copy-operator-tester.h +++ b/test/copy-operator-tester.h @@ -80,12 +80,120 @@ class CopyOperatorTester { return this->iterations_; } + void TestX8() const { + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto u8rng = std::bind( + std::uniform_int_distribution<uint32_t>( std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max()), + rng); + + std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + + (batch_size() - 1) * input_stride() + channels()); + std::vector<uint8_t> output((batch_size() - 1) * output_stride() + channels()); + std::vector<uint8_t> output_ref(batch_size() * channels()); + for (size_t iteration = 0; iteration < iterations(); iteration++) { + std::generate(input.begin(), input.end(), std::ref(u8rng)); + std::fill(output.begin(), output.end(), UINT16_C(0xFA)); + + // Compute reference results. + for (size_t i = 0; i < batch_size(); i++) { + for (size_t c = 0; c < channels(); c++) { + output_ref[i * channels() + c] = input[i * input_stride() + c]; + } + } + + // Create, setup, run, and destroy Copy operator. 
+ ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); + xnn_operator_t copy_op = nullptr; + + ASSERT_EQ(xnn_status_success, + xnn_create_copy_nc_x8( + channels(), input_stride(), output_stride(), + 0, &copy_op)); + ASSERT_NE(nullptr, copy_op); + + // Smart pointer to automatically delete copy_op. + std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_copy_op(copy_op, xnn_delete_operator); + + ASSERT_EQ(xnn_status_success, + xnn_setup_copy_nc_x8( + copy_op, + batch_size(), + input.data(), output.data(), + nullptr /* thread pool */)); + + ASSERT_EQ(xnn_status_success, + xnn_run_operator(copy_op, nullptr /* thread pool */)); + + // Verify results. + for (size_t i = 0; i < batch_size(); i++) { + for (size_t c = 0; c < channels(); c++) { + ASSERT_EQ(output_ref[i * channels() + c], output[i * output_stride() + c]) + << "at batch " << i << " / " << batch_size() << ", channel = " << c << " / " << channels(); + } + } + } + } + + void TestX16() const { + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto u16rng = std::bind(std::uniform_int_distribution<uint16_t>(), rng); + + std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + + (batch_size() - 1) * input_stride() + channels()); + std::vector<uint16_t> output((batch_size() - 1) * output_stride() + channels()); + std::vector<uint16_t> output_ref(batch_size() * channels()); + for (size_t iteration = 0; iteration < iterations(); iteration++) { + std::generate(input.begin(), input.end(), std::ref(u16rng)); + std::fill(output.begin(), output.end(), UINT16_C(0xDEAD)); + + // Compute reference results. + for (size_t i = 0; i < batch_size(); i++) { + for (size_t c = 0; c < channels(); c++) { + output_ref[i * channels() + c] = input[i * input_stride() + c]; + } + } + + // Create, setup, run, and destroy Copy operator. 
+ ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); + xnn_operator_t copy_op = nullptr; + + ASSERT_EQ(xnn_status_success, + xnn_create_copy_nc_x16( + channels(), input_stride(), output_stride(), + 0, &copy_op)); + ASSERT_NE(nullptr, copy_op); + + // Smart pointer to automatically delete copy_op. + std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_copy_op(copy_op, xnn_delete_operator); + + ASSERT_EQ(xnn_status_success, + xnn_setup_copy_nc_x16( + copy_op, + batch_size(), + input.data(), output.data(), + nullptr /* thread pool */)); + + ASSERT_EQ(xnn_status_success, + xnn_run_operator(copy_op, nullptr /* thread pool */)); + + // Verify results. + for (size_t i = 0; i < batch_size(); i++) { + for (size_t c = 0; c < channels(); c++) { + ASSERT_EQ(output_ref[i * channels() + c], output[i * output_stride() + c]) + << "at batch " << i << " / " << batch_size() << ", channel = " << c << " / " << channels(); + } + } + } + } + void TestX32() const { std::random_device random_device; auto rng = std::mt19937(random_device()); auto u32rng = std::bind(std::uniform_int_distribution<uint32_t>(), rng); - std::vector<uint32_t> input(XNN_EXTRA_BYTES / sizeof(float) + + std::vector<uint32_t> input(XNN_EXTRA_BYTES / sizeof(uint32_t) + (batch_size() - 1) * input_stride() + channels()); std::vector<uint32_t> output((batch_size() - 1) * output_stride() + channels()); std::vector<uint32_t> output_ref(batch_size() * channels()); |