diff options
author | Marat Dukhan <maratek@google.com> | 2022-02-03 19:21:41 -0800 |
---|---|---|
committer | XNNPACK Team <xnnpack-github-robot@google.com> | 2022-02-03 19:22:40 -0800 |
commit | 6b45a7f5c46bfc54e3624be285b108e2aae62335 (patch) | |
tree | 160abdfd5b744857db5e3c6fdd6ff6625db72218 | |
parent | cde8bdfe9085e99106ee2f031ab8b8a490d416c5 (diff) | |
download | XNNPACK-6b45a7f5c46bfc54e3624be285b108e2aae62335.tar.gz |
16-bit Constant Pad ND operator
PiperOrigin-RevId: 426293079
-rw-r--r-- | include/xnnpack.h | 19 | ||||
-rw-r--r-- | src/operator-strings.c | 2 | ||||
-rw-r--r-- | src/operators/constant-pad-nd.c | 27 | ||||
-rw-r--r-- | src/xnnpack/operator.h | 1 | ||||
-rw-r--r-- | test/constant-pad-nd.cc | 138 | ||||
-rw-r--r-- | test/constant-pad-operator-tester.h | 115 |
6 files changed, 302 insertions, 0 deletions
diff --git a/include/xnnpack.h b/include/xnnpack.h index a4014736b..764703958 100644 --- a/include/xnnpack.h +++ b/include/xnnpack.h @@ -2015,6 +2015,25 @@ enum xnn_status xnn_setup_multiply_nd_f16( #endif // XNN_NO_F16_OPERATORS +#ifndef XNN_NO_X16_OPERATORS + +enum xnn_status xnn_create_constant_pad_nd_x16( + const void* padding_value, + uint32_t flags, + xnn_operator_t* constant_pad_op_out); + +enum xnn_status xnn_setup_constant_pad_nd_x16( + xnn_operator_t constant_pad_op, + size_t num_dims, + const size_t* input_shape, + const size_t* pre_padding, + const size_t* post_padding, + const void* input, + void* output, + pthreadpool_t threadpool); + +#endif // XNN_NO_X16_OPERATORS + #ifndef XNN_NO_QC8_OPERATORS enum xnn_status xnn_create_convolution2d_nhwc_qc8( diff --git a/src/operator-strings.c b/src/operator-strings.c index 94923e9e0..c3eac2676 100644 --- a/src/operator-strings.c +++ b/src/operator-strings.c @@ -50,6 +50,8 @@ const char* xnn_operator_type_to_string(enum xnn_operator_type type) { return "Clamp (NC, U8)"; case xnn_operator_type_constant_pad_nd_x8: return "Constant Pad (ND, X8)"; + case xnn_operator_type_constant_pad_nd_x16: + return "Constant Pad (ND, X16)"; case xnn_operator_type_constant_pad_nd_x32: return "Constant Pad (ND, X32)"; case xnn_operator_type_convert_nc_f16_f32: diff --git a/src/operators/constant-pad-nd.c b/src/operators/constant-pad-nd.c index c1d84b261..08409a44a 100644 --- a/src/operators/constant-pad-nd.c +++ b/src/operators/constant-pad-nd.c @@ -68,6 +68,16 @@ enum xnn_status xnn_create_constant_pad_nd_x8( padding_pattern * UINT32_C(0x01010101), flags, xnn_operator_type_constant_pad_nd_x8, constant_pad_op_out); } +enum xnn_status xnn_create_constant_pad_nd_x16( + const void* padding_value, + uint32_t flags, + xnn_operator_t* constant_pad_op_out) +{ + const uint32_t padding_pattern = *((const uint16_t*) padding_value); + return create_constant_pad_nd( + padding_pattern * UINT32_C(0x00010001), flags, 
xnn_operator_type_constant_pad_nd_x16, constant_pad_op_out); +} + enum xnn_status xnn_create_constant_pad_nd_x32( const void* padding_value, uint32_t flags, @@ -212,6 +222,23 @@ enum xnn_status xnn_setup_constant_pad_nd_x8( pthreadpool_get_threads_count(threadpool)); } +enum xnn_status xnn_setup_constant_pad_nd_x16( + xnn_operator_t constant_pad_op, + size_t num_dims, + const size_t* input_shape, + const size_t* pre_padding, + const size_t* post_padding, + const void* input, + void* output, + pthreadpool_t threadpool) +{ + return setup_constant_pad_nd( + constant_pad_op, xnn_operator_type_constant_pad_nd_x16, + num_dims, input_shape, pre_padding, post_padding, + input, output, 1 /* log2(element size) */, + pthreadpool_get_threads_count(threadpool)); +} + enum xnn_status xnn_setup_constant_pad_nd_x32( xnn_operator_t constant_pad_op, size_t num_dims, diff --git a/src/xnnpack/operator.h b/src/xnnpack/operator.h index 5a11e36a9..cd00a24c3 100644 --- a/src/xnnpack/operator.h +++ b/src/xnnpack/operator.h @@ -49,6 +49,7 @@ enum xnn_operator_type { xnn_operator_type_clamp_nc_u8, xnn_operator_type_ceiling_nc_f32, xnn_operator_type_constant_pad_nd_x8, + xnn_operator_type_constant_pad_nd_x16, xnn_operator_type_constant_pad_nd_x32, xnn_operator_type_convert_nc_f16_f32, xnn_operator_type_convert_nc_f32_f16, diff --git a/test/constant-pad-nd.cc b/test/constant-pad-nd.cc index c4b98629e..c9887eb44 100644 --- a/test/constant-pad-nd.cc +++ b/test/constant-pad-nd.cc @@ -165,6 +165,144 @@ TEST(CONSTANT_PAD_ND_X8, 6d) { } +TEST(CONSTANT_PAD_ND_X16, 0d) { + ConstantPadOperatorTester() + .TestX16(); +} + +TEST(CONSTANT_PAD_ND_X16, 1d) { + for (size_t dim1_pre_pad = 0; dim1_pre_pad <= kDim1PrePad; dim1_pre_pad += kDim1PrePad) { + for (size_t dim1_post_pad = 0; dim1_post_pad <= kDim1PostPad; dim1_post_pad += kDim1PostPad) { + ConstantPadOperatorTester() + .input_shape({kDim1}) + .pre_paddings({dim1_pre_pad}) + .post_paddings({dim1_post_pad}) + .TestX16(); + } + } +} + 
+TEST(CONSTANT_PAD_ND_X16, 2d) { + for (size_t dim1_pre_pad = 0; dim1_pre_pad <= kDim1PrePad; dim1_pre_pad += kDim1PrePad) { + for (size_t dim1_post_pad = 0; dim1_post_pad <= kDim1PostPad; dim1_post_pad += kDim1PostPad) { + for (size_t dim2_pre_pad = 0; dim2_pre_pad <= kDim2PrePad; dim2_pre_pad += kDim2PrePad) { + for (size_t dim2_post_pad = 0; dim2_post_pad <= kDim2PostPad; dim2_post_pad += kDim2PostPad) { + ConstantPadOperatorTester() + .input_shape({kDim1, kDim2}) + .pre_paddings({dim1_pre_pad, dim2_pre_pad}) + .post_paddings({dim1_post_pad, dim2_post_pad}) + .TestX16(); + } + } + } + } +} + +TEST(CONSTANT_PAD_ND_X16, 3d) { + for (size_t dim1_pre_pad = 0; dim1_pre_pad <= kDim1PrePad; dim1_pre_pad += kDim1PrePad) { + for (size_t dim1_post_pad = 0; dim1_post_pad <= kDim1PostPad; dim1_post_pad += kDim1PostPad) { + for (size_t dim2_pre_pad = 0; dim2_pre_pad <= kDim2PrePad; dim2_pre_pad += kDim2PrePad) { + for (size_t dim2_post_pad = 0; dim2_post_pad <= kDim2PostPad; dim2_post_pad += kDim2PostPad) { + for (size_t dim3_pre_pad = 0; dim3_pre_pad <= kDim3PrePad; dim3_pre_pad += kDim3PrePad) { + for (size_t dim3_post_pad = 0; dim3_post_pad <= kDim3PostPad; dim3_post_pad += kDim3PostPad) { + ConstantPadOperatorTester() + .input_shape({kDim1, kDim2, kDim3}) + .pre_paddings({dim1_pre_pad, dim2_pre_pad, dim3_pre_pad}) + .post_paddings({dim1_post_pad, dim2_post_pad, dim3_post_pad}) + .TestX16(); + } + } + } + } + } + } +} + +TEST(CONSTANT_PAD_ND_X16, 4d) { + for (size_t dim1_pre_pad = 0; dim1_pre_pad <= kDim1PrePad; dim1_pre_pad += kDim1PrePad) { + for (size_t dim1_post_pad = 0; dim1_post_pad <= kDim1PostPad; dim1_post_pad += kDim1PostPad) { + for (size_t dim2_pre_pad = 0; dim2_pre_pad <= kDim2PrePad; dim2_pre_pad += kDim2PrePad) { + for (size_t dim2_post_pad = 0; dim2_post_pad <= kDim2PostPad; dim2_post_pad += kDim2PostPad) { + for (size_t dim3_pre_pad = 0; dim3_pre_pad <= kDim3PrePad; dim3_pre_pad += kDim3PrePad) { + for (size_t dim3_post_pad = 0; dim3_post_pad <= 
kDim3PostPad; dim3_post_pad += kDim3PostPad) { + for (size_t dim4_pre_pad = 0; dim4_pre_pad <= kDim4PrePad; dim4_pre_pad += kDim4PrePad) { + for (size_t dim4_post_pad = 0; dim4_post_pad <= kDim4PostPad; dim4_post_pad += kDim4PostPad) { + ConstantPadOperatorTester() + .input_shape({kDim1, kDim2, kDim3, kDim4}) + .pre_paddings({dim1_pre_pad, dim2_pre_pad, dim3_pre_pad, dim4_pre_pad}) + .post_paddings({dim1_post_pad, dim2_post_pad, dim3_post_pad, dim4_post_pad}) + .TestX16(); + } + } + } + } + } + } + } + } +} + +TEST(CONSTANT_PAD_ND_X16, 5d) { + for (size_t dim1_pre_pad = 0; dim1_pre_pad <= kDim1PrePad; dim1_pre_pad += kDim1PrePad) { + for (size_t dim1_post_pad = 0; dim1_post_pad <= kDim1PostPad; dim1_post_pad += kDim1PostPad) { + for (size_t dim2_pre_pad = 0; dim2_pre_pad <= kDim2PrePad; dim2_pre_pad += kDim2PrePad) { + for (size_t dim2_post_pad = 0; dim2_post_pad <= kDim2PostPad; dim2_post_pad += kDim2PostPad) { + for (size_t dim3_pre_pad = 0; dim3_pre_pad <= kDim3PrePad; dim3_pre_pad += kDim3PrePad) { + for (size_t dim3_post_pad = 0; dim3_post_pad <= kDim3PostPad; dim3_post_pad += kDim3PostPad) { + for (size_t dim4_pre_pad = 0; dim4_pre_pad <= kDim4PrePad; dim4_pre_pad += kDim4PrePad) { + for (size_t dim4_post_pad = 0; dim4_post_pad <= kDim4PostPad; dim4_post_pad += kDim4PostPad) { + for (size_t dim5_pre_pad = 0; dim5_pre_pad <= kDim5PrePad; dim5_pre_pad += kDim5PrePad) { + for (size_t dim5_post_pad = 0; dim5_post_pad <= kDim5PostPad; dim5_post_pad += kDim5PostPad) { + ConstantPadOperatorTester() + .input_shape({kDim1, kDim2, kDim3, kDim4, kDim5}) + .pre_paddings({dim1_pre_pad, dim2_pre_pad, dim3_pre_pad, dim4_pre_pad, dim5_pre_pad}) + .post_paddings({dim1_post_pad, dim2_post_pad, dim3_post_pad, dim4_post_pad, dim5_post_pad}) + .TestX16(); + } + } + } + } + } + } + } + } + } + } +} + +TEST(CONSTANT_PAD_ND_X16, 6d) { + for (size_t dim1_pre_pad = 0; dim1_pre_pad <= kDim1PrePad; dim1_pre_pad += kDim1PrePad) { + for (size_t dim1_post_pad = 0; dim1_post_pad <= 
kDim1PostPad; dim1_post_pad += kDim1PostPad) { + for (size_t dim2_pre_pad = 0; dim2_pre_pad <= kDim2PrePad; dim2_pre_pad += kDim2PrePad) { + for (size_t dim2_post_pad = 0; dim2_post_pad <= kDim2PostPad; dim2_post_pad += kDim2PostPad) { + for (size_t dim3_pre_pad = 0; dim3_pre_pad <= kDim3PrePad; dim3_pre_pad += kDim3PrePad) { + for (size_t dim3_post_pad = 0; dim3_post_pad <= kDim3PostPad; dim3_post_pad += kDim3PostPad) { + for (size_t dim4_pre_pad = 0; dim4_pre_pad <= kDim4PrePad; dim4_pre_pad += kDim4PrePad) { + for (size_t dim4_post_pad = 0; dim4_post_pad <= kDim4PostPad; dim4_post_pad += kDim4PostPad) { + for (size_t dim5_pre_pad = 0; dim5_pre_pad <= kDim5PrePad; dim5_pre_pad += kDim5PrePad) { + for (size_t dim5_post_pad = 0; dim5_post_pad <= kDim5PostPad; dim5_post_pad += kDim5PostPad) { + for (size_t dim6_pre_pad = 0; dim6_pre_pad <= kDim6PrePad; dim6_pre_pad += kDim6PrePad) { + for (size_t dim6_post_pad = 0; dim6_post_pad <= kDim6PostPad; dim6_post_pad += kDim6PostPad) { + ConstantPadOperatorTester() + .input_shape({kDim1, kDim2, kDim3, kDim4, kDim5, kDim6}) + .pre_paddings({dim1_pre_pad, dim2_pre_pad, dim3_pre_pad, dim4_pre_pad, dim5_pre_pad, dim6_pre_pad}) + .post_paddings({dim1_post_pad, dim2_post_pad, dim3_post_pad, dim4_post_pad, dim5_post_pad, dim6_post_pad}) + .TestX16(); + } + } + } + } + } + } + } + } + } + } + } + } +} + + TEST(CONSTANT_PAD_ND_X32, 0d) { ConstantPadOperatorTester() .TestX32(); diff --git a/test/constant-pad-operator-tester.h b/test/constant-pad-operator-tester.h index 2f848ed5d..f41d34634 100644 --- a/test/constant-pad-operator-tester.h +++ b/test/constant-pad-operator-tester.h @@ -217,6 +217,121 @@ class ConstantPadOperatorTester { } } + void TestX16() const { + ASSERT_EQ(num_dims(), num_pre_paddings()); + ASSERT_EQ(num_dims(), num_post_paddings()); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto u16rng = std::bind(std::uniform_int_distribution<uint16_t>(), rng); + + // Compute generalized 
shapes. + std::array<size_t, XNN_MAX_TENSOR_DIMS> input_dims; + std::array<size_t, XNN_MAX_TENSOR_DIMS> input_pre_paddings; + std::array<size_t, XNN_MAX_TENSOR_DIMS> input_post_paddings; + std::array<size_t, XNN_MAX_TENSOR_DIMS> output_dims; + std::fill(input_dims.begin(), input_dims.end(), 1); + std::fill(input_pre_paddings.begin(), input_pre_paddings.end(), 0); + std::fill(input_post_paddings.begin(), input_post_paddings.end(), 0); + std::fill(output_dims.begin(), output_dims.end(), 1); + for (size_t i = 0; i < num_dims(); i++) { + input_dims[XNN_MAX_TENSOR_DIMS - num_dims() + i] = input_dim(i); + input_pre_paddings[XNN_MAX_TENSOR_DIMS - num_dims() + i] = pre_padding(i); + input_post_paddings[XNN_MAX_TENSOR_DIMS - num_dims() + i] = post_padding(i); + output_dims[XNN_MAX_TENSOR_DIMS - num_dims() + i] = output_dim(i); + } + + // Compute generalized strides. + std::array<size_t, XNN_MAX_TENSOR_DIMS> input_strides; + std::array<size_t, XNN_MAX_TENSOR_DIMS> output_strides; + size_t input_stride = 1, output_stride = 1; + for (size_t i = XNN_MAX_TENSOR_DIMS; i != 0; i--) { + input_strides[i - 1] = input_stride; + output_strides[i - 1] = output_stride; + input_stride *= input_dims[i - 1]; + output_stride *= output_dims[i - 1]; + } + + std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + num_input_elements()); + std::vector<uint16_t> output(num_output_elements()); + std::vector<uint16_t> output_ref(num_output_elements()); + for (size_t iteration = 0; iteration < iterations(); iteration++) { + std::generate(input.begin(), input.end(), std::ref(u16rng)); + std::fill(output.begin(), output.end(), UINT16_C(0xDEAD)); + const uint16_t padding_value = u16rng(); + + // Compute reference results. 
+ std::fill(output_ref.begin(), output_ref.end(), padding_value); + for (size_t i = 0; i < input_dims[0]; i++) { + for (size_t j = 0; j < input_dims[1]; j++) { + for (size_t k = 0; k < input_dims[2]; k++) { + for (size_t l = 0; l < input_dims[3]; l++) { + for (size_t m = 0; m < input_dims[4]; m++) { + for (size_t n = 0; n < input_dims[5]; n++) { + const size_t output_index = + (i + input_pre_paddings[0]) * output_strides[0] + + (j + input_pre_paddings[1]) * output_strides[1] + + (k + input_pre_paddings[2]) * output_strides[2] + + (l + input_pre_paddings[3]) * output_strides[3] + + (m + input_pre_paddings[4]) * output_strides[4] + + (n + input_pre_paddings[5]) * output_strides[5]; + const size_t input_index = + i * input_strides[0] + j * input_strides[1] + k * input_strides[2] + + l * input_strides[3] + m * input_strides[4] + n * input_strides[5]; + output_ref[output_index] = input[input_index]; + } + } + } + } + } + } + + // Create, setup, run, and destroy a constant pad operator. + ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); + xnn_operator_t pad_op = nullptr; + + ASSERT_EQ(xnn_status_success, + xnn_create_constant_pad_nd_x16( + &padding_value, 0, &pad_op)); + ASSERT_NE(nullptr, pad_op); + + // Smart pointer to automatically delete pad_op. + std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_pad_op(pad_op, xnn_delete_operator); + + ASSERT_EQ(xnn_status_success, + xnn_setup_constant_pad_nd_x16( + pad_op, + num_dims(), + input_shape().data(), pre_paddings().data(), post_paddings().data(), + input.data(), output.data(), + nullptr /* thread pool */)); + + ASSERT_EQ(xnn_status_success, + xnn_run_operator(pad_op, nullptr /* thread pool */)); + + // Verify results. 
+ for (size_t i = 0; i < output_dims[0]; i++) { + for (size_t j = 0; j < output_dims[1]; j++) { + for (size_t k = 0; k < output_dims[2]; k++) { + for (size_t l = 0; l < output_dims[3]; l++) { + for (size_t m = 0; m < output_dims[4]; m++) { + for (size_t n = 0; n < output_dims[5]; n++) { + const size_t index = + i * output_strides[0] + j * output_strides[1] + k * output_strides[2] + + l * output_strides[3] + m * output_strides[4] + n * output_strides[5]; + ASSERT_EQ(output[index], output_ref[index]) + << "(i, j, k, l, m, n) = (" + << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ")" + << ", padding value = " << padding_value; + } + } + } + } + } + } + } + } + void TestX32() const { ASSERT_EQ(num_dims(), num_pre_paddings()); ASSERT_EQ(num_dims(), num_post_paddings()); |