aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarat Dukhan <maratek@google.com>2022-02-03 19:21:41 -0800
committerXNNPACK Team <xnnpack-github-robot@google.com>2022-02-03 19:22:40 -0800
commit6b45a7f5c46bfc54e3624be285b108e2aae62335 (patch)
tree160abdfd5b744857db5e3c6fdd6ff6625db72218
parentcde8bdfe9085e99106ee2f031ab8b8a490d416c5 (diff)
downloadXNNPACK-6b45a7f5c46bfc54e3624be285b108e2aae62335.tar.gz
16-bit Constant Pad ND operator
PiperOrigin-RevId: 426293079
-rw-r--r--include/xnnpack.h19
-rw-r--r--src/operator-strings.c2
-rw-r--r--src/operators/constant-pad-nd.c27
-rw-r--r--src/xnnpack/operator.h1
-rw-r--r--test/constant-pad-nd.cc138
-rw-r--r--test/constant-pad-operator-tester.h115
6 files changed, 302 insertions, 0 deletions
diff --git a/include/xnnpack.h b/include/xnnpack.h
index a4014736b..764703958 100644
--- a/include/xnnpack.h
+++ b/include/xnnpack.h
@@ -2015,6 +2015,25 @@ enum xnn_status xnn_setup_multiply_nd_f16(
#endif // XNN_NO_F16_OPERATORS
+#ifndef XNN_NO_X16_OPERATORS
+
+enum xnn_status xnn_create_constant_pad_nd_x16(
+ const void* padding_value,
+ uint32_t flags,
+ xnn_operator_t* constant_pad_op_out);
+
+enum xnn_status xnn_setup_constant_pad_nd_x16(
+ xnn_operator_t constant_pad_op,
+ size_t num_dims,
+ const size_t* input_shape,
+ const size_t* pre_padding,
+ const size_t* post_padding,
+ const void* input,
+ void* output,
+ pthreadpool_t threadpool);
+
+#endif // XNN_NO_X16_OPERATORS
+
#ifndef XNN_NO_QC8_OPERATORS
enum xnn_status xnn_create_convolution2d_nhwc_qc8(
diff --git a/src/operator-strings.c b/src/operator-strings.c
index 94923e9e0..c3eac2676 100644
--- a/src/operator-strings.c
+++ b/src/operator-strings.c
@@ -50,6 +50,8 @@ const char* xnn_operator_type_to_string(enum xnn_operator_type type) {
return "Clamp (NC, U8)";
case xnn_operator_type_constant_pad_nd_x8:
return "Constant Pad (ND, X8)";
+ case xnn_operator_type_constant_pad_nd_x16:
+ return "Constant Pad (ND, X16)";
case xnn_operator_type_constant_pad_nd_x32:
return "Constant Pad (ND, X32)";
case xnn_operator_type_convert_nc_f16_f32:
diff --git a/src/operators/constant-pad-nd.c b/src/operators/constant-pad-nd.c
index c1d84b261..08409a44a 100644
--- a/src/operators/constant-pad-nd.c
+++ b/src/operators/constant-pad-nd.c
@@ -68,6 +68,16 @@ enum xnn_status xnn_create_constant_pad_nd_x8(
padding_pattern * UINT32_C(0x01010101), flags, xnn_operator_type_constant_pad_nd_x8, constant_pad_op_out);
}
+enum xnn_status xnn_create_constant_pad_nd_x16(
+ const void* padding_value,
+ uint32_t flags,
+ xnn_operator_t* constant_pad_op_out)
+{
+ const uint32_t padding_pattern = *((const uint16_t*) padding_value);
+ return create_constant_pad_nd(
+ padding_pattern * UINT32_C(0x00010001), flags, xnn_operator_type_constant_pad_nd_x16, constant_pad_op_out);
+}
+
enum xnn_status xnn_create_constant_pad_nd_x32(
const void* padding_value,
uint32_t flags,
@@ -212,6 +222,23 @@ enum xnn_status xnn_setup_constant_pad_nd_x8(
pthreadpool_get_threads_count(threadpool));
}
+enum xnn_status xnn_setup_constant_pad_nd_x16(
+ xnn_operator_t constant_pad_op,
+ size_t num_dims,
+ const size_t* input_shape,
+ const size_t* pre_padding,
+ const size_t* post_padding,
+ const void* input,
+ void* output,
+ pthreadpool_t threadpool)
+{
+ return setup_constant_pad_nd(
+ constant_pad_op, xnn_operator_type_constant_pad_nd_x16,
+ num_dims, input_shape, pre_padding, post_padding,
+ input, output, 1 /* log2(element size) */,
+ pthreadpool_get_threads_count(threadpool));
+}
+
enum xnn_status xnn_setup_constant_pad_nd_x32(
xnn_operator_t constant_pad_op,
size_t num_dims,
diff --git a/src/xnnpack/operator.h b/src/xnnpack/operator.h
index 5a11e36a9..cd00a24c3 100644
--- a/src/xnnpack/operator.h
+++ b/src/xnnpack/operator.h
@@ -49,6 +49,7 @@ enum xnn_operator_type {
xnn_operator_type_clamp_nc_u8,
xnn_operator_type_ceiling_nc_f32,
xnn_operator_type_constant_pad_nd_x8,
+ xnn_operator_type_constant_pad_nd_x16,
xnn_operator_type_constant_pad_nd_x32,
xnn_operator_type_convert_nc_f16_f32,
xnn_operator_type_convert_nc_f32_f16,
diff --git a/test/constant-pad-nd.cc b/test/constant-pad-nd.cc
index c4b98629e..c9887eb44 100644
--- a/test/constant-pad-nd.cc
+++ b/test/constant-pad-nd.cc
@@ -165,6 +165,144 @@ TEST(CONSTANT_PAD_ND_X8, 6d) {
}
+TEST(CONSTANT_PAD_ND_X16, 0d) {
+ ConstantPadOperatorTester()
+ .TestX16();
+}
+
+TEST(CONSTANT_PAD_ND_X16, 1d) {
+ for (size_t dim1_pre_pad = 0; dim1_pre_pad <= kDim1PrePad; dim1_pre_pad += kDim1PrePad) {
+ for (size_t dim1_post_pad = 0; dim1_post_pad <= kDim1PostPad; dim1_post_pad += kDim1PostPad) {
+ ConstantPadOperatorTester()
+ .input_shape({kDim1})
+ .pre_paddings({dim1_pre_pad})
+ .post_paddings({dim1_post_pad})
+ .TestX16();
+ }
+ }
+}
+
+TEST(CONSTANT_PAD_ND_X16, 2d) {
+ for (size_t dim1_pre_pad = 0; dim1_pre_pad <= kDim1PrePad; dim1_pre_pad += kDim1PrePad) {
+ for (size_t dim1_post_pad = 0; dim1_post_pad <= kDim1PostPad; dim1_post_pad += kDim1PostPad) {
+ for (size_t dim2_pre_pad = 0; dim2_pre_pad <= kDim2PrePad; dim2_pre_pad += kDim2PrePad) {
+ for (size_t dim2_post_pad = 0; dim2_post_pad <= kDim2PostPad; dim2_post_pad += kDim2PostPad) {
+ ConstantPadOperatorTester()
+ .input_shape({kDim1, kDim2})
+ .pre_paddings({dim1_pre_pad, dim2_pre_pad})
+ .post_paddings({dim1_post_pad, dim2_post_pad})
+ .TestX16();
+ }
+ }
+ }
+ }
+}
+
+TEST(CONSTANT_PAD_ND_X16, 3d) {
+ for (size_t dim1_pre_pad = 0; dim1_pre_pad <= kDim1PrePad; dim1_pre_pad += kDim1PrePad) {
+ for (size_t dim1_post_pad = 0; dim1_post_pad <= kDim1PostPad; dim1_post_pad += kDim1PostPad) {
+ for (size_t dim2_pre_pad = 0; dim2_pre_pad <= kDim2PrePad; dim2_pre_pad += kDim2PrePad) {
+ for (size_t dim2_post_pad = 0; dim2_post_pad <= kDim2PostPad; dim2_post_pad += kDim2PostPad) {
+ for (size_t dim3_pre_pad = 0; dim3_pre_pad <= kDim3PrePad; dim3_pre_pad += kDim3PrePad) {
+ for (size_t dim3_post_pad = 0; dim3_post_pad <= kDim3PostPad; dim3_post_pad += kDim3PostPad) {
+ ConstantPadOperatorTester()
+ .input_shape({kDim1, kDim2, kDim3})
+ .pre_paddings({dim1_pre_pad, dim2_pre_pad, dim3_pre_pad})
+ .post_paddings({dim1_post_pad, dim2_post_pad, dim3_post_pad})
+ .TestX16();
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(CONSTANT_PAD_ND_X16, 4d) {
+ for (size_t dim1_pre_pad = 0; dim1_pre_pad <= kDim1PrePad; dim1_pre_pad += kDim1PrePad) {
+ for (size_t dim1_post_pad = 0; dim1_post_pad <= kDim1PostPad; dim1_post_pad += kDim1PostPad) {
+ for (size_t dim2_pre_pad = 0; dim2_pre_pad <= kDim2PrePad; dim2_pre_pad += kDim2PrePad) {
+ for (size_t dim2_post_pad = 0; dim2_post_pad <= kDim2PostPad; dim2_post_pad += kDim2PostPad) {
+ for (size_t dim3_pre_pad = 0; dim3_pre_pad <= kDim3PrePad; dim3_pre_pad += kDim3PrePad) {
+ for (size_t dim3_post_pad = 0; dim3_post_pad <= kDim3PostPad; dim3_post_pad += kDim3PostPad) {
+ for (size_t dim4_pre_pad = 0; dim4_pre_pad <= kDim4PrePad; dim4_pre_pad += kDim4PrePad) {
+ for (size_t dim4_post_pad = 0; dim4_post_pad <= kDim4PostPad; dim4_post_pad += kDim4PostPad) {
+ ConstantPadOperatorTester()
+ .input_shape({kDim1, kDim2, kDim3, kDim4})
+ .pre_paddings({dim1_pre_pad, dim2_pre_pad, dim3_pre_pad, dim4_pre_pad})
+ .post_paddings({dim1_post_pad, dim2_post_pad, dim3_post_pad, dim4_post_pad})
+ .TestX16();
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(CONSTANT_PAD_ND_X16, 5d) {
+ for (size_t dim1_pre_pad = 0; dim1_pre_pad <= kDim1PrePad; dim1_pre_pad += kDim1PrePad) {
+ for (size_t dim1_post_pad = 0; dim1_post_pad <= kDim1PostPad; dim1_post_pad += kDim1PostPad) {
+ for (size_t dim2_pre_pad = 0; dim2_pre_pad <= kDim2PrePad; dim2_pre_pad += kDim2PrePad) {
+ for (size_t dim2_post_pad = 0; dim2_post_pad <= kDim2PostPad; dim2_post_pad += kDim2PostPad) {
+ for (size_t dim3_pre_pad = 0; dim3_pre_pad <= kDim3PrePad; dim3_pre_pad += kDim3PrePad) {
+ for (size_t dim3_post_pad = 0; dim3_post_pad <= kDim3PostPad; dim3_post_pad += kDim3PostPad) {
+ for (size_t dim4_pre_pad = 0; dim4_pre_pad <= kDim4PrePad; dim4_pre_pad += kDim4PrePad) {
+ for (size_t dim4_post_pad = 0; dim4_post_pad <= kDim4PostPad; dim4_post_pad += kDim4PostPad) {
+ for (size_t dim5_pre_pad = 0; dim5_pre_pad <= kDim5PrePad; dim5_pre_pad += kDim5PrePad) {
+ for (size_t dim5_post_pad = 0; dim5_post_pad <= kDim5PostPad; dim5_post_pad += kDim5PostPad) {
+ ConstantPadOperatorTester()
+ .input_shape({kDim1, kDim2, kDim3, kDim4, kDim5})
+ .pre_paddings({dim1_pre_pad, dim2_pre_pad, dim3_pre_pad, dim4_pre_pad, dim5_pre_pad})
+ .post_paddings({dim1_post_pad, dim2_post_pad, dim3_post_pad, dim4_post_pad, dim5_post_pad})
+ .TestX16();
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(CONSTANT_PAD_ND_X16, 6d) {
+ for (size_t dim1_pre_pad = 0; dim1_pre_pad <= kDim1PrePad; dim1_pre_pad += kDim1PrePad) {
+ for (size_t dim1_post_pad = 0; dim1_post_pad <= kDim1PostPad; dim1_post_pad += kDim1PostPad) {
+ for (size_t dim2_pre_pad = 0; dim2_pre_pad <= kDim2PrePad; dim2_pre_pad += kDim2PrePad) {
+ for (size_t dim2_post_pad = 0; dim2_post_pad <= kDim2PostPad; dim2_post_pad += kDim2PostPad) {
+ for (size_t dim3_pre_pad = 0; dim3_pre_pad <= kDim3PrePad; dim3_pre_pad += kDim3PrePad) {
+ for (size_t dim3_post_pad = 0; dim3_post_pad <= kDim3PostPad; dim3_post_pad += kDim3PostPad) {
+ for (size_t dim4_pre_pad = 0; dim4_pre_pad <= kDim4PrePad; dim4_pre_pad += kDim4PrePad) {
+ for (size_t dim4_post_pad = 0; dim4_post_pad <= kDim4PostPad; dim4_post_pad += kDim4PostPad) {
+ for (size_t dim5_pre_pad = 0; dim5_pre_pad <= kDim5PrePad; dim5_pre_pad += kDim5PrePad) {
+ for (size_t dim5_post_pad = 0; dim5_post_pad <= kDim5PostPad; dim5_post_pad += kDim5PostPad) {
+ for (size_t dim6_pre_pad = 0; dim6_pre_pad <= kDim6PrePad; dim6_pre_pad += kDim6PrePad) {
+ for (size_t dim6_post_pad = 0; dim6_post_pad <= kDim6PostPad; dim6_post_pad += kDim6PostPad) {
+ ConstantPadOperatorTester()
+ .input_shape({kDim1, kDim2, kDim3, kDim4, kDim5, kDim6})
+ .pre_paddings({dim1_pre_pad, dim2_pre_pad, dim3_pre_pad, dim4_pre_pad, dim5_pre_pad, dim6_pre_pad})
+ .post_paddings({dim1_post_pad, dim2_post_pad, dim3_post_pad, dim4_post_pad, dim5_post_pad, dim6_post_pad})
+ .TestX16();
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+
TEST(CONSTANT_PAD_ND_X32, 0d) {
ConstantPadOperatorTester()
.TestX32();
diff --git a/test/constant-pad-operator-tester.h b/test/constant-pad-operator-tester.h
index 2f848ed5d..f41d34634 100644
--- a/test/constant-pad-operator-tester.h
+++ b/test/constant-pad-operator-tester.h
@@ -217,6 +217,121 @@ class ConstantPadOperatorTester {
}
}
+ void TestX16() const {
+ ASSERT_EQ(num_dims(), num_pre_paddings());
+ ASSERT_EQ(num_dims(), num_post_paddings());
+
+ std::random_device random_device;
+ auto rng = std::mt19937(random_device());
+ auto u16rng = std::bind(std::uniform_int_distribution<uint16_t>(), rng);
+
+ // Compute generalized shapes.
+ std::array<size_t, XNN_MAX_TENSOR_DIMS> input_dims;
+ std::array<size_t, XNN_MAX_TENSOR_DIMS> input_pre_paddings;
+ std::array<size_t, XNN_MAX_TENSOR_DIMS> input_post_paddings;
+ std::array<size_t, XNN_MAX_TENSOR_DIMS> output_dims;
+ std::fill(input_dims.begin(), input_dims.end(), 1);
+ std::fill(input_pre_paddings.begin(), input_pre_paddings.end(), 0);
+ std::fill(input_post_paddings.begin(), input_post_paddings.end(), 0);
+ std::fill(output_dims.begin(), output_dims.end(), 1);
+ for (size_t i = 0; i < num_dims(); i++) {
+ input_dims[XNN_MAX_TENSOR_DIMS - num_dims() + i] = input_dim(i);
+ input_pre_paddings[XNN_MAX_TENSOR_DIMS - num_dims() + i] = pre_padding(i);
+ input_post_paddings[XNN_MAX_TENSOR_DIMS - num_dims() + i] = post_padding(i);
+ output_dims[XNN_MAX_TENSOR_DIMS - num_dims() + i] = output_dim(i);
+ }
+
+ // Compute generalized strides.
+ std::array<size_t, XNN_MAX_TENSOR_DIMS> input_strides;
+ std::array<size_t, XNN_MAX_TENSOR_DIMS> output_strides;
+ size_t input_stride = 1, output_stride = 1;
+ for (size_t i = XNN_MAX_TENSOR_DIMS; i != 0; i--) {
+ input_strides[i - 1] = input_stride;
+ output_strides[i - 1] = output_stride;
+ input_stride *= input_dims[i - 1];
+ output_stride *= output_dims[i - 1];
+ }
+
+ std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + num_input_elements());
+ std::vector<uint16_t> output(num_output_elements());
+ std::vector<uint16_t> output_ref(num_output_elements());
+ for (size_t iteration = 0; iteration < iterations(); iteration++) {
+ std::generate(input.begin(), input.end(), std::ref(u16rng));
+ std::fill(output.begin(), output.end(), UINT16_C(0xDEAD));
+ const uint16_t padding_value = u16rng();
+
+ // Compute reference results.
+ std::fill(output_ref.begin(), output_ref.end(), padding_value);
+ for (size_t i = 0; i < input_dims[0]; i++) {
+ for (size_t j = 0; j < input_dims[1]; j++) {
+ for (size_t k = 0; k < input_dims[2]; k++) {
+ for (size_t l = 0; l < input_dims[3]; l++) {
+ for (size_t m = 0; m < input_dims[4]; m++) {
+ for (size_t n = 0; n < input_dims[5]; n++) {
+ const size_t output_index =
+ (i + input_pre_paddings[0]) * output_strides[0] +
+ (j + input_pre_paddings[1]) * output_strides[1] +
+ (k + input_pre_paddings[2]) * output_strides[2] +
+ (l + input_pre_paddings[3]) * output_strides[3] +
+ (m + input_pre_paddings[4]) * output_strides[4] +
+ (n + input_pre_paddings[5]) * output_strides[5];
+ const size_t input_index =
+ i * input_strides[0] + j * input_strides[1] + k * input_strides[2] +
+ l * input_strides[3] + m * input_strides[4] + n * input_strides[5];
+ output_ref[output_index] = input[input_index];
+ }
+ }
+ }
+ }
+ }
+ }
+
+    // Create, setup, run, and destroy a constant pad operator.
+ ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
+ xnn_operator_t pad_op = nullptr;
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_create_constant_pad_nd_x16(
+ &padding_value, 0, &pad_op));
+ ASSERT_NE(nullptr, pad_op);
+
+ // Smart pointer to automatically delete pad_op.
+ std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_pad_op(pad_op, xnn_delete_operator);
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_setup_constant_pad_nd_x16(
+ pad_op,
+ num_dims(),
+ input_shape().data(), pre_paddings().data(), post_paddings().data(),
+ input.data(), output.data(),
+ nullptr /* thread pool */));
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_run_operator(pad_op, nullptr /* thread pool */));
+
+ // Verify results.
+ for (size_t i = 0; i < output_dims[0]; i++) {
+ for (size_t j = 0; j < output_dims[1]; j++) {
+ for (size_t k = 0; k < output_dims[2]; k++) {
+ for (size_t l = 0; l < output_dims[3]; l++) {
+ for (size_t m = 0; m < output_dims[4]; m++) {
+ for (size_t n = 0; n < output_dims[5]; n++) {
+ const size_t index =
+ i * output_strides[0] + j * output_strides[1] + k * output_strides[2] +
+ l * output_strides[3] + m * output_strides[4] + n * output_strides[5];
+ ASSERT_EQ(output[index], output_ref[index])
+ << "(i, j, k, l, m, n) = ("
+ << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ")"
+ << ", padding value = " << padding_value;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
void TestX32() const {
ASSERT_EQ(num_dims(), num_pre_paddings());
ASSERT_EQ(num_dims(), num_post_paddings());