author    Zhi An Ng <zhin@google.com>                     2022-08-24 10:38:43 -0700
committer XNNPACK Team <xnnpack-github-robot@google.com> 2022-08-24 10:39:49 -0700
commit    edfac6301d77feb42f4f0562d02251fd02fe4825 (patch)
tree      1a4457f746c24a22226e4a47fbef3b4a98679dc4
parent    a066c3191d564b824a64b78fee498ae5ac48e998 (diff)
download  XNNPACK-edfac6301d77feb42f4f0562d02251fd02fe4825.tar.gz
Add test for FP16 rewrite when an external output happens to be an input to another node
This can happen when a subgraph is split because an operation is not supported by XNNPACK. The FP16 rewriting code does not account for this: when counting the number of external inputs, it asserts that any node input value with an fp32_id is an external input. That is not always true; the value could instead be an external output.

PiperOrigin-RevId: 469766360
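For reference, a minimal sketch of a graph that hits this case, built with the public subgraph API (shapes mirror the new test below; error checking is omitted for brevity, and value/node numbering is illustrative): value 2 is declared as an external output of the pad, yet is also consumed as an input by the add.

#include <math.h>
#include <xnnpack.h>

int main(void) {
  xnn_initialize(/*allocator=*/NULL);
  xnn_subgraph_t subgraph = NULL;
  xnn_create_subgraph(/*external_value_ids=*/4, /*flags=*/0, &subgraph);

  const size_t input_dims[4] = {1, 2, 2, 3};
  const size_t addend_dims[4] = {1, 1, 1, 3};
  const size_t padded_dims[4] = {1, 4, 2, 3};
  uint32_t v0, v1, v2, v3;
  // External input, feeding the constant pad.
  xnn_define_tensor_value(subgraph, xnn_datatype_fp32, 4, input_dims, NULL,
                          /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &v0);
  // Internal (dynamic) second input to the add.
  xnn_define_tensor_value(subgraph, xnn_datatype_fp32, 4, addend_dims, NULL,
                          /*external_id=*/XNN_INVALID_VALUE_ID, /*flags=*/0, &v1);
  // External output of the pad -- and, crucially, also an input to the add.
  xnn_define_tensor_value(subgraph, xnn_datatype_fp32, 4, padded_dims, NULL,
                          /*external_id=*/2, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &v2);
  // External output of the add.
  xnn_define_tensor_value(subgraph, xnn_datatype_fp32, 4, padded_dims, NULL,
                          /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &v3);

  const size_t pre[4] = {0, 1, 0, 0};
  const size_t post[4] = {0, 1, 0, 0};
  xnn_define_static_constant_pad(subgraph, pre, post, /*padding_value=*/0.0f,
                                 /*input_id=*/v0, /*output_id=*/v2, /*flags=*/0);
  xnn_define_add2(subgraph, /*output_min=*/-INFINITY, /*output_max=*/INFINITY,
                  /*input1_id=*/v2, /*input2_id=*/v1, /*output_id=*/v3, /*flags=*/0);

  xnn_delete_subgraph(subgraph);
  return 0;
}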
Diffstat:
-rw-r--r--  BUILD.bazel             13
-rwxr-xr-x  CMakeLists.txt           5
-rw-r--r--  src/subgraph.c          27
-rw-r--r--  src/xnnpack/subgraph.h   2
-rw-r--r--  test/subgraph-fp16.cc   90
-rw-r--r--  test/subgraph-nchw.cc    8
-rw-r--r--  test/subgraph-tester.h  20
7 files changed, 148 insertions, 17 deletions
diff --git a/BUILD.bazel b/BUILD.bazel
index 35c7a89dc..ec2005453 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -15657,6 +15657,19 @@ xnnpack_unit_test(
)
xnnpack_unit_test(
+ name = "subgraph_fp16_test",
+ srcs = [
+ "test/subgraph-fp16.cc",
+ "test/subgraph-tester.h",
+ ],
+ deps = [
+ ":XNNPACK",
+ ":node_type",
+ ":subgraph_test_mode",
+ ],
+)
+
+xnnpack_unit_test(
name = "jit_test",
srcs = [
"test/jit.cc",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c7cfb7c4e..7ecc493cb 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7655,6 +7655,11 @@ IF(XNNPACK_BUILD_TESTS)
TARGET_LINK_LIBRARIES(subgraph-nchw-test PRIVATE XNNPACK cache gtest gtest_main microparams_init logging subgraph operators)
ADD_TEST(NAME subgraph-nchw-test COMMAND subgraph-nchw-test)
+ ADD_EXECUTABLE(subgraph-fp16-test test/subgraph-fp16.cc)
+ TARGET_INCLUDE_DIRECTORIES(subgraph-fp16-test PRIVATE src test)
+ TARGET_LINK_LIBRARIES(subgraph-fp16-test PRIVATE XNNPACK cache gtest gtest_main microparams_init logging subgraph operators)
+ ADD_TEST(NAME subgraph-fp16-test COMMAND subgraph-fp16-test)
+
# ---[ Build subgraph-level unit tests
ADD_EXECUTABLE(workspace-test test/workspace.cc)
TARGET_INCLUDE_DIRECTORIES(workspace-test PRIVATE src test)
diff --git a/src/subgraph.c b/src/subgraph.c
index 029866c79..908132e73 100644
--- a/src/subgraph.c
+++ b/src/subgraph.c
@@ -887,8 +887,11 @@ bool xnn_subgraph_rewrite_for_fp16(xnn_subgraph_t subgraph)
assert(value->data == NULL);
assert(value->datatype == xnn_datatype_fp16);
assert(subgraph->values[value->fp32_id].datatype == xnn_datatype_fp32);
- assert(subgraph->values[value->fp32_id].flags & XNN_VALUE_FLAG_EXTERNAL_INPUT);
- num_external_inputs += 1;
+ // This value isn't always an external input: it could be an external output of the current subgraph
+ // (due to partitioning) that is simultaneously consumed as an input by the current node.
+ if (subgraph->values[value->fp32_id].flags & XNN_VALUE_FLAG_EXTERNAL_INPUT) {
+ num_external_inputs += 1;
+ }
}
}
for (uint32_t o = 0; o < node->num_outputs; o++) {
@@ -935,14 +938,18 @@ bool xnn_subgraph_rewrite_for_fp16(xnn_subgraph_t subgraph)
for (uint32_t i = 0; i < node->num_inputs; i++) {
const struct xnn_value* value = &subgraph->values[node->inputs[i]];
if (value->fp32_id != XNN_INVALID_VALUE_ID && value->first_consumer == n - 1) {
- xnn_log_debug("Inserted FP32->FP16 Convert Node from tensor #%"PRIu32" to tensor #%"PRIu32,
- value->fp32_id, value->id);
- const uint32_t output_node_id = output_node->id;
- assert(output_node >= subgraph->nodes);
- xnn_node_clear(output_node);
- output_node->id = output_node_id;
- xnn_init_convert_node(output_node, xnn_compute_type_fp32_to_fp16, value->fp32_id, value->id, 0 /* flags */);
- output_node -= 1;
+ // Only insert a Convert node if the value actually is an external input. The value could instead be an
+ // external output; in that case, a Convert node was already inserted in the output loop above.
+ if (subgraph->values[value->fp32_id].flags & XNN_VALUE_FLAG_EXTERNAL_INPUT) {
+ xnn_log_debug("Inserted FP32->FP16 Convert Node from tensor #%"PRIu32" to tensor #%"PRIu32,
+ value->fp32_id, value->id);
+ const uint32_t output_node_id = output_node->id;
+ assert(output_node >= subgraph->nodes);
+ xnn_node_clear(output_node);
+ output_node->id = output_node_id;
+ xnn_init_convert_node(output_node, xnn_compute_type_fp32_to_fp16, value->fp32_id, value->id, 0 /* flags */);
+ output_node -= 1;
+ }
}
}
}
diff --git a/src/xnnpack/subgraph.h b/src/xnnpack/subgraph.h
index 7da92c812..a0ba588a6 100644
--- a/src/xnnpack/subgraph.h
+++ b/src/xnnpack/subgraph.h
@@ -373,6 +373,8 @@ size_t xnn_shape_multiply_non_channel_dims(
enum xnn_status xnn_subgraph_optimize(xnn_subgraph_t subgraph, uint32_t flags);
void xnn_subgraph_rewrite_for_nchw(xnn_subgraph_t subgraph);
+// Rewrites the subgraph to use FP16; returns true on success, false if the rewrite failed.
+bool xnn_subgraph_rewrite_for_fp16(xnn_subgraph_t subgraph);
void xnn_node_clear(struct xnn_node* node);
void xnn_value_clear(struct xnn_value* value);
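A usage sketch for the newly exposed entry point (hedged: this is an internal helper, normally driven by xnn_subgraph_optimize rather than called directly; the warning text is illustrative). The boolean return lets callers fall back to FP32 inference:

if (!xnn_subgraph_rewrite_for_fp16(subgraph)) {
  // Rewrite failed (e.g. an unsupported node); keep running in FP32.
  xnn_log_warning("FP16 rewrite failed; continuing with FP32 inference");
}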
diff --git a/test/subgraph-fp16.cc b/test/subgraph-fp16.cc
new file mode 100644
index 000000000..88e587be8
--- /dev/null
+++ b/test/subgraph-fp16.cc
@@ -0,0 +1,90 @@
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <array>
+
+#include <xnnpack.h>
+#include <xnnpack/node-type.h>
+
+#include "subgraph-tester.h"
+#include <gtest/gtest.h>
+
+namespace xnnpack {
+
+TEST(SUBGRAPH_FP16, value_both_external_output_and_input) {
+ auto tester = SubgraphTester(4);
+ std::array<size_t, 4> pre_paddings = {0,1,0,0};
+ std::array<size_t, 4> post_paddings = {0,1,0,0};
+ // external input[0]
+ // /
+ // [constant pad]
+ // /
+ // external dynamic[1]
+ // output[2] /
+ // \ /
+ // [add]
+ // |
+ // external
+ // output[3]
+ tester
+ .AddInputTensorF32({1, 2, 2, 3}, 0)
+ .AddDynamicTensorF32({1, 1, 1, 3}, 1)
+ .AddOutputTensorF32({1, 4, 2, 3}, 2)
+ .AddOutputTensorF32({1, 4, 2, 3}, 3)
+ .AddConstantPad(pre_paddings.data(), post_paddings.data(), 0.0f, 0, 2)
+ .AddAddition(2, 1, 3)
+ .Optimize()
+ .RewriteForFp16();
+
+ // After rewriting for FP16, the graph should look like this, with * marking newly created operators and values:
+ //
+ // external input[0]
+ // |
+ // [convert]*
+ // |
+ // input[4]*
+ // /
+ // [constant pad]
+ // /
+ // fp16 value[5]*
+ // | \
+ // [convert]* \
+ // | \
+ // external \ dynamic[1] converted in-place
+ // output[2] \ /
+ // \ /
+ // [add]
+ // |
+ // fp16 value[6]*
+ // |
+ // [convert]*
+ // |
+ // external
+ // output[3]
+
+ // We should have 3 Convert nodes: one for the external input and two for the
+ // external outputs. Together with the Pad and Add from the original graph,
+ // that makes 5 nodes in total.
+ ASSERT_EQ(tester.NumNodes(), 5);
+
+ const xnn_node* output_convert_node = tester.Node(4);
+ ASSERT_EQ(output_convert_node->type, xnn_node_type_convert);
+ ASSERT_EQ(output_convert_node->compute_type, xnn_compute_type_fp16_to_fp32);
+
+ // Check that the Add node consumes the FP16 values, not the FP32 originals.
+ const xnn_node* addition_node = tester.Node(3);
+ ASSERT_EQ(addition_node->type, xnn_node_type_add2);
+ ASSERT_EQ(addition_node->inputs[0], 5);
+ ASSERT_EQ(addition_node->inputs[1], 1);
+ ASSERT_EQ(tester.Value(5)->datatype, xnn_datatype_fp16);
+ ASSERT_EQ(tester.Value(1)->datatype, xnn_datatype_fp16);
+
+ ASSERT_EQ(tester.Node(2)->type, xnn_node_type_convert);
+ ASSERT_EQ(tester.Node(2)->compute_type, xnn_compute_type_fp16_to_fp32);
+ ASSERT_EQ(tester.Node(1)->type, xnn_node_type_static_constant_pad);
+ ASSERT_EQ(tester.Node(0)->type, xnn_node_type_convert);
+ ASSERT_EQ(tester.Node(0)->compute_type, xnn_compute_type_fp32_to_fp16);
+}
+
+} // namespace xnnpack
diff --git a/test/subgraph-nchw.cc b/test/subgraph-nchw.cc
index c0e9d600c..04fd3c492 100644
--- a/test/subgraph-nchw.cc
+++ b/test/subgraph-nchw.cc
@@ -28,7 +28,7 @@ TEST(SUBGRAPH_NCHW, single_conv) {
/*group_output_channels=*/ 32,
}, 0, 1, 2, 3)
.Optimize()
- .Rewrite();
+ .RewriteForNchw();
ASSERT_EQ(tester.GetLayout(0), xnn_layout_type_nhwc);
ASSERT_EQ(tester.GetLayout(3), xnn_layout_type_nhwc);
@@ -54,7 +54,7 @@ TEST(SUBGRAPH_NCHW, single_conv_and_global_average_pooling) {
}, 0, 1, 2, 3)
.AddGlobalAveragePooling(3, 4)
.Optimize()
- .Rewrite();
+ .RewriteForNchw();
ASSERT_EQ(tester.GetLayout(0), xnn_layout_type_nhwc);
ASSERT_EQ(tester.GetLayout(3), xnn_layout_type_nhwc);
@@ -94,7 +94,7 @@ TEST(SUBGRAPH_NCHW, pixelwise_conv_sandwich) {
}, 3, 4, 5, 6)
.AddGlobalAveragePooling(6, 7)
.Optimize()
- .Rewrite();
+ .RewriteForNchw();
ASSERT_EQ(tester.GetLayout(0), xnn_layout_type_nhwc);
ASSERT_EQ(tester.GetLayout(3), xnn_layout_type_nchw);
@@ -163,7 +163,7 @@ TEST(SUBGRAPH_NCHW, bottleneck) {
.AddAddition(3, 12, 13)
.AddGlobalAveragePooling(13, 14)
.Optimize()
- .Rewrite();
+ .RewriteForNchw();
ASSERT_EQ(tester.GetLayout(0), xnn_layout_type_nhwc);
ASSERT_EQ(tester.GetLayout(3), xnn_layout_type_nchw);
diff --git a/test/subgraph-tester.h b/test/subgraph-tester.h
index 30b58300c..fd525c114 100644
--- a/test/subgraph-tester.h
+++ b/test/subgraph-tester.h
@@ -349,18 +349,32 @@ class SubgraphTester {
return *this;
}
- inline SubgraphTester& Rewrite() {
+ inline SubgraphTester& RewriteForNchw() {
xnn_subgraph_rewrite_for_nchw(subgraph_.get());
return *this;
}
+ inline SubgraphTester& RewriteForFp16() {
+ EXPECT_TRUE(xnn_subgraph_rewrite_for_fp16(subgraph_.get()));
+
+ return *this;
+ }
+
inline xnn_layout_type GetLayout(uint32_t value_id) const {
return subgraph_->values[value_id].layout;
}
- inline const xnn_node* const Node(uint32_t node_index) const {
- return &subgraph_->nodes[node_index];
+ inline const xnn_value* const Value(uint32_t value_id) const {
+ return &subgraph_->values[value_id];
+ }
+
+ inline const xnn_node* const Node(uint32_t node_id) const {
+ return &subgraph_->nodes[node_id];
+ }
+
+ inline size_t NumNodes() const {
+ return subgraph_->num_nodes;
}
protected: