author    | Zhi An Ng <zhin@google.com>                    | 2022-08-24 10:38:43 -0700
committer | XNNPACK Team <xnnpack-github-robot@google.com> | 2022-08-24 10:39:49 -0700
commit    | edfac6301d77feb42f4f0562d02251fd02fe4825 (patch)
tree      | 1a4457f746c24a22226e4a47fbef3b4a98679dc4
parent    | a066c3191d564b824a64b78fee498ae5ac48e998 (diff)
download  | XNNPACK-edfac6301d77feb42f4f0562d02251fd02fe4825.tar.gz
Add test for FP16 rewrite when an external output happens to be an input to another node
This can happen when a subgraph is split because an operation is not supported by XNNPACK. The FP16 rewriting code does not account for this: when counting the number of external inputs, it asserts that any node input value with an fp32_id is an external input. This is not always true, since such a value can instead be an external output.
PiperOrigin-RevId: 469766360
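For context, here is a minimal sketch (an illustration, not part of this change) of how such a graph arises through the public subgraph API: value 2 is declared as an external output, produced by a Constant Pad node, and simultaneously consumed by an Add2 node. Shapes and value IDs mirror the new test/subgraph-fp16.cc below; status-code checking is omitted for brevity.

```c++
#include <xnnpack.h>

#include <array>
#include <cstddef>
#include <cstdint>
#include <limits>

int main() {
  // Every xnn_* call returns an xnn_status; error handling is elided here.
  xnn_initialize(/*allocator=*/nullptr);

  xnn_subgraph_t subgraph = nullptr;
  xnn_create_subgraph(/*external_value_ids=*/4, /*flags=*/0, &subgraph);

  const std::array<size_t, 4> input_dims = {1, 2, 2, 3};
  const std::array<size_t, 4> dynamic_dims = {1, 1, 1, 3};
  const std::array<size_t, 4> output_dims = {1, 4, 2, 3};

  uint32_t input_id = XNN_INVALID_VALUE_ID;
  uint32_t dynamic_id = XNN_INVALID_VALUE_ID;
  uint32_t padded_id = XNN_INVALID_VALUE_ID;
  uint32_t sum_id = XNN_INVALID_VALUE_ID;

  xnn_define_tensor_value(subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(),
                          /*data=*/nullptr, /*external_id=*/0,
                          XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id);
  xnn_define_tensor_value(subgraph, xnn_datatype_fp32, dynamic_dims.size(), dynamic_dims.data(),
                          /*data=*/nullptr, /*external_id=*/1, /*flags=*/0, &dynamic_id);
  // Value 2 is declared an external output of this (partitioned) subgraph...
  xnn_define_tensor_value(subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(),
                          /*data=*/nullptr, /*external_id=*/2,
                          XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &padded_id);
  xnn_define_tensor_value(subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(),
                          /*data=*/nullptr, /*external_id=*/3,
                          XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &sum_id);

  const std::array<size_t, 4> pre_paddings = {0, 1, 0, 0};
  const std::array<size_t, 4> post_paddings = {0, 1, 0, 0};
  // ...produced by the Constant Pad node...
  xnn_define_static_constant_pad(subgraph, pre_paddings.data(), post_paddings.data(),
                                 /*padding_value=*/0.0f, input_id, padded_id, /*flags=*/0);
  // ...and simultaneously consumed by the Add2 node. Before this fix, the FP16
  // rewrite asserted that any node input with an fp32_id was an external input,
  // which does not hold for value 2 here.
  const float output_min = -std::numeric_limits<float>::infinity();
  const float output_max = +std::numeric_limits<float>::infinity();
  xnn_define_add2(subgraph, output_min, output_max, padded_id, dynamic_id, sum_id, /*flags=*/0);

  xnn_delete_subgraph(subgraph);
  return 0;
}
```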
-rw-r--r-- | BUILD.bazel            | 13
-rwxr-xr-x | CMakeLists.txt         |  5
-rw-r--r-- | src/subgraph.c         | 27
-rw-r--r-- | src/xnnpack/subgraph.h |  2
-rw-r--r-- | test/subgraph-fp16.cc  | 90
-rw-r--r-- | test/subgraph-nchw.cc  |  8
-rw-r--r-- | test/subgraph-tester.h | 20
7 files changed, 148 insertions, 17 deletions
diff --git a/BUILD.bazel b/BUILD.bazel
index 35c7a89dc..ec2005453 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -15657,6 +15657,19 @@ xnnpack_unit_test(
 )
 
 xnnpack_unit_test(
+    name = "subgraph_fp16_test",
+    srcs = [
+        "test/subgraph-fp16.cc",
+        "test/subgraph-tester.h",
+    ],
+    deps = [
+        ":XNNPACK",
+        ":node_type",
+        ":subgraph_test_mode",
+    ],
+)
+
+xnnpack_unit_test(
     name = "jit_test",
     srcs = [
         "test/jit.cc",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c7cfb7c4e..7ecc493cb 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7655,6 +7655,11 @@ IF(XNNPACK_BUILD_TESTS)
   TARGET_LINK_LIBRARIES(subgraph-nchw-test PRIVATE XNNPACK cache gtest gtest_main microparams_init logging subgraph operators)
   ADD_TEST(NAME subgraph-nchw-test COMMAND subgraph-nchw-test)
 
+  ADD_EXECUTABLE(subgraph-fp16-test test/subgraph-fp16.cc)
+  TARGET_INCLUDE_DIRECTORIES(subgraph-fp16-test PRIVATE src test)
+  TARGET_LINK_LIBRARIES(subgraph-fp16-test PRIVATE XNNPACK cache gtest gtest_main microparams_init logging subgraph operators)
+  ADD_TEST(NAME subgraph-fp16-test COMMAND subgraph-fp16-test)
+
   # ---[ Build subgraph-level unit tests
   ADD_EXECUTABLE(workspace-test test/workspace.cc)
   TARGET_INCLUDE_DIRECTORIES(workspace-test PRIVATE src test)
diff --git a/src/subgraph.c b/src/subgraph.c
index 029866c79..908132e73 100644
--- a/src/subgraph.c
+++ b/src/subgraph.c
@@ -887,8 +887,11 @@ bool xnn_subgraph_rewrite_for_fp16(xnn_subgraph_t subgraph)
         assert(value->data == NULL);
         assert(value->datatype == xnn_datatype_fp16);
         assert(subgraph->values[value->fp32_id].datatype == xnn_datatype_fp32);
-        assert(subgraph->values[value->fp32_id].flags & XNN_VALUE_FLAG_EXTERNAL_INPUT);
-        num_external_inputs += 1;
+        // This value isn't always an external input; it can be an external output of the current subgraph (due to
+        // partitioning) that is simultaneously consumed by the current node.
+        if (subgraph->values[value->fp32_id].flags & XNN_VALUE_FLAG_EXTERNAL_INPUT) {
+          num_external_inputs += 1;
+        }
       }
     }
     for (uint32_t o = 0; o < node->num_outputs; o++) {
@@ -935,14 +938,18 @@ bool xnn_subgraph_rewrite_for_fp16(xnn_subgraph_t subgraph)
     for (uint32_t i = 0; i < node->num_inputs; i++) {
       const struct xnn_value* value = &subgraph->values[node->inputs[i]];
       if (value->fp32_id != XNN_INVALID_VALUE_ID && value->first_consumer == n - 1) {
-        xnn_log_debug("Inserted FP32->FP16 Convert Node from tensor #%"PRIu32" to tensor #%"PRIu32,
-          value->fp32_id, value->id);
-        const uint32_t output_node_id = output_node->id;
-        assert(output_node >= subgraph->nodes);
-        xnn_node_clear(output_node);
-        output_node->id = output_node_id;
-        xnn_init_convert_node(output_node, xnn_compute_type_fp32_to_fp16, value->fp32_id, value->id, 0 /* flags */);
-        output_node -= 1;
+        // Only insert a Convert node if the value actually is an external input. The value could instead be an
+        // external output; in that case we already inserted a Convert node for it in the loop over outputs above.
+        if (subgraph->values[value->fp32_id].flags & XNN_VALUE_FLAG_EXTERNAL_INPUT) {
+          xnn_log_debug("Inserted FP32->FP16 Convert Node from tensor #%"PRIu32" to tensor #%"PRIu32,
+            value->fp32_id, value->id);
+          const uint32_t output_node_id = output_node->id;
+          assert(output_node >= subgraph->nodes);
+          xnn_node_clear(output_node);
+          output_node->id = output_node_id;
+          xnn_init_convert_node(output_node, xnn_compute_type_fp32_to_fp16, value->fp32_id, value->id, 0 /* flags */);
+          output_node -= 1;
+        }
       }
     }
   }
diff --git a/src/xnnpack/subgraph.h b/src/xnnpack/subgraph.h
index 7da92c812..a0ba588a6 100644
--- a/src/xnnpack/subgraph.h
+++ b/src/xnnpack/subgraph.h
@@ -373,6 +373,8 @@ size_t xnn_shape_multiply_non_channel_dims(
 enum xnn_status xnn_subgraph_optimize(xnn_subgraph_t subgraph, uint32_t flags);
 
 void xnn_subgraph_rewrite_for_nchw(xnn_subgraph_t subgraph);
+// Rewrites the subgraph for FP16; returns true on success, false if the rewrite failed.
+bool xnn_subgraph_rewrite_for_fp16(xnn_subgraph_t subgraph);
 
 void xnn_node_clear(struct xnn_node* node);
 void xnn_value_clear(struct xnn_value* value);
diff --git a/test/subgraph-fp16.cc b/test/subgraph-fp16.cc
new file mode 100644
index 000000000..88e587be8
--- /dev/null
+++ b/test/subgraph-fp16.cc
@@ -0,0 +1,90 @@
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <array>
+
+#include <xnnpack.h>
+#include <xnnpack/node-type.h>
+
+#include "subgraph-tester.h"
+#include <gtest/gtest.h>
+
+namespace xnnpack {
+
+TEST(SUBGRAPH_FP16, value_both_external_output_and_input) {
+  auto tester = SubgraphTester(4);
+  std::array<size_t, 4> pre_paddings = {0, 1, 0, 0};
+  std::array<size_t, 4> post_paddings = {0, 1, 0, 0};
+  // external input[0]
+  //        |
+  //  [constant pad]
+  //        |
+  //    external        dynamic[1]
+  //    output[2]      /
+  //         \        /
+  //          [add]
+  //            |
+  //        external
+  //        output[3]
+  tester
+      .AddInputTensorF32({1, 2, 2, 3}, 0)
+      .AddDynamicTensorF32({1, 1, 1, 3}, 1)
+      .AddOutputTensorF32({1, 4, 2, 3}, 2)
+      .AddOutputTensorF32({1, 4, 2, 3}, 3)
+      .AddConstantPad(pre_paddings.data(), post_paddings.data(), 0.0f, 0, 2)
+      .AddAddition(2, 1, 3)
+      .Optimize()
+      .RewriteForFp16();
+
+  // After rewriting for FP16, the graph should look like this, with * marking
+  // newly created operators and values:
+  //
+  // external input[0]
+  //        |
+  //    [convert]*
+  //        |
+  //     input[4]*
+  //        |
+  //  [constant pad]
+  //        |
+  //  fp16 value[5]*
+  //    |       \
+  // [convert]*  \
+  //    |         \
+  // external      \    dynamic[1] (converted in-place)
+  // output[2]      \    /
+  //                [add]
+  //                  |
+  //           fp16 value[6]*
+  //                  |
+  //             [convert]*
+  //                  |
+  //              external
+  //              output[3]
+
+  // We should have 3 Convert nodes: one for the external input and two for the
+  // external outputs. Together with the Pad and Add from the original graph,
+  // that makes 5 nodes in total.
+  ASSERT_EQ(tester.NumNodes(), 5);
+
+  const xnn_node* output_convert_node = tester.Node(4);
+  ASSERT_EQ(output_convert_node->type, xnn_node_type_convert);
+  ASSERT_EQ(output_convert_node->compute_type, xnn_compute_type_fp16_to_fp32);
+
+  // Check that the Addition node refers to the FP16 values before conversion.
+  const xnn_node* addition_node = tester.Node(3);
+  ASSERT_EQ(addition_node->type, xnn_node_type_add2);
+  ASSERT_EQ(addition_node->inputs[0], 5);
+  ASSERT_EQ(addition_node->inputs[1], 1);
+  ASSERT_EQ(tester.Value(5)->datatype, xnn_datatype_fp16);
+  ASSERT_EQ(tester.Value(1)->datatype, xnn_datatype_fp16);
+
+  ASSERT_EQ(tester.Node(2)->type, xnn_node_type_convert);
+  ASSERT_EQ(tester.Node(2)->compute_type, xnn_compute_type_fp16_to_fp32);
+  ASSERT_EQ(tester.Node(1)->type, xnn_node_type_static_constant_pad);
+  ASSERT_EQ(tester.Node(0)->type, xnn_node_type_convert);
+  ASSERT_EQ(tester.Node(0)->compute_type, xnn_compute_type_fp32_to_fp16);
+}
+
+}  // namespace xnnpack
diff --git a/test/subgraph-nchw.cc b/test/subgraph-nchw.cc
index c0e9d600c..04fd3c492 100644
--- a/test/subgraph-nchw.cc
+++ b/test/subgraph-nchw.cc
@@ -28,7 +28,7 @@ TEST(SUBGRAPH_NCHW, single_conv) {
           /*group_output_channels=*/ 32,
         }, 0, 1, 2, 3)
     .Optimize()
-    .Rewrite();
+    .RewriteForNchw();
 
   ASSERT_EQ(tester.GetLayout(0), xnn_layout_type_nhwc);
   ASSERT_EQ(tester.GetLayout(3), xnn_layout_type_nhwc);
@@ -54,7 +54,7 @@ TEST(SUBGRAPH_NCHW, single_conv_and_global_average_pooling) {
         }, 0, 1, 2, 3)
     .AddGlobalAveragePooling(3, 4)
     .Optimize()
-    .Rewrite();
+    .RewriteForNchw();
 
   ASSERT_EQ(tester.GetLayout(0), xnn_layout_type_nhwc);
   ASSERT_EQ(tester.GetLayout(3), xnn_layout_type_nhwc);
@@ -94,7 +94,7 @@ TEST(SUBGRAPH_NCHW, pixelwise_conv_sandwich) {
         }, 3, 4, 5, 6)
     .AddGlobalAveragePooling(6, 7)
     .Optimize()
-    .Rewrite();
+    .RewriteForNchw();
 
   ASSERT_EQ(tester.GetLayout(0), xnn_layout_type_nhwc);
   ASSERT_EQ(tester.GetLayout(3), xnn_layout_type_nchw);
@@ -163,7 +163,7 @@ TEST(SUBGRAPH_NCHW, bottleneck) {
     .AddAddition(3, 12, 13)
     .AddGlobalAveragePooling(13, 14)
     .Optimize()
-    .Rewrite();
+    .RewriteForNchw();
 
   ASSERT_EQ(tester.GetLayout(0), xnn_layout_type_nhwc);
   ASSERT_EQ(tester.GetLayout(3), xnn_layout_type_nchw);
diff --git a/test/subgraph-tester.h b/test/subgraph-tester.h
index 30b58300c..fd525c114 100644
--- a/test/subgraph-tester.h
+++ b/test/subgraph-tester.h
@@ -349,18 +349,32 @@ class SubgraphTester {
     return *this;
   }
 
-  inline SubgraphTester& Rewrite() {
+  inline SubgraphTester& RewriteForNchw() {
     xnn_subgraph_rewrite_for_nchw(subgraph_.get());
 
     return *this;
   }
 
+  inline SubgraphTester& RewriteForFp16() {
+    EXPECT_TRUE(xnn_subgraph_rewrite_for_fp16(subgraph_.get()));
+
+    return *this;
+  }
+
   inline xnn_layout_type GetLayout(uint32_t value_id) const {
     return subgraph_->values[value_id].layout;
   }
 
-  inline const xnn_node* const Node(uint32_t node_index) const {
-    return &subgraph_->nodes[node_index];
+  inline const xnn_value* const Value(uint32_t value_id) const {
+    return &subgraph_->values[value_id];
+  }
+
+  inline const xnn_node* const Node(uint32_t node_id) const {
+    return &subgraph_->nodes[node_id];
+  }
+
+  inline size_t NumNodes() const {
+    return subgraph_->num_nodes;
   }
 
  protected:
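With the build changes above, the new test can be run as `bazel test //:subgraph_fp16_test` under Bazel, or (assuming a CMake build configured with XNNPACK_BUILD_TESTS) via `ctest -R subgraph-fp16-test`.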