author    Antonio Sanchez <cantonios@google.com>          2022-02-03 09:47:17 -0800
committer XNNPACK Team <xnnpack-github-robot@google.com>  2022-02-03 09:49:25 -0800
commit    9a365d0da32b5401718b34830da47c7658fda510 (patch)
tree      bef2538c37ce0eb98efc47d790fce03ecf65f9d4
parent    f0f374f6122fa7271f643fd741d33132d9159166 (diff)
download  XNNPACK-9a365d0da32b5401718b34830da47c7658fda510.tar.gz
Revert "Graph rewriting for FP16 inference"
PiperOrigin-RevId: 426169617
-rw-r--r--  include/xnnpack.h                         |  11
-rw-r--r--  src/subgraph.c                            | 304
-rw-r--r--  src/subgraph/add2.c                       |  20
-rw-r--r--  src/subgraph/convert.c                    |  66
-rw-r--r--  src/subgraph/convolution-2d.c             |  38
-rw-r--r--  src/subgraph/depthwise-convolution-2d.c   |  38
-rw-r--r--  src/subgraph/global-average-pooling-2d.c  |  21
-rw-r--r--  src/subgraph/hardswish.c                  |  49
-rw-r--r--  src/xnnpack/subgraph.h                    |  22
9 files changed, 51 insertions, 518 deletions
diff --git a/include/xnnpack.h b/include/xnnpack.h
index 3f7b3a2d3..4f5032f52 100644
--- a/include/xnnpack.h
+++ b/include/xnnpack.h
@@ -32,11 +32,6 @@ extern "C" {
/// Note: this flag forces XNNPACK to consider sparse inference, but does not guarantee it.
#define XNN_FLAG_SPARSE_INFERENCE 0x00000001
-/// Allow IEEE FP16 inference in a Runtime.
-///
-/// Note: this flag forces XNNPACK to consider IEEE FP16 inference, but does not guarantee it.
-#define XNN_FLAG_FP16_INFERENCE 0x00000001
-
/// The convolution operator represents a depthwise convolution, and uses HWGo layout for filters.
#define XNN_FLAG_DEPTHWISE_CONVOLUTION 0x00000001
@@ -1138,9 +1133,9 @@ typedef struct xnn_runtime* xnn_runtime_t;
/// Nodes can be added to the runtime once it is constructed.
/// @param threadpool - the thread pool to be used for parallelization of computations in the runtime. If the thread
/// pool is NULL, the computation would run on the caller thread without parallelization.
-/// @param flags - binary features of the runtime. The only currently supported values are XNN_FLAG_SPARSE_INFERENCE,
-/// XNN_FLAG_FP16_INFERENCE, and XNN_FLAG_YIELD_WORKERS. If XNN_FLAG_YIELD_WORKERS is specified, worker
-/// threads would be yielded to the system scheduler after processing the last operator in the Runtime.
+/// @param flags - binary features of the runtime. The only currently supported values are XNN_FLAG_SPARSE_INFERENCE
+/// and XNN_FLAG_YIELD_WORKERS. If XNN_FLAG_YIELD_WORKERS is specified, worker threads would be yielded
+/// to the system scheduler after processing the last operator in the Runtime.
/// @param runtime_out - pointer to the variable that will be initialized with a handle to the Runtime object upon
/// successful return. Once constructed, the Runtime object is independent of the Subgraph object
/// used to create it.
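
As a quick illustration of the flags parameter described above, here is a minimal sketch of constructing a Runtime via xnn_create_runtime_v2 (the helper name and error handling are ours; the calls are the ones this header documents):

    #include <stddef.h>   // NULL
    #include <xnnpack.h>

    // Sketch: build a Runtime from an existing Subgraph. After this revert,
    // XNN_FLAG_SPARSE_INFERENCE and XNN_FLAG_YIELD_WORKERS are the only flags
    // documented for xnn_create_runtime_v2.
    static enum xnn_status make_runtime(xnn_subgraph_t subgraph, xnn_runtime_t* runtime_out) {
      // A NULL threadpool runs the computation on the caller thread, without parallelization.
      return xnn_create_runtime_v2(subgraph, /*threadpool=*/NULL,
                                   XNN_FLAG_YIELD_WORKERS, runtime_out);
    }
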
diff --git a/src/subgraph.c b/src/subgraph.c
index 9ed9d271f..2946d98dd 100644
--- a/src/subgraph.c
+++ b/src/subgraph.c
@@ -86,30 +86,16 @@ struct xnn_value* xnn_subgraph_new_internal_value(xnn_subgraph_t subgraph)
void xnn_node_clear(struct xnn_node* node) {
assert(node != NULL);
+ assert(node->type != xnn_node_type_invalid);
memset(node, 0, sizeof(struct xnn_node));
}
void xnn_value_clear(struct xnn_value* value) {
assert(value != NULL);
+ assert(value->type != xnn_value_type_invalid);
memset(value, 0, sizeof(struct xnn_value));
}
-void xnn_value_copy(
- struct xnn_value* dst_value,
- const struct xnn_value* src_value)
-{
- // Note: Value ID stays unchanged
-
- dst_value->type = src_value->type;
- dst_value->datatype = src_value->datatype;
- dst_value->quantization = src_value->quantization;
- dst_value->shape = src_value->shape;
- dst_value->flags = src_value->flags;
- dst_value->data = src_value->data;
- dst_value->producer = src_value->producer;
- dst_value->first_consumer = src_value->first_consumer;
-}
-
struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph)
{
struct xnn_node* nodes = subgraph->nodes;
@@ -136,76 +122,6 @@ struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph)
return new_node;
}
-void xnn_subgraph_add_nodes(xnn_subgraph_t subgraph, size_t num_nodes)
-{
- struct xnn_node* nodes = subgraph->nodes;
- const size_t size = subgraph->num_nodes;
- const size_t capacity = subgraph->num_reserved_nodes;
-
- if (capacity < size + num_nodes) {
- const size_t new_capacity = max(min(capacity * 2, capacity + 512), capacity + max(num_nodes, 64));
- assert(new_capacity >= size + num_nodes);
- nodes = xnn_reallocate_memory(nodes, new_capacity * sizeof(struct xnn_node));
- if (nodes == NULL) {
- xnn_log_error("failed to allocate %zu bytes for subgraph nodes",
- capacity * sizeof(struct xnn_node));
- return;
- }
-
- memset(nodes + size, 0, (new_capacity - size) * sizeof(struct xnn_node));
- subgraph->num_reserved_nodes = new_capacity;
- subgraph->nodes = nodes;
- }
- subgraph->num_nodes = size + num_nodes;
- struct xnn_node* new_nodes = nodes + size;
- for (size_t i = 0; i < num_nodes; i++) {
- new_nodes[i].id = size + i;
- }
-}
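
The reverted xnn_subgraph_add_nodes grows the node pool geometrically, capping each step at 512 extra slots while always reserving room for at least max(num_nodes, 64) new nodes. A standalone sketch of the same arithmetic (hypothetical helper, not library code):

    #include <stddef.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))
    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    // Mirrors the reverted growth policy: geometric growth capped at +512 slots,
    // with a floor of max(num_nodes, 64) new slots.
    static size_t grow_capacity(size_t capacity, size_t size, size_t num_nodes) {
      if (capacity >= size + num_nodes) {
        return capacity;  // already enough room
      }
      return MAX(MIN(capacity * 2, capacity + 512), capacity + MAX(num_nodes, 64));
    }
    // e.g. grow_capacity(0, 0, 1) == 64, grow_capacity(64, 64, 1) == 128,
    //      grow_capacity(1024, 1024, 1) == 1536 (the +512 cap applies).
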
-
-void xnn_subgraph_analyze_consumers_and_producers(xnn_subgraph_t subgraph)
-{
- // Initialize producer/consumer fields to safe defaults.
- for (uint32_t i = 0; i < subgraph->num_values; i++) {
- struct xnn_value* value = &subgraph->values[i];
- value->producer = XNN_INVALID_NODE_ID;
- value->first_consumer = XNN_INVALID_NODE_ID;
- value->num_consumers = 0;
- }
-
- // Analyse Nodes' inputs and outputs and update Values' producer/consumer fields
- for (uint32_t n = 0; n < subgraph->num_nodes; n++) {
- struct xnn_node* node = &subgraph->nodes[n];
-
- for (uint32_t i = 0; i < node->num_inputs; i++) {
- const uint32_t input_id = node->inputs[i];
- assert(input_id < subgraph->num_values);
-
- if (subgraph->values[input_id].num_consumers++ == 0) {
- assert(subgraph->values[input_id].first_consumer == XNN_INVALID_NODE_ID);
- subgraph->values[input_id].first_consumer = n;
- }
- }
-
- for (uint32_t o = 0; o < node->num_outputs; o++) {
- const uint32_t output_id = node->outputs[o];
- assert(output_id < subgraph->num_values);
-
- assert(subgraph->values[output_id].producer == XNN_INVALID_NODE_ID);
- subgraph->values[output_id].producer = n;
- }
- }
-
- // Count extra consumer for Values which are external outputs.
- // Remove unreferenced values.
- for (uint32_t i = 0; i < subgraph->num_values; i++) {
- struct xnn_value* value = &subgraph->values[i];
- if (value->flags & XNN_VALUE_FLAG_EXTERNAL_OUTPUT) {
- value->num_consumers += 1;
- }
- }
-}
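
To make the reverted analysis concrete: a Value read by two Nodes ends up with num_consumers == 2 and first_consumer pointing at its first reader. A toy sketch with hypothetical stand-in types, not the library's structs:

    #include <assert.h>
    #include <stdint.h>

    #define INVALID_NODE_ID UINT32_MAX  // stand-in for XNN_INVALID_NODE_ID

    // Stripped-down mirror of the pass: one value, two consuming nodes.
    struct toy_value { uint32_t producer, first_consumer, num_consumers; };

    int main(void) {
      struct toy_value v = { INVALID_NODE_ID, INVALID_NODE_ID, 0 };
      const uint32_t consumers[2] = { 3, 7 };  // node ids that read this value
      for (uint32_t n = 0; n < 2; n++) {
        if (v.num_consumers++ == 0) {
          v.first_consumer = consumers[n];
        }
      }
      assert(v.num_consumers == 2);
      assert(v.first_consumer == 3);  // the first reader wins
      return 0;
    }
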
-
#define XNN_LAYOUT_FLAG_COMPATIBLE_NCHW 1
#define XNN_LAYOUT_FLAG_COMPATIBLE_NHWC2NCHW 2
#define XNN_LAYOUT_FLAG_COMPATIBLE_NCHW2NHWC 4
@@ -579,205 +495,42 @@ void xnn_subgraph_rewrite_for_nchw(xnn_subgraph_t subgraph)
}
}
-void xnn_subgraph_rewrite_for_fp16(xnn_subgraph_t subgraph)
+enum xnn_status xnn_subgraph_optimize(
+ xnn_subgraph_t subgraph,
+ uint32_t flags)
{
- xnn_log_info("Analyzing subgraph for FP16 compatibility");
-
- // Convert tensors and operators in the subgraph to FP16
- // 1. Check that all operators in the subgraph are supported in FP16.
- // 2. Indicate values that must be converted to FP16.
- // 3. Replace FP32 Values with FP16 Values as Nodes' inputs/outputs.
- // 4. Insert FP32->FP16 Convert Nodes for external FP32 inputs and FP16->FP32 Convert Nodes for external outputs.
-
- // Check that all operators in the subgraph are supported in FP16, bail out on any unsupported one.
- for (uint32_t n = 0; n < subgraph->num_nodes; n++) {
- struct xnn_node* node = &subgraph->nodes[n];
- if (node->compute_type != xnn_compute_type_fp32) {
- xnn_log_info("FP16 rewrite aborted: node #%" PRIu32 " (%s) is not FP32", n, xnn_node_type_to_string(node->type));
- return;
- }
- switch (node->type) {
- case xnn_node_type_add2:
- assert(node->num_inputs == 2);
- for (uint32_t i = 0; i < node->num_inputs; i++) {
- if (subgraph->values[node->inputs[i]].data != NULL) {
- xnn_log_info("FP16 rewrite aborted: node #%" PRIu32 " (%s) has static input %i",
- n, xnn_node_type_to_string(node->type), i);
- return;
- }
- }
- break;
- case xnn_node_type_convolution_2d:
- case xnn_node_type_depthwise_convolution_2d:
- case xnn_node_type_global_average_pooling_2d:
- case xnn_node_type_hardswish:
- break;
- default:
- xnn_log_info("FP16 rewrite aborted: node #%" PRIu32 " (%s) is not supported for FP16 inference",
- n, xnn_node_type_to_string(node->type));
- return;
- }
+ // Initialize producer/consumer fields to safe defaults.
+ for (uint32_t i = 0; i < subgraph->num_values; i++) {
+ struct xnn_value* value = &subgraph->values[i];
+ value->producer = XNN_INVALID_NODE_ID;
+ value->first_consumer = XNN_INVALID_NODE_ID;
+ value->num_consumers = 0;
}
- // Annotate Values to be converted to FP16 as FP16-compatible.
- // Note that static weights in [Depthwise] Convolution & Fully Connected Nodes remain FP32;
- // they will be converted to FP16 during weight repacking when the operator is created.
+ // Analyse Nodes' inputs and outputs and update Values' producer/consumer fields
for (uint32_t n = 0; n < subgraph->num_nodes; n++) {
struct xnn_node* node = &subgraph->nodes[n];
- switch (node->type) {
- case xnn_node_type_convolution_2d:
- case xnn_node_type_depthwise_convolution_2d:
- subgraph->values[node->inputs[0]].fp16_compatible = true;
- subgraph->values[node->outputs[0]].fp16_compatible = true;
- break;
- default:
- for (uint32_t i = 0; i < node->num_inputs; i++) {
- subgraph->values[node->inputs[i]].fp16_compatible = true;
- }
- for (uint32_t o = 0; o < node->num_outputs; o++) {
- subgraph->values[node->outputs[o]].fp16_compatible = true;
- }
- break;
- }
- }
- // Replace FP32 Values in Nodes' inputs/outputs with FP16 Values.
- // FP32 Values that are not external inputs or outputs are converted to FP16 in-place,
- // for external inputs and outputs we create same-shaped FP16 Values and use those instead.
- const uint32_t num_original_values = subgraph->num_values;
- xnn_subgraph_analyze_consumers_and_producers(subgraph);
- for (uint32_t n = 0; n < num_original_values; n++) {
- struct xnn_value* value = &subgraph->values[n];
- value->fp16_id = XNN_INVALID_VALUE_ID;
- value->fp32_id = XNN_INVALID_VALUE_ID;
- if (value->fp16_compatible) {
- assert(value->data == NULL);
- assert(value->datatype == xnn_datatype_fp32);
- if ((value->flags & (XNN_VALUE_FLAG_EXTERNAL_INPUT | XNN_VALUE_FLAG_EXTERNAL_OUTPUT)) != 0) {
- struct xnn_value* fp16_value = xnn_subgraph_new_internal_value(subgraph);
-
- // Recompute value due to potential reallocation in xnn_subgraph_new_internal_value
- value = &subgraph->values[n];
- xnn_value_copy(fp16_value, value);
- fp16_value->datatype = xnn_datatype_fp16;
-
- fp16_value->producer = value->producer;
- fp16_value->num_consumers = value->num_consumers;
- fp16_value->first_consumer = value->first_consumer;
- value->producer = XNN_INVALID_NODE_ID;
- value->num_consumers = 0;
- value->first_consumer = XNN_INVALID_NODE_ID;
-
- // Clear external input/output flags
- fp16_value->flags = 0;
- xnn_log_debug("FP16 rewrite: created FP16 tensor #%" PRIu32 " for FP32 tensor #%" PRIu32, fp16_value->id, n);
-
- value->fp16_id = fp16_value->id;
- fp16_value->fp32_id = n;
- } else {
- xnn_log_debug("FP16 rewrite: converted FP32 tensor #%" PRIu32 " to FP16", n);
- value->datatype = xnn_datatype_fp16;
- }
- }
- }
- for (uint32_t n = 0; n < subgraph->num_nodes; n++) {
- struct xnn_node* node = &subgraph->nodes[n];
- assert(node->compute_type == xnn_compute_type_fp32);
- node->compute_type = xnn_compute_type_fp16;
for (uint32_t i = 0; i < node->num_inputs; i++) {
- const uint32_t fp16_id = subgraph->values[node->inputs[i]].fp16_id;
- if (fp16_id != XNN_INVALID_VALUE_ID) {
- assert(subgraph->values[fp16_id].fp32_id == node->inputs[i]);
- node->inputs[i] = fp16_id;
- }
- }
- for (uint32_t o = 0; o < node->num_outputs; o++) {
- const uint32_t fp16_id = subgraph->values[node->outputs[o]].fp16_id;
- if (fp16_id != XNN_INVALID_VALUE_ID) {
- assert(subgraph->values[fp16_id].fp32_id == node->outputs[o]);
- node->outputs[o] = fp16_id;
- }
- }
- }
+ const uint32_t input_id = node->inputs[i];
+ assert(input_id < subgraph->num_values);
- // Count the number of external inputs and outputs which require Convert nodes
- uint32_t num_external_inputs = 0;
- uint32_t num_external_outputs = 0;
- for (uint32_t n = 0; n < subgraph->num_nodes; n++) {
- const struct xnn_node* node = &subgraph->nodes[n];
- for (uint32_t i = 0; i < node->num_inputs; i++) {
- const struct xnn_value* value = &subgraph->values[node->inputs[i]];
- if (value->fp32_id != XNN_INVALID_VALUE_ID && value->first_consumer == n) {
- assert(value->data == NULL);
- assert(value->datatype == xnn_datatype_fp16);
- assert(subgraph->values[value->fp32_id].datatype == xnn_datatype_fp32);
- assert(subgraph->values[value->fp32_id].flags & XNN_VALUE_FLAG_EXTERNAL_INPUT);
- num_external_inputs += 1;
- }
- }
- for (uint32_t o = 0; o < node->num_outputs; o++) {
- const struct xnn_value* value = &subgraph->values[node->outputs[o]];
- if (value->fp32_id != XNN_INVALID_VALUE_ID) {
- assert(value->datatype == xnn_datatype_fp16);
- assert(subgraph->values[value->fp32_id].datatype == xnn_datatype_fp32);
- assert(subgraph->values[value->fp32_id].flags & XNN_VALUE_FLAG_EXTERNAL_OUTPUT);
- num_external_outputs += 1;
+ if (subgraph->values[input_id].num_consumers++ == 0) {
+ assert(subgraph->values[input_id].first_consumer == XNN_INVALID_NODE_ID);
+ subgraph->values[input_id].first_consumer = n;
}
}
- }
- xnn_log_debug("Discovered %"PRIu32" external inputs and %"PRIu32" external outputs",
- num_external_inputs, num_external_outputs);
-
- const uint32_t num_original_nodes = subgraph->num_nodes;
- xnn_subgraph_add_nodes(subgraph, num_external_inputs + num_external_outputs);
- struct xnn_node* output_node = subgraph->nodes + subgraph->num_nodes - 1;
- for (uint32_t n = num_original_nodes; n != 0; n--) {
- const struct xnn_node* node = &subgraph->nodes[n - 1];
- // Insert Convert nodes for outputs
+
for (uint32_t o = 0; o < node->num_outputs; o++) {
- const struct xnn_value* value = &subgraph->values[node->outputs[o]];
- if (value->fp32_id != XNN_INVALID_VALUE_ID) {
- xnn_log_debug("Inserted FP16->FP32 Convert Node from tensor #%"PRIu32" to tensor #%"PRIu32,
- value->id, value->fp32_id);
- const uint32_t output_node_id = output_node->id;
- assert(output_node >= subgraph->nodes);
- xnn_node_clear(output_node);
- output_node->id = output_node_id;
- xnn_init_convert_node(output_node, xnn_compute_type_fp16_to_fp32, value->id, value->fp32_id, 0 /* flags */);
- output_node -= 1;
- }
- }
- // Move the Node to the new location
- if (output_node != node) {
- const uint32_t output_node_id = output_node->id;
- assert(output_node >= subgraph->nodes);
- memcpy(output_node, node, sizeof(struct xnn_node));
- output_node->id = output_node_id;
- output_node -= 1;
- }
- // Insert Convert nodes for inputs
- for (uint32_t i = 0; i < node->num_inputs; i++) {
- const struct xnn_value* value = &subgraph->values[node->inputs[i]];
- if (value->fp32_id != XNN_INVALID_VALUE_ID && value->first_consumer == n - 1) {
- xnn_log_debug("Inserted FP32->FP16 Convert Node from tensor #%"PRIu32" to tensor #%"PRIu32,
- value->fp32_id, value->id);
- const uint32_t output_node_id = output_node->id;
- assert(output_node >= subgraph->nodes);
- xnn_node_clear(output_node);
- output_node->id = output_node_id;
- xnn_init_convert_node(output_node, xnn_compute_type_fp32_to_fp16, value->fp32_id, value->id, 0 /* flags */);
- output_node -= 1;
- }
+ const uint32_t output_id = node->outputs[o];
+ assert(output_id < subgraph->num_values);
+
+ assert(subgraph->values[output_id].producer == XNN_INVALID_NODE_ID);
+ subgraph->values[output_id].producer = n;
}
}
-}
-
-enum xnn_status xnn_subgraph_optimize(
- xnn_subgraph_t subgraph,
- uint32_t flags)
-{
- xnn_subgraph_analyze_consumers_and_producers(subgraph);
+ // Count extra consumer for Values which are external outputs.
// Remove unreferenced values.
for (uint32_t i = 0; i < subgraph->num_values; i++) {
struct xnn_value* value = &subgraph->values[i];
@@ -785,6 +538,9 @@ enum xnn_status xnn_subgraph_optimize(
continue;
}
+ if (value->flags & XNN_VALUE_FLAG_EXTERNAL_OUTPUT) {
+ value->num_consumers += 1;
+ }
if ((value->flags & XNN_VALUE_FLAG_EXTERNAL_INPUT) == 0 && value->num_consumers == 0) {
xnn_value_clear(value);
}
@@ -926,12 +682,6 @@ enum xnn_status xnn_subgraph_optimize(
}
#endif
- #ifndef XNN_NO_F16_OPERATORS
- if ((flags & XNN_FLAG_FP16_INFERENCE) && (xnn_params.init_flags & XNN_INIT_FLAG_F16)) {
- xnn_subgraph_rewrite_for_fp16(subgraph);
- }
- #endif // XNN_NO_F16_OPERATORS
-
return xnn_status_success;
}
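
For readers tracing the reverted pass: internal FP32 tensors were retyped to FP16 in place, while each external FP32 input/output gained a twin FP16 tensor plus one Convert node at the boundary. A schematic sketch of that bookkeeping (hypothetical flags and helper, not the subgraph API):

    #include <inttypes.h>
    #include <stdio.h>

    //   before:  X(fp32) --OP--> Y(fp32)
    //   after:   X(fp32) --Convert--> X'(fp16) --OP--> Y'(fp16) --Convert--> Y(fp32)
    enum { EXT_INPUT = 1 << 0, EXT_OUTPUT = 1 << 1 };  // stand-ins for XNN_VALUE_FLAG_*

    // One FP32<->FP16 Convert node per external boundary value; internal
    // values need none because they are retyped in place.
    static uint32_t converts_needed(const uint32_t* value_flags, uint32_t num_values) {
      uint32_t converts = 0;
      for (uint32_t i = 0; i < num_values; i++) {
        if (value_flags[i] & (EXT_INPUT | EXT_OUTPUT)) {
          converts += 1;
        }
      }
      return converts;
    }

    int main(void) {
      const uint32_t flags[3] = { EXT_INPUT, 0, EXT_OUTPUT };  // X, internal T, Y
      printf("extra Convert nodes: %" PRIu32 "\n", converts_needed(flags, 3));  // prints 2
      return 0;
    }
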
diff --git a/src/subgraph/add2.c b/src/subgraph/add2.c
index dc3d2a338..493a74be5 100644
--- a/src/subgraph/add2.c
+++ b/src/subgraph/add2.c
@@ -42,15 +42,6 @@ static enum xnn_status create_add_operator(
node->flags,
&opdata->operator_object);
break;
-#ifndef XNN_NO_F16_OPERATORS
- case xnn_compute_type_fp16:
- status = xnn_create_add_nd_f16(
- node->activation.output_min,
- node->activation.output_max,
- node->flags,
- &opdata->operator_object);
- break;
-#endif // !defined(XNN_NO_F16_OPERATORS)
#ifndef XNN_NO_QS8_OPERATORS
case xnn_compute_type_qs8:
{
@@ -164,17 +155,6 @@ static enum xnn_status setup_add_operator(
opdata->shape2.dim,
input1_data, input2_data, output_data,
threadpool);
-#ifndef XNN_NO_F16_OPERATORS
- case xnn_operator_type_add_nd_f16:
- return xnn_setup_add_nd_f16(
- opdata->operator_object,
- opdata->shape1.num_dims,
- opdata->shape1.dim,
- opdata->shape2.num_dims,
- opdata->shape2.dim,
- input1_data, input2_data, output_data,
- threadpool);
-#endif // !defined(XNN_NO_F16_OPERATORS)
#ifndef XNN_NO_QS8_OPERATORS
case xnn_operator_type_add_nd_qs8:
return xnn_setup_add_nd_qs8(
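
The FP32 path that survives the revert pairs xnn_create_add_nd_f32 with xnn_setup_add_nd_f32. A minimal sketch (the helper is ours; assumes xnn_initialize() was already called):

    #include <math.h>     // INFINITY
    #include <stddef.h>
    #include <xnnpack.h>

    // Broadcasted FP32 addition of a [2, 3] tensor and a [3] tensor.
    static enum xnn_status run_add(const float a[6], const float b[3], float out[6]) {
      xnn_operator_t add_op = NULL;
      const size_t shape_a[2] = { 2, 3 };
      const size_t shape_b[1] = { 3 };
      enum xnn_status status = xnn_create_add_nd_f32(
          /*output_min=*/-INFINITY, /*output_max=*/INFINITY, /*flags=*/0, &add_op);
      if (status != xnn_status_success) return status;
      status = xnn_setup_add_nd_f32(add_op, 2, shape_a, 1, shape_b,
                                    a, b, out, /*threadpool=*/NULL);
      if (status == xnn_status_success) status = xnn_run_operator(add_op, /*threadpool=*/NULL);
      xnn_delete_operator(add_op);
      return status;
    }
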
diff --git a/src/subgraph/convert.c b/src/subgraph/convert.c
index 291e20278..5f58eab74 100644
--- a/src/subgraph/convert.c
+++ b/src/subgraph/convert.c
@@ -35,12 +35,6 @@ static enum xnn_status create_convert_operator(
enum xnn_status status = xnn_status_uninitialized;
switch (node->compute_type) {
- case xnn_compute_type_fp32_to_fp16:
- status = xnn_create_convert_nc_f32_f16(
- channel_dim /* channels */, channel_dim /* input stride */, channel_dim /* output stride */,
- node->flags,
- &opdata->operator_object);
- break;
case xnn_compute_type_fp32_to_qs8:
status = xnn_create_convert_nc_f32_qs8(
channel_dim /* channels */, channel_dim /* input stride */, channel_dim /* output stride */,
@@ -59,12 +53,6 @@ static enum xnn_status create_convert_operator(
node->flags,
&opdata->operator_object);
break;
- case xnn_compute_type_fp16_to_fp32:
- status = xnn_create_convert_nc_f16_f32(
- channel_dim /* channels */, channel_dim /* input stride */, channel_dim /* output stride */,
- node->flags,
- &opdata->operator_object);
- break;
case xnn_compute_type_qs8_to_fp32:
status = xnn_create_convert_nc_qs8_f32(
channel_dim /* channels */, channel_dim /* input stride */, channel_dim /* output stride */,
@@ -115,13 +103,6 @@ static enum xnn_status setup_convert_operator(
assert(output_data != NULL);
switch (opdata->operator_object->type) {
- case xnn_operator_type_convert_nc_f32_f16:
- return xnn_setup_convert_nc_f32_f16(
- opdata->operator_object,
- opdata->batch_size,
- input_data,
- output_data,
- threadpool);
case xnn_operator_type_convert_nc_f32_qs8:
return xnn_setup_convert_nc_f32_qs8(
opdata->operator_object,
@@ -136,13 +117,6 @@ static enum xnn_status setup_convert_operator(
input_data,
output_data,
threadpool);
- case xnn_operator_type_convert_nc_f16_f32:
- return xnn_setup_convert_nc_f16_f32(
- opdata->operator_object,
- opdata->batch_size,
- input_data,
- output_data,
- threadpool);
case xnn_operator_type_convert_nc_qs8_f32:
return xnn_setup_convert_nc_qs8_f32(
opdata->operator_object,
@@ -169,8 +143,6 @@ static inline enum xnn_compute_type validate_datatypes(
switch (input_datatype) {
case xnn_datatype_fp32:
switch (output_datatype) {
- case xnn_datatype_fp16:
- return xnn_compute_type_fp32_to_fp16;
case xnn_datatype_qint8:
return xnn_compute_type_fp32_to_qs8;
case xnn_datatype_quint8:
@@ -179,11 +151,6 @@ static inline enum xnn_compute_type validate_datatypes(
break;
}
break;
- case xnn_datatype_fp16:
- if (output_datatype == xnn_datatype_fp32) {
- return xnn_compute_type_fp16_to_fp32;
- }
- break;
case xnn_datatype_qint8:
if (output_datatype == xnn_datatype_fp32) {
return xnn_compute_type_qs8_to_fp32;
@@ -200,25 +167,6 @@ static inline enum xnn_compute_type validate_datatypes(
return xnn_compute_type_invalid;
}
-void xnn_init_convert_node(
- struct xnn_node* node,
- enum xnn_compute_type compute_type,
- uint32_t input_id,
- uint32_t output_id,
- uint32_t flags)
-{
- node->type = xnn_node_type_convert;
- node->compute_type = compute_type;
- node->num_inputs = 1;
- node->inputs[0] = input_id;
- node->num_outputs = 1;
- node->outputs[0] = output_id;
- node->flags = flags;
-
- node->create = create_convert_operator;
- node->setup = setup_convert_operator;
-}
-
enum xnn_status xnn_define_convert(
xnn_subgraph_t subgraph,
uint32_t input_id,
@@ -247,7 +195,6 @@ enum xnn_status xnn_define_convert(
}
switch (input_value->datatype) {
- case xnn_datatype_fp16:
case xnn_datatype_fp32:
case xnn_datatype_qint8:
case xnn_datatype_quint8:
@@ -276,7 +223,6 @@ enum xnn_status xnn_define_convert(
}
switch (output_value->datatype) {
- case xnn_datatype_fp16:
case xnn_datatype_fp32:
case xnn_datatype_qint8:
case xnn_datatype_quint8:
@@ -305,6 +251,16 @@ enum xnn_status xnn_define_convert(
return xnn_status_out_of_memory;
}
- xnn_init_convert_node(node, compute_type, input_id, output_id, flags);
+ node->type = xnn_node_type_convert;
+ node->compute_type = compute_type;
+ node->num_inputs = 1;
+ node->inputs[0] = input_id;
+ node->num_outputs = 1;
+ node->outputs[0] = output_id;
+ node->flags = flags;
+
+ node->create = create_convert_operator;
+ node->setup = setup_convert_operator;
+
return xnn_status_success;
}
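
At the subgraph level, validate_datatypes above now admits only FP32<->QS8/QU8 pairs, so a Convert node is effectively a quantize or dequantize step. A sketch against the xnn_define_convert signature visible in this diff (the helper, and the assumption that both Value ids were created earlier, are ours):

    #include <stdint.h>
    #include <xnnpack.h>

    // Declare a QS8 -> FP32 dequantize between two existing Value ids.
    static enum xnn_status add_dequantize(xnn_subgraph_t subgraph,
                                          uint32_t quantized_id, uint32_t float_id) {
      // With FP16 support reverted, an fp16 endpoint on either side would be rejected.
      return xnn_define_convert(subgraph, quantized_id, float_id, /*flags=*/0);
    }
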
diff --git a/src/subgraph/convolution-2d.c b/src/subgraph/convolution-2d.c
index a5ea509ed..0225a9368 100644
--- a/src/subgraph/convolution-2d.c
+++ b/src/subgraph/convolution-2d.c
@@ -99,32 +99,6 @@ static enum xnn_status create_convolution_operator(
node->flags,
&opdata->operator_object);
break;
-#ifndef XNN_NO_F16_OPERATORS
- case xnn_compute_type_fp16:
- status = xnn_create_convolution2d_nhwc_f16(
- node->params.convolution_2d.input_padding_top,
- node->params.convolution_2d.input_padding_right,
- node->params.convolution_2d.input_padding_bottom,
- node->params.convolution_2d.input_padding_left,
- node->params.convolution_2d.kernel_height,
- node->params.convolution_2d.kernel_width,
- node->params.convolution_2d.subsampling_height,
- node->params.convolution_2d.subsampling_width,
- node->params.convolution_2d.dilation_height,
- node->params.convolution_2d.dilation_width,
- node->params.convolution_2d.groups,
- node->params.convolution_2d.group_input_channels,
- node->params.convolution_2d.group_output_channels,
- node->params.convolution_2d.group_input_channels * node->params.convolution_2d.groups /* input_pixel_stride */,
- node->params.convolution_2d.group_output_channels * node->params.convolution_2d.groups /* output_pixel_stride */,
- filter_data,
- bias_data,
- node->activation.output_min,
- node->activation.output_max,
- node->flags | XNN_FLAG_FP32_STATIC_WEIGHTS,
- &opdata->operator_object);
- break;
-#endif // XNN_NO_F16_OPERATORS
#ifndef XNN_NO_QS8_OPERATORS
case xnn_compute_type_qs8:
{
@@ -292,18 +266,6 @@ static enum xnn_status setup_convolution_operator(
output_data,
threadpool);
break;
-#ifndef XNN_NO_F16_OPERATORS
- case xnn_operator_type_convolution_nhwc_f16:
- return xnn_setup_convolution2d_nhwc_f16(
- opdata->operator_object,
- opdata->batch_size,
- opdata->input_height,
- opdata->input_width,
- input_data,
- output_data,
- threadpool);
- break;
-#endif // !defined(XNN_NO_F16_OPERATORS)
#ifndef XNN_NO_QS8_OPERATORS
case xnn_operator_type_convolution_nhwc_qc8:
return xnn_setup_convolution2d_nhwc_qc8(
diff --git a/src/subgraph/depthwise-convolution-2d.c b/src/subgraph/depthwise-convolution-2d.c
index 604c86a08..c478a5263 100644
--- a/src/subgraph/depthwise-convolution-2d.c
+++ b/src/subgraph/depthwise-convolution-2d.c
@@ -100,32 +100,6 @@ static enum xnn_status create_convolution_operator(
node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION,
&opdata->operator_object);
break;
-#ifndef XNN_NO_F16_OPERATORS
- case xnn_compute_type_fp16:
- status = xnn_create_convolution2d_nhwc_f16(
- node->params.depthwise_convolution_2d.input_padding_top,
- node->params.depthwise_convolution_2d.input_padding_right,
- node->params.depthwise_convolution_2d.input_padding_bottom,
- node->params.depthwise_convolution_2d.input_padding_left,
- node->params.depthwise_convolution_2d.kernel_height,
- node->params.depthwise_convolution_2d.kernel_width,
- node->params.depthwise_convolution_2d.subsampling_height,
- node->params.depthwise_convolution_2d.subsampling_width,
- node->params.depthwise_convolution_2d.dilation_height,
- node->params.depthwise_convolution_2d.dilation_width,
- node->params.depthwise_convolution_2d.input_channels /* groups */,
- 1 /* group_input_channels */,
- node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */,
- node->params.depthwise_convolution_2d.input_channels /* input_channel_stride */,
- node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_channel_stride */,
- filter_data,
- bias_data,
- node->activation.output_min,
- node->activation.output_max,
- node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION | XNN_FLAG_FP32_STATIC_WEIGHTS,
- &opdata->operator_object);
- break;
-#endif // XNN_NO_F16_OPERATORS
#ifndef XNN_NO_QS8_OPERATORS
case xnn_compute_type_qs8:
{
@@ -293,18 +267,6 @@ static enum xnn_status setup_convolution_operator(
output_data,
threadpool);
break;
-#ifndef XNN_NO_F16_OPERATORS
- case xnn_operator_type_convolution_nhwc_f16:
- return xnn_setup_convolution2d_nhwc_f16(
- opdata->operator_object,
- opdata->batch_size,
- opdata->input_height,
- opdata->input_width,
- input_data,
- output_data,
- threadpool);
- break;
-#endif // !defined(XNN_NO_F16_OPERATORS)
#ifndef XNN_NO_QS8_OPERATORS
case xnn_operator_type_convolution_nhwc_qc8:
return xnn_setup_convolution2d_nhwc_qc8(
diff --git a/src/subgraph/global-average-pooling-2d.c b/src/subgraph/global-average-pooling-2d.c
index 44d6686de..85cd14491 100644
--- a/src/subgraph/global-average-pooling-2d.c
+++ b/src/subgraph/global-average-pooling-2d.c
@@ -54,16 +54,6 @@ static enum xnn_status create_global_average_pooling_operator(
node->flags,
&opdata->operator_object);
break;
-#ifndef XNN_NO_F16_OPERATORS
- case xnn_compute_type_fp16:
- status = xnn_create_global_average_pooling_nwc_f16(
- channel_dim /* channels */, channel_dim /* input stride */, channel_dim /* output stride */,
- node->activation.output_min,
- node->activation.output_max,
- node->flags,
- &opdata->operator_object);
- break;
-#endif // !defined(XNN_NO_F16_OPERATORS)
#ifndef XNN_NO_QS8_OPERATORS
case xnn_compute_type_qs8:
{
@@ -158,17 +148,6 @@ static enum xnn_status setup_global_average_pooling_operator(
output_data,
threadpool);
break;
-#ifndef XNN_NO_F16_OPERATORS
- case xnn_operator_type_global_average_pooling_nwc_f16:
- return xnn_setup_global_average_pooling_nwc_f16(
- opdata->operator_object,
- opdata->batch_size,
- opdata->input_width,
- input_data,
- output_data,
- threadpool);
- break;
-#endif // !defined(XNN_NO_F16_OPERATORS)
#ifndef XNN_NO_QS8_OPERATORS
case xnn_operator_type_global_average_pooling_nwc_qs8:
return xnn_setup_global_average_pooling_nwc_qs8(
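
The surviving FP32 path is the xnn_create/xnn_setup_global_average_pooling_nwc_f32 pair. A minimal sketch (the helper is ours; assumes xnn_initialize() was already called):

    #include <math.h>     // INFINITY
    #include <stddef.h>
    #include <xnnpack.h>

    // Global average pooling over a [batch=1, width=4, channels=8] NWC tensor.
    static enum xnn_status run_gap(const float* input, float* output) {
      xnn_operator_t gap_op = NULL;
      enum xnn_status status = xnn_create_global_average_pooling_nwc_f32(
          /*channels=*/8, /*input_stride=*/8, /*output_stride=*/8,
          /*output_min=*/-INFINITY, /*output_max=*/INFINITY, /*flags=*/0, &gap_op);
      if (status != xnn_status_success) return status;
      status = xnn_setup_global_average_pooling_nwc_f32(
          gap_op, /*batch_size=*/1, /*width=*/4, input, output, /*threadpool=*/NULL);
      if (status == xnn_status_success) status = xnn_run_operator(gap_op, /*threadpool=*/NULL);
      xnn_delete_operator(gap_op);
      return status;
    }
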
diff --git a/src/subgraph/hardswish.c b/src/subgraph/hardswish.c
index d5eb54fba..6e896519f 100644
--- a/src/subgraph/hardswish.c
+++ b/src/subgraph/hardswish.c
@@ -34,25 +34,10 @@ static enum xnn_status create_hardswish_operator(
const size_t num_input_dims = values[input_id].shape.num_dims;
const size_t channel_dim = num_input_dims == 0 ? 1 : values[input_id].shape.dim[num_input_dims - 1];
- enum xnn_status status;
- switch (node->compute_type) {
- case xnn_compute_type_fp32:
- status = xnn_create_hardswish_nc_f32(
- channel_dim /* channels */, channel_dim /* input stride */, channel_dim /* output stride */,
- node->flags,
- &opdata->operator_object);
- break;
-#ifndef XNN_NO_F16_OPERATORS
- case xnn_compute_type_fp16:
- status = xnn_create_hardswish_nc_f16(
- channel_dim /* channels */, channel_dim /* input stride */, channel_dim /* output stride */,
- node->flags,
- &opdata->operator_object);
- break;
-#endif // !defined(XNN_NO_F16_OPERATORS)
- default:
- XNN_UNREACHABLE;
- }
+ const enum xnn_status status = xnn_create_hardswish_nc_f32(
+ channel_dim /* channels */, channel_dim /* input stride */, channel_dim /* output stride */,
+ node->flags,
+ &opdata->operator_object);
if (status == xnn_status_success) {
opdata->batch_size = xnn_shape_multiply_non_channel_dims(&values[input_id].shape);
opdata->inputs[0] = input_id;
@@ -83,26 +68,12 @@ static enum xnn_status setup_hardswish_operator(
void* output_data = output_blob->data;
assert(output_data != NULL);
- switch (opdata->operator_object->type) {
- case xnn_operator_type_hardswish_nc_f32:
- return xnn_setup_hardswish_nc_f32(
- opdata->operator_object,
- opdata->batch_size,
- input_data,
- output_data,
- threadpool);
-#ifndef XNN_NO_F16_OPERATORS
- case xnn_operator_type_hardswish_nc_f16:
- return xnn_setup_hardswish_nc_f16(
- opdata->operator_object,
- opdata->batch_size,
- input_data,
- output_data,
- threadpool);
-#endif // !defined(XNN_NO_F16_OPERATORS)
- default:
- XNN_UNREACHABLE;
- }
+ return xnn_setup_hardswish_nc_f32(
+ opdata->operator_object,
+ opdata->batch_size,
+ input_data,
+ output_data,
+ threadpool);
}
enum xnn_status xnn_define_hardswish(
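
With the FP16 branch removed, hardswish is FP32-only end to end, and declaring one at the subgraph level is a single call (sketch against the xnn_define_hardswish definition beginning above):

    #include <stdint.h>
    #include <xnnpack.h>

    // Declare output = hardswish(input) between two existing FP32 Value ids.
    static enum xnn_status add_hardswish(xnn_subgraph_t subgraph,
                                         uint32_t input_id, uint32_t output_id) {
      return xnn_define_hardswish(subgraph, input_id, output_id, /*flags=*/0);
    }
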
diff --git a/src/xnnpack/subgraph.h b/src/xnnpack/subgraph.h
index a132c4328..5f0fc1571 100644
--- a/src/xnnpack/subgraph.h
+++ b/src/xnnpack/subgraph.h
@@ -82,15 +82,6 @@ struct xnn_value {
uint32_t num_consumers;
uint32_t num_nchw_compatible_consumers;
enum xnn_layout_type layout;
- /// Set during analysis in xnn_subgraph_rewrite_for_fp16.
- /// Indicates that this value should be converted to FP16.
- bool fp16_compatible;
- /// Set during analysis in xnn_subgraph_rewrite_for_fp16.
- /// Indicates Value ID of the FP16 variant of this Value.
- uint32_t fp16_id;
- /// Set during analysis in xnn_subgraph_rewrite_for_fp16.
- /// Indicates Value ID of the FP32 variant of this Value.
- uint32_t fp32_id;
};
struct xnn_blob {
@@ -119,14 +110,11 @@ typedef enum xnn_status (*xnn_setup_operator_fn)(
enum xnn_compute_type {
xnn_compute_type_invalid = 0,
xnn_compute_type_fp32,
- xnn_compute_type_fp16,
xnn_compute_type_qc8,
xnn_compute_type_qs8,
xnn_compute_type_qu8,
- xnn_compute_type_fp32_to_fp16,
xnn_compute_type_fp32_to_qs8,
xnn_compute_type_fp32_to_qu8,
- xnn_compute_type_fp16_to_fp32,
xnn_compute_type_qs8_to_fp32,
xnn_compute_type_qu8_to_fp32,
};
@@ -334,8 +322,6 @@ struct xnn_value* xnn_subgraph_new_internal_value(xnn_subgraph_t subgraph);
struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph);
-void xnn_subgraph_add_nodes(xnn_subgraph_t subgraph, size_t num_nodes);
-
size_t xnn_tensor_get_size(
xnn_subgraph_t subgraph,
uint32_t value_id);
@@ -355,14 +341,6 @@ void xnn_subgraph_rewrite_for_nchw(xnn_subgraph_t subgraph);
void xnn_node_clear(struct xnn_node* node);
void xnn_value_clear(struct xnn_value* value);
-void xnn_value_copy(struct xnn_value* dst_value, const struct xnn_value* src_value);
-
-void xnn_init_convert_node(
- struct xnn_node* node,
- enum xnn_compute_type compute_type,
- uint32_t input_id,
- uint32_t output_id,
- uint32_t flags);
#ifdef __cplusplus
} // extern "C"