diff options
author | Antonio Sanchez <cantonios@google.com> | 2022-02-03 09:47:17 -0800 |
---|---|---|
committer | XNNPACK Team <xnnpack-github-robot@google.com> | 2022-02-03 09:49:25 -0800 |
commit | 9a365d0da32b5401718b34830da47c7658fda510 (patch) | |
tree | bef2538c37ce0eb98efc47d790fce03ecf65f9d4 | |
parent | f0f374f6122fa7271f643fd741d33132d9159166 (diff) | |
download | XNNPACK-9a365d0da32b5401718b34830da47c7658fda510.tar.gz |
Revert "Graph rewriting for FP16 inference"
PiperOrigin-RevId: 426169617
-rw-r--r-- | include/xnnpack.h | 11 | ||||
-rw-r--r-- | src/subgraph.c | 304 | ||||
-rw-r--r-- | src/subgraph/add2.c | 20 | ||||
-rw-r--r-- | src/subgraph/convert.c | 66 | ||||
-rw-r--r-- | src/subgraph/convolution-2d.c | 38 | ||||
-rw-r--r-- | src/subgraph/depthwise-convolution-2d.c | 38 | ||||
-rw-r--r-- | src/subgraph/global-average-pooling-2d.c | 21 | ||||
-rw-r--r-- | src/subgraph/hardswish.c | 49 | ||||
-rw-r--r-- | src/xnnpack/subgraph.h | 22 |
9 files changed, 51 insertions(+), 518 deletions(-)
diff --git a/include/xnnpack.h b/include/xnnpack.h index 3f7b3a2d3..4f5032f52 100644 --- a/include/xnnpack.h +++ b/include/xnnpack.h @@ -32,11 +32,6 @@ extern "C" { /// Note: this flag forces XNNPACK to consider sparse inference, but does not guarantee it. #define XNN_FLAG_SPARSE_INFERENCE 0x00000001 -/// Allow IEEE FP16 inference in a Runtime. -/// -/// Note: this flag forces XNNPACK to consider IEEE FP16 inference, but does not guarantee it. -#define XNN_FLAG_FP16_INFERENCE 0x00000001 - /// The convolution operator represents a depthwise convolution, and use HWGo layout for filters. #define XNN_FLAG_DEPTHWISE_CONVOLUTION 0x00000001 @@ -1138,9 +1133,9 @@ typedef struct xnn_runtime* xnn_runtime_t; /// Nodes can be added to the runtime once it is constructed. /// @param threadpool - the thread pool to be used for parallelisation of computations in the runtime. If the thread /// pool is NULL, the computation would run on the caller thread without parallelization. -/// @param flags - binary features of the runtime. The only currently supported values are XNN_FLAG_SPARSE_INFERENCE, -/// XNN_FLAG_FP16_INFERENCE, and XNN_FLAG_YIELD_WORKERS. If XNN_FLAG_YIELD_WORKERS is specified, worker -/// threads would be yielded to the system scheduler after processing the last operator in the Runtime. +/// @param flags - binary features of the runtime. The only currently supported values are XNN_FLAG_SPARSE_INFERENCE +/// and XNN_FLAG_YIELD_WORKERS. If XNN_FLAG_YIELD_WORKERS is specified, worker threads would be yielded +/// to the system scheduler after processing the last operator in the Runtime. /// @param runtime_out - pointer to the variable that will be initialized with a handle to the Runtime object upon /// successful return. Once constructed, the Runtime object is independent of the Subgraph object /// used to create it. 
diff --git a/src/subgraph.c b/src/subgraph.c index 9ed9d271f..2946d98dd 100644 --- a/src/subgraph.c +++ b/src/subgraph.c @@ -86,30 +86,16 @@ struct xnn_value* xnn_subgraph_new_internal_value(xnn_subgraph_t subgraph) void xnn_node_clear(struct xnn_node* node) { assert(node != NULL); + assert(node->type != xnn_node_type_invalid); memset(node, 0, sizeof(struct xnn_node)); } void xnn_value_clear(struct xnn_value* value) { assert(value != NULL); + assert(value->type != xnn_value_type_invalid); memset(value, 0, sizeof(struct xnn_value)); } -void xnn_value_copy( - struct xnn_value* dst_value, - const struct xnn_value* src_value) -{ - // Note: Value ID stays unchanged - - dst_value->type = src_value->type; - dst_value->datatype = src_value->datatype; - dst_value->quantization = src_value->quantization; - dst_value->shape = src_value->shape; - dst_value->flags = src_value->flags; - dst_value->data = src_value->data; - dst_value->producer = src_value->producer; - dst_value->first_consumer = src_value->first_consumer; -} - struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph) { struct xnn_node* nodes = subgraph->nodes; @@ -136,76 +122,6 @@ struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph) return new_node; } -void xnn_subgraph_add_nodes(xnn_subgraph_t subgraph, size_t num_nodes) -{ - struct xnn_node* nodes = subgraph->nodes; - const size_t size = subgraph->num_nodes; - const size_t capacity = subgraph->num_reserved_nodes; - - if (capacity < size + num_nodes) { - const size_t new_capacity = max(min(capacity * 2, capacity + 512), capacity + max(num_nodes, 64)); - assert(new_capacity >= size + num_nodes); - nodes = xnn_reallocate_memory(nodes, new_capacity * sizeof(struct xnn_node)); - if (nodes == NULL) { - xnn_log_error("failed to allocate %zu bytes for subgraph nodes", - capacity * sizeof(struct xnn_node)); - return; - } - - memset(nodes + size, 0, (new_capacity - size) * sizeof(struct xnn_node)); - subgraph->num_reserved_nodes = new_capacity; - 
subgraph->nodes = nodes; - } - subgraph->num_nodes = size + num_nodes; - struct xnn_node* new_nodes = nodes + size; - for (size_t i = 0; i < num_nodes; i++) { - new_nodes[i].id = size + i; - } -} - -void xnn_subgraph_analyze_consumers_and_producers(xnn_subgraph_t subgraph) -{ - // Initialize producer/consumer fields to safe defaults. - for (uint32_t i = 0; i < subgraph->num_values; i++) { - struct xnn_value* value = &subgraph->values[i]; - value->producer = XNN_INVALID_NODE_ID; - value->first_consumer = XNN_INVALID_NODE_ID; - value->num_consumers = 0; - } - - // Analyse Nodes' inputs and output and update Values' producer/consumer fields - for (uint32_t n = 0; n < subgraph->num_nodes; n++) { - struct xnn_node* node = &subgraph->nodes[n]; - - for (uint32_t i = 0; i < node->num_inputs; i++) { - const uint32_t input_id = node->inputs[i]; - assert(input_id < subgraph->num_values); - - if (subgraph->values[input_id].num_consumers++ == 0) { - assert(subgraph->values[input_id].first_consumer == XNN_INVALID_NODE_ID); - subgraph->values[input_id].first_consumer = n; - } - } - - for (uint32_t o = 0; o < node->num_outputs; o++) { - const uint32_t output_id = node->outputs[o]; - assert(output_id < subgraph->num_values); - - assert(subgraph->values[output_id].producer == XNN_INVALID_NODE_ID); - subgraph->values[output_id].producer = n; - } - } - - // Count extra consumer for Values which are external outputs. - // Remove unreferenced values. 
- for (uint32_t i = 0; i < subgraph->num_values; i++) { - struct xnn_value* value = &subgraph->values[i]; - if (value->flags & XNN_VALUE_FLAG_EXTERNAL_OUTPUT) { - value->num_consumers += 1; - } - } -} - #define XNN_LAYOUT_FLAG_COMPATIBLE_NCHW 1 #define XNN_LAYOUT_FLAG_COMPATIBLE_NHWC2NCHW 2 #define XNN_LAYOUT_FLAG_COMPATIBLE_NCHW2NHWC 4 @@ -579,205 +495,42 @@ void xnn_subgraph_rewrite_for_nchw(xnn_subgraph_t subgraph) } } -void xnn_subgraph_rewrite_for_fp16(xnn_subgraph_t subgraph) +enum xnn_status xnn_subgraph_optimize( + xnn_subgraph_t subgraph, + uint32_t flags) { - xnn_log_info("Analyzing subgraph for FP16 compatibility"); - - // Convert tensors and operators in the subgraph to FP16 - // 1. Check that all operators in the subgraph are supported in FP16. - // 2. Indicate values that must be converted to FP16. - // 3. Replace FP32 Values with FP16 Values as Nodes' inputs/outputs. - // 4. Insert FP32->FP16 Convert Nodes for external FP32 inputs and FP16->FP32 Convert Nodes for external outputs. - - // Check that all operators in the subgraph are supported in FP16, bail out on any unsupported one. 
- for (uint32_t n = 0; n < subgraph->num_nodes; n++) { - struct xnn_node* node = &subgraph->nodes[n]; - if (node->compute_type != xnn_compute_type_fp32) { - xnn_log_info("FP16 rewrite aborted: node #%" PRIu32 " (%s) is not FP32", n, xnn_node_type_to_string(node->type)); - return; - } - switch (node->type) { - case xnn_node_type_add2: - assert(node->num_inputs == 2); - for (uint32_t i = 0; i < node->num_inputs; i++) { - if (subgraph->values[node->inputs[i]].data != NULL) { - xnn_log_info("FP16 rewrite aborted: node #%" PRIu32 " (%s) has static input %i", - n, xnn_node_type_to_string(node->type), i); - return; - } - } - break; - case xnn_node_type_convolution_2d: - case xnn_node_type_depthwise_convolution_2d: - case xnn_node_type_global_average_pooling_2d: - case xnn_node_type_hardswish: - break; - default: - xnn_log_info("FP16 rewrite aborted: node #%" PRIu32 " (%s) is not supported for FP16 inference", - n, xnn_node_type_to_string(node->type)); - return; - } + // Initialize producer/consumer fields to safe defaults. + for (uint32_t i = 0; i < subgraph->num_values; i++) { + struct xnn_value* value = &subgraph->values[i]; + value->producer = XNN_INVALID_NODE_ID; + value->first_consumer = XNN_INVALID_NODE_ID; + value->num_consumers = 0; } - // Annotate Values to be converted to FP16 as FP16-compatible. - // Note that static weights in [Depthwise] Convolution & Fully Connected Nodes remain FP32, - // they will be converted to FP16 during weight repacking when the operator is created. 
+ // Analyse Nodes' inputs and output and update Values' producer/consumer fields for (uint32_t n = 0; n < subgraph->num_nodes; n++) { struct xnn_node* node = &subgraph->nodes[n]; - switch (node->type) { - case xnn_node_type_convolution_2d: - case xnn_node_type_depthwise_convolution_2d: - subgraph->values[node->inputs[0]].fp16_compatible = true; - subgraph->values[node->outputs[0]].fp16_compatible = true; - break; - default: - for (uint32_t i = 0; i < node->num_inputs; i++) { - subgraph->values[node->inputs[i]].fp16_compatible = true; - } - for (uint32_t o = 0; o < node->num_outputs; o++) { - subgraph->values[node->outputs[o]].fp16_compatible = true; - } - break; - } - } - // Replace FP32 Values in Nodes' inputs/outputs with FP16 Values. - // FP32 Values that are not external inputs or outputs are converted to FP16 in-place, - // for external inputs and outputs we create same-shaped FP16 Values and use those instead. - const uint32_t num_original_values = subgraph->num_values; - xnn_subgraph_analyze_consumers_and_producers(subgraph); - for (uint32_t n = 0; n < num_original_values; n++) { - struct xnn_value* value = &subgraph->values[n]; - value->fp16_id = XNN_INVALID_VALUE_ID; - value->fp32_id = XNN_INVALID_VALUE_ID; - if (value->fp16_compatible) { - assert(value->data == NULL); - assert(value->datatype == xnn_datatype_fp32); - if ((value->flags & (XNN_VALUE_FLAG_EXTERNAL_INPUT | XNN_VALUE_FLAG_EXTERNAL_OUTPUT)) != 0) { - struct xnn_value* fp16_value = xnn_subgraph_new_internal_value(subgraph); - - // Recompute value due to potential reallocation in xnn_subgraph_new_internal_value - value = &subgraph->values[n]; - xnn_value_copy(fp16_value, value); - fp16_value->datatype = xnn_datatype_fp16; - - fp16_value->producer = value->producer; - fp16_value->num_consumers = value->num_consumers; - fp16_value->first_consumer = value->first_consumer; - value->producer = XNN_INVALID_NODE_ID; - value->num_consumers = 0; - value->first_consumer = XNN_INVALID_NODE_ID; - - // Clear 
external input/output flags - fp16_value->flags = 0; - xnn_log_debug("FP16 rewrite: created FP16 tensor #%" PRIu32 " for FP32 tensor #%" PRIu32, fp16_value->id, n); - - value->fp16_id = fp16_value->id; - fp16_value->fp32_id = n; - } else { - xnn_log_debug("FP16 rewrite: converted FP32 tensor #%" PRIu32 " to FP16", n); - value->datatype = xnn_datatype_fp16; - } - } - } - for (uint32_t n = 0; n < subgraph->num_nodes; n++) { - struct xnn_node* node = &subgraph->nodes[n]; - assert(node->compute_type == xnn_compute_type_fp32); - node->compute_type = xnn_compute_type_fp16; for (uint32_t i = 0; i < node->num_inputs; i++) { - const uint32_t fp16_id = subgraph->values[node->inputs[i]].fp16_id; - if (fp16_id != XNN_INVALID_VALUE_ID) { - assert(subgraph->values[fp16_id].fp32_id == node->inputs[i]); - node->inputs[i] = fp16_id; - } - } - for (uint32_t o = 0; o < node->num_outputs; o++) { - const uint32_t fp16_id = subgraph->values[node->outputs[o]].fp16_id; - if (fp16_id != XNN_INVALID_VALUE_ID) { - assert(subgraph->values[fp16_id].fp32_id == node->outputs[o]); - node->outputs[o] = fp16_id; - } - } - } + const uint32_t input_id = node->inputs[i]; + assert(input_id < subgraph->num_values); - // Count the number of external inputs and outputs which require Convert nodes - uint32_t num_external_inputs = 0; - uint32_t num_external_outputs = 0; - for (uint32_t n = 0; n < subgraph->num_nodes; n++) { - const struct xnn_node* node = &subgraph->nodes[n]; - for (uint32_t i = 0; i < node->num_inputs; i++) { - const struct xnn_value* value = &subgraph->values[node->inputs[i]]; - if (value->fp32_id != XNN_INVALID_VALUE_ID && value->first_consumer == n) { - assert(value->data == NULL); - assert(value->datatype == xnn_datatype_fp16); - assert(subgraph->values[value->fp32_id].datatype == xnn_datatype_fp32); - assert(subgraph->values[value->fp32_id].flags & XNN_VALUE_FLAG_EXTERNAL_INPUT); - num_external_inputs += 1; - } - } - for (uint32_t o = 0; o < node->num_outputs; o++) { - const struct 
xnn_value* value = &subgraph->values[node->outputs[o]]; - if (value->fp32_id != XNN_INVALID_VALUE_ID) { - assert(value->datatype == xnn_datatype_fp16); - assert(subgraph->values[value->fp32_id].datatype == xnn_datatype_fp32); - assert(subgraph->values[value->fp32_id].flags & XNN_VALUE_FLAG_EXTERNAL_OUTPUT); - num_external_outputs += 1; + if (subgraph->values[input_id].num_consumers++ == 0) { + assert(subgraph->values[input_id].first_consumer == XNN_INVALID_NODE_ID); + subgraph->values[input_id].first_consumer = n; } } - } - xnn_log_debug("Discovered %"PRIu32" external inputs and %"PRIu32" external outputs", - num_external_inputs, num_external_outputs); - - const uint32_t num_original_nodes = subgraph->num_nodes; - xnn_subgraph_add_nodes(subgraph, num_external_inputs + num_external_outputs); - struct xnn_node* output_node = subgraph->nodes + subgraph->num_nodes - 1; - for (uint32_t n = num_original_nodes; n != 0; n--) { - const struct xnn_node* node = &subgraph->nodes[n - 1]; - // Insert Convert nodes for outputs + for (uint32_t o = 0; o < node->num_outputs; o++) { - const struct xnn_value* value = &subgraph->values[node->outputs[o]]; - if (value->fp32_id != XNN_INVALID_VALUE_ID) { - xnn_log_debug("Inserted FP16->FP32 Convert Node from tensor #%"PRIu32" to tensor #%"PRIu32, - value->id, value->fp32_id); - const uint32_t output_node_id = output_node->id; - assert(output_node >= subgraph->nodes); - xnn_node_clear(output_node); - output_node->id = output_node_id; - xnn_init_convert_node(output_node, xnn_compute_type_fp16_to_fp32, value->id, value->fp32_id, 0 /* flags */); - output_node -= 1; - } - } - // Move the Node to the new location - if (output_node != node) { - const uint32_t output_node_id = output_node->id; - assert(output_node >= subgraph->nodes); - memcpy(output_node, node, sizeof(struct xnn_node)); - output_node->id = output_node_id; - output_node -= 1; - } - // Insert Convert nodes for inputs - for (uint32_t i = 0; i < node->num_inputs; i++) { - const 
struct xnn_value* value = &subgraph->values[node->inputs[i]]; - if (value->fp32_id != XNN_INVALID_VALUE_ID && value->first_consumer == n - 1) { - xnn_log_debug("Inserted FP32->FP16 Convert Node from tensor #%"PRIu32" to tensor #%"PRIu32, - value->fp32_id, value->id); - const uint32_t output_node_id = output_node->id; - assert(output_node >= subgraph->nodes); - xnn_node_clear(output_node); - output_node->id = output_node_id; - xnn_init_convert_node(output_node, xnn_compute_type_fp32_to_fp16, value->fp32_id, value->id, 0 /* flags */); - output_node -= 1; - } + const uint32_t output_id = node->outputs[o]; + assert(output_id < subgraph->num_values); + + assert(subgraph->values[output_id].producer == XNN_INVALID_NODE_ID); + subgraph->values[output_id].producer = n; } } -} - -enum xnn_status xnn_subgraph_optimize( - xnn_subgraph_t subgraph, - uint32_t flags) -{ - xnn_subgraph_analyze_consumers_and_producers(subgraph); + // Count extra consumer for Values which are external outputs. // Remove unreferenced values. 
for (uint32_t i = 0; i < subgraph->num_values; i++) { struct xnn_value* value = &subgraph->values[i]; @@ -785,6 +538,9 @@ enum xnn_status xnn_subgraph_optimize( continue; } + if (value->flags & XNN_VALUE_FLAG_EXTERNAL_OUTPUT) { + value->num_consumers += 1; + } if ((value->flags & XNN_VALUE_FLAG_EXTERNAL_INPUT) == 0 && value->num_consumers == 0) { xnn_value_clear(value); } @@ -926,12 +682,6 @@ enum xnn_status xnn_subgraph_optimize( } #endif - #ifndef XNN_NO_F16_OPERATORS - if ((flags & XNN_FLAG_FP16_INFERENCE) && (xnn_params.init_flags & XNN_INIT_FLAG_F16)) { - xnn_subgraph_rewrite_for_fp16(subgraph); - } - #endif // XNN_NO_F16_OPERATORS - return xnn_status_success; } diff --git a/src/subgraph/add2.c b/src/subgraph/add2.c index dc3d2a338..493a74be5 100644 --- a/src/subgraph/add2.c +++ b/src/subgraph/add2.c @@ -42,15 +42,6 @@ static enum xnn_status create_add_operator( node->flags, &opdata->operator_object); break; -#ifndef XNN_NO_F16_OPERATORS - case xnn_compute_type_fp16: - status = xnn_create_add_nd_f16( - node->activation.output_min, - node->activation.output_max, - node->flags, - &opdata->operator_object); - break; -#endif // !defined(XNN_NO_F16_OPERATORS) #ifndef XNN_NO_QS8_OPERATORS case xnn_compute_type_qs8: { @@ -164,17 +155,6 @@ static enum xnn_status setup_add_operator( opdata->shape2.dim, input1_data, input2_data, output_data, threadpool); -#ifndef XNN_NO_F16_OPERATORS - case xnn_operator_type_add_nd_f16: - return xnn_setup_add_nd_f16( - opdata->operator_object, - opdata->shape1.num_dims, - opdata->shape1.dim, - opdata->shape2.num_dims, - opdata->shape2.dim, - input1_data, input2_data, output_data, - threadpool); -#endif // !defined(XNN_NO_F16_OPERATORS) #ifndef XNN_NO_QS8_OPERATORS case xnn_operator_type_add_nd_qs8: return xnn_setup_add_nd_qs8( diff --git a/src/subgraph/convert.c b/src/subgraph/convert.c index 291e20278..5f58eab74 100644 --- a/src/subgraph/convert.c +++ b/src/subgraph/convert.c @@ -35,12 +35,6 @@ static enum xnn_status 
create_convert_operator( enum xnn_status status = xnn_status_uninitialized; switch (node->compute_type) { - case xnn_compute_type_fp32_to_fp16: - status = xnn_create_convert_nc_f32_f16( - channel_dim /* channels */, channel_dim /* input stride */, channel_dim /* output stride */, - node->flags, - &opdata->operator_object); - break; case xnn_compute_type_fp32_to_qs8: status = xnn_create_convert_nc_f32_qs8( channel_dim /* channels */, channel_dim /* input stride */, channel_dim /* output stride */, @@ -59,12 +53,6 @@ static enum xnn_status create_convert_operator( node->flags, &opdata->operator_object); break; - case xnn_compute_type_fp16_to_fp32: - status = xnn_create_convert_nc_f16_f32( - channel_dim /* channels */, channel_dim /* input stride */, channel_dim /* output stride */, - node->flags, - &opdata->operator_object); - break; case xnn_compute_type_qs8_to_fp32: status = xnn_create_convert_nc_qs8_f32( channel_dim /* channels */, channel_dim /* input stride */, channel_dim /* output stride */, @@ -115,13 +103,6 @@ static enum xnn_status setup_convert_operator( assert(output_data != NULL); switch (opdata->operator_object->type) { - case xnn_operator_type_convert_nc_f32_f16: - return xnn_setup_convert_nc_f32_f16( - opdata->operator_object, - opdata->batch_size, - input_data, - output_data, - threadpool); case xnn_operator_type_convert_nc_f32_qs8: return xnn_setup_convert_nc_f32_qs8( opdata->operator_object, @@ -136,13 +117,6 @@ static enum xnn_status setup_convert_operator( input_data, output_data, threadpool); - case xnn_operator_type_convert_nc_f16_f32: - return xnn_setup_convert_nc_f16_f32( - opdata->operator_object, - opdata->batch_size, - input_data, - output_data, - threadpool); case xnn_operator_type_convert_nc_qs8_f32: return xnn_setup_convert_nc_qs8_f32( opdata->operator_object, @@ -169,8 +143,6 @@ static inline enum xnn_compute_type validate_datatypes( switch (input_datatype) { case xnn_datatype_fp32: switch (output_datatype) { - case xnn_datatype_fp16: 
- return xnn_compute_type_fp32_to_fp16; case xnn_datatype_qint8: return xnn_compute_type_fp32_to_qs8; case xnn_datatype_quint8: @@ -179,11 +151,6 @@ static inline enum xnn_compute_type validate_datatypes( break; } break; - case xnn_datatype_fp16: - if (output_datatype == xnn_datatype_fp32) { - return xnn_compute_type_fp16_to_fp32; - } - break; case xnn_datatype_qint8: if (output_datatype == xnn_datatype_fp32) { return xnn_compute_type_qs8_to_fp32; @@ -200,25 +167,6 @@ static inline enum xnn_compute_type validate_datatypes( return xnn_compute_type_invalid; } -void xnn_init_convert_node( - struct xnn_node* node, - enum xnn_compute_type compute_type, - uint32_t input_id, - uint32_t output_id, - uint32_t flags) -{ - node->type = xnn_node_type_convert; - node->compute_type = compute_type; - node->num_inputs = 1; - node->inputs[0] = input_id; - node->num_outputs = 1; - node->outputs[0] = output_id; - node->flags = flags; - - node->create = create_convert_operator; - node->setup = setup_convert_operator; -} - enum xnn_status xnn_define_convert( xnn_subgraph_t subgraph, uint32_t input_id, @@ -247,7 +195,6 @@ enum xnn_status xnn_define_convert( } switch (input_value->datatype) { - case xnn_datatype_fp16: case xnn_datatype_fp32: case xnn_datatype_qint8: case xnn_datatype_quint8: @@ -276,7 +223,6 @@ enum xnn_status xnn_define_convert( } switch (output_value->datatype) { - case xnn_datatype_fp16: case xnn_datatype_fp32: case xnn_datatype_qint8: case xnn_datatype_quint8: @@ -305,6 +251,16 @@ enum xnn_status xnn_define_convert( return xnn_status_out_of_memory; } - xnn_init_convert_node(node, compute_type, input_id, output_id, flags); + node->type = xnn_node_type_convert; + node->compute_type = compute_type; + node->num_inputs = 1; + node->inputs[0] = input_id; + node->num_outputs = 1; + node->outputs[0] = output_id; + node->flags = flags; + + node->create = create_convert_operator; + node->setup = setup_convert_operator; + return xnn_status_success; } diff --git 
a/src/subgraph/convolution-2d.c b/src/subgraph/convolution-2d.c index a5ea509ed..0225a9368 100644 --- a/src/subgraph/convolution-2d.c +++ b/src/subgraph/convolution-2d.c @@ -99,32 +99,6 @@ static enum xnn_status create_convolution_operator( node->flags, &opdata->operator_object); break; -#ifndef XNN_NO_F16_OPERATORS - case xnn_compute_type_fp16: - status = xnn_create_convolution2d_nhwc_f16( - node->params.convolution_2d.input_padding_top, - node->params.convolution_2d.input_padding_right, - node->params.convolution_2d.input_padding_bottom, - node->params.convolution_2d.input_padding_left, - node->params.convolution_2d.kernel_height, - node->params.convolution_2d.kernel_width, - node->params.convolution_2d.subsampling_height, - node->params.convolution_2d.subsampling_width, - node->params.convolution_2d.dilation_height, - node->params.convolution_2d.dilation_width, - node->params.convolution_2d.groups, - node->params.convolution_2d.group_input_channels, - node->params.convolution_2d.group_output_channels, - node->params.convolution_2d.group_input_channels * node->params.convolution_2d.groups /* input_pixel_stride */, - node->params.convolution_2d.group_output_channels * node->params.convolution_2d.groups /* output_pixel_stride */, - filter_data, - bias_data, - node->activation.output_min, - node->activation.output_max, - node->flags | XNN_FLAG_FP32_STATIC_WEIGHTS, - &opdata->operator_object); - break; -#endif // XNN_NO_F16_OPERATORS #ifndef XNN_NO_QS8_OPERATORS case xnn_compute_type_qs8: { @@ -292,18 +266,6 @@ static enum xnn_status setup_convolution_operator( output_data, threadpool); break; -#ifndef XNN_NO_F16_OPERATORS - case xnn_operator_type_convolution_nhwc_f16: - return xnn_setup_convolution2d_nhwc_f16( - opdata->operator_object, - opdata->batch_size, - opdata->input_height, - opdata->input_width, - input_data, - output_data, - threadpool); - break; -#endif // !defined(XNN_NO_F16_OPERATORS) #ifndef XNN_NO_QS8_OPERATORS case 
xnn_operator_type_convolution_nhwc_qc8: return xnn_setup_convolution2d_nhwc_qc8( diff --git a/src/subgraph/depthwise-convolution-2d.c b/src/subgraph/depthwise-convolution-2d.c index 604c86a08..c478a5263 100644 --- a/src/subgraph/depthwise-convolution-2d.c +++ b/src/subgraph/depthwise-convolution-2d.c @@ -100,32 +100,6 @@ static enum xnn_status create_convolution_operator( node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION, &opdata->operator_object); break; -#ifndef XNN_NO_F16_OPERATORS - case xnn_compute_type_fp16: - status = xnn_create_convolution2d_nhwc_f16( - node->params.depthwise_convolution_2d.input_padding_top, - node->params.depthwise_convolution_2d.input_padding_right, - node->params.depthwise_convolution_2d.input_padding_bottom, - node->params.depthwise_convolution_2d.input_padding_left, - node->params.depthwise_convolution_2d.kernel_height, - node->params.depthwise_convolution_2d.kernel_width, - node->params.depthwise_convolution_2d.subsampling_height, - node->params.depthwise_convolution_2d.subsampling_width, - node->params.depthwise_convolution_2d.dilation_height, - node->params.depthwise_convolution_2d.dilation_width, - node->params.depthwise_convolution_2d.input_channels /* groups */, - 1 /* group_input_channels */, - node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */, - node->params.depthwise_convolution_2d.input_channels /* input_channel_stride */, - node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_channel_stride */, - filter_data, - bias_data, - node->activation.output_min, - node->activation.output_max, - node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION | XNN_FLAG_FP32_STATIC_WEIGHTS, - &opdata->operator_object); - break; -#endif // XNN_NO_F16_OPERATORS #ifndef XNN_NO_QS8_OPERATORS case xnn_compute_type_qs8: { @@ -293,18 +267,6 @@ static enum xnn_status setup_convolution_operator( output_data, threadpool); break; -#ifndef XNN_NO_F16_OPERATORS - case 
xnn_operator_type_convolution_nhwc_f16: - return xnn_setup_convolution2d_nhwc_f16( - opdata->operator_object, - opdata->batch_size, - opdata->input_height, - opdata->input_width, - input_data, - output_data, - threadpool); - break; -#endif // !defined(XNN_NO_F16_OPERATORS) #ifndef XNN_NO_QS8_OPERATORS case xnn_operator_type_convolution_nhwc_qc8: return xnn_setup_convolution2d_nhwc_qc8( diff --git a/src/subgraph/global-average-pooling-2d.c b/src/subgraph/global-average-pooling-2d.c index 44d6686de..85cd14491 100644 --- a/src/subgraph/global-average-pooling-2d.c +++ b/src/subgraph/global-average-pooling-2d.c @@ -54,16 +54,6 @@ static enum xnn_status create_global_average_pooling_operator( node->flags, &opdata->operator_object); break; -#ifndef XNN_NO_F16_OPERATORS - case xnn_compute_type_fp16: - status = xnn_create_global_average_pooling_nwc_f16( - channel_dim /* channels */, channel_dim /* input stride */, channel_dim /* output stride */, - node->activation.output_min, - node->activation.output_max, - node->flags, - &opdata->operator_object); - break; -#endif // !defined(XNN_NO_F16_OPERATORS) #ifndef XNN_NO_QS8_OPERATORS case xnn_compute_type_qs8: { @@ -158,17 +148,6 @@ static enum xnn_status setup_global_average_pooling_operator( output_data, threadpool); break; -#ifndef XNN_NO_F16_OPERATORS - case xnn_operator_type_global_average_pooling_nwc_f16: - return xnn_setup_global_average_pooling_nwc_f16( - opdata->operator_object, - opdata->batch_size, - opdata->input_width, - input_data, - output_data, - threadpool); - break; -#endif // !defined(XNN_NO_F16_OPERATORS) #ifndef XNN_NO_QS8_OPERATORS case xnn_operator_type_global_average_pooling_nwc_qs8: return xnn_setup_global_average_pooling_nwc_qs8( diff --git a/src/subgraph/hardswish.c b/src/subgraph/hardswish.c index d5eb54fba..6e896519f 100644 --- a/src/subgraph/hardswish.c +++ b/src/subgraph/hardswish.c @@ -34,25 +34,10 @@ static enum xnn_status create_hardswish_operator( const size_t num_input_dims = 
values[input_id].shape.num_dims; const size_t channel_dim = num_input_dims == 0 ? 1 : values[input_id].shape.dim[num_input_dims - 1]; - enum xnn_status status; - switch (node->compute_type) { - case xnn_compute_type_fp32: - status = xnn_create_hardswish_nc_f32( - channel_dim /* channels */, channel_dim /* input stride */, channel_dim /* output stride */, - node->flags, - &opdata->operator_object); - break; -#ifndef XNN_NO_F16_OPERATORS - case xnn_compute_type_fp16: - status = xnn_create_hardswish_nc_f16( - channel_dim /* channels */, channel_dim /* input stride */, channel_dim /* output stride */, - node->flags, - &opdata->operator_object); - break; -#endif // !defined(XNN_NO_F16_OPERATORS) - default: - XNN_UNREACHABLE; - } + const enum xnn_status status = xnn_create_hardswish_nc_f32( + channel_dim /* channels */, channel_dim /* input stride */, channel_dim /* output stride */, + node->flags, + &opdata->operator_object); if (status == xnn_status_success) { opdata->batch_size = xnn_shape_multiply_non_channel_dims(&values[input_id].shape); opdata->inputs[0] = input_id; @@ -83,26 +68,12 @@ static enum xnn_status setup_hardswish_operator( void* output_data = output_blob->data; assert(output_data != NULL); - switch (opdata->operator_object->type) { - case xnn_operator_type_hardswish_nc_f32: - return xnn_setup_hardswish_nc_f32( - opdata->operator_object, - opdata->batch_size, - input_data, - output_data, - threadpool); -#ifndef XNN_NO_F16_OPERATORS - case xnn_operator_type_hardswish_nc_f16: - return xnn_setup_hardswish_nc_f16( - opdata->operator_object, - opdata->batch_size, - input_data, - output_data, - threadpool); -#endif // !defined(XNN_NO_F16_OPERATORS) - default: - XNN_UNREACHABLE; - } + return xnn_setup_hardswish_nc_f32( + opdata->operator_object, + opdata->batch_size, + input_data, + output_data, + threadpool); } enum xnn_status xnn_define_hardswish( diff --git a/src/xnnpack/subgraph.h b/src/xnnpack/subgraph.h index a132c4328..5f0fc1571 100644 --- 
a/src/xnnpack/subgraph.h +++ b/src/xnnpack/subgraph.h @@ -82,15 +82,6 @@ struct xnn_value { uint32_t num_consumers; uint32_t num_nchw_compatible_consumers; enum xnn_layout_type layout; - /// Set during analysis in xnn_subgraph_rewrite_for_fp16. - /// Indicates that this value should be converted to FP16. - bool fp16_compatible; - /// Set during analysis in xnn_subgraph_rewrite_for_fp16. - /// Indicates Value ID of the FP16 variant of this Value. - uint32_t fp16_id; - /// Set during analysis in xnn_subgraph_rewrite_for_fp16. - /// Indicates Value ID of the FP32 variant of this Value. - uint32_t fp32_id; }; struct xnn_blob { @@ -119,14 +110,11 @@ typedef enum xnn_status (*xnn_setup_operator_fn)( enum xnn_compute_type { xnn_compute_type_invalid = 0, xnn_compute_type_fp32, - xnn_compute_type_fp16, xnn_compute_type_qc8, xnn_compute_type_qs8, xnn_compute_type_qu8, - xnn_compute_type_fp32_to_fp16, xnn_compute_type_fp32_to_qs8, xnn_compute_type_fp32_to_qu8, - xnn_compute_type_fp16_to_fp32, xnn_compute_type_qs8_to_fp32, xnn_compute_type_qu8_to_fp32, }; @@ -334,8 +322,6 @@ struct xnn_value* xnn_subgraph_new_internal_value(xnn_subgraph_t subgraph); struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph); -void xnn_subgraph_add_nodes(xnn_subgraph_t subgraph, size_t num_nodes); - size_t xnn_tensor_get_size( xnn_subgraph_t subgraph, uint32_t value_id); @@ -355,14 +341,6 @@ void xnn_subgraph_rewrite_for_nchw(xnn_subgraph_t subgraph); void xnn_node_clear(struct xnn_node* node); void xnn_value_clear(struct xnn_value* value); -void xnn_value_copy(struct xnn_value* dst_value, const struct xnn_value* src_value); - -void xnn_init_convert_node( - struct xnn_node* node, - enum xnn_compute_type compute_type, - uint32_t input_id, - uint32_t output_id, - uint32_t flags); #ifdef __cplusplus } // extern "C" |