diff options
-rw-r--r-- | BUILD.bazel | 4 | ||||
-rw-r--r-- | include/xnnpack.h | 153 | ||||
-rw-r--r-- | src/runtime.c | 263 | ||||
-rw-r--r-- | src/subgraph.c | 425 | ||||
-rw-r--r-- | src/tensor.c | 101 | ||||
-rw-r--r-- | src/xnnpack/allocator.h | 8 | ||||
-rw-r--r-- | src/xnnpack/subgraph.h | 168 |
7 files changed, 1122 insertions, 0 deletions
diff --git a/BUILD.bazel b/BUILD.bazel index 619a16745..d8916440f 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1474,6 +1474,7 @@ INTERNAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [ "src/xnnpack/params-init.h", "src/xnnpack/requantization-stubs.h", "src/xnnpack/requantization.h", + "src/xnnpack/subgraph.h", ] ACCURACY_EVAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [ @@ -1818,6 +1819,9 @@ xnnpack_cc_library( srcs = OPERATOR_SRCS + [ "src/memory.c", "src/operator-delete.c", + "src/runtime.c", + "src/subgraph.c", + "src/tensor.c", ], hdrs = INTERNAL_HDRS + LOGGING_HDRS, copts = xnnpack_std_copts() + LOGGING_COPTS + [ diff --git a/include/xnnpack.h b/include/xnnpack.h index 1511e6f13..1ee7827df 100644 --- a/include/xnnpack.h +++ b/include/xnnpack.h @@ -127,6 +127,159 @@ enum xnn_status xnn_initialize(const struct xnn_allocator* allocator); /// @retval xnn_status_success - deinitialization call succeeded. enum xnn_status xnn_deinitialize(void); +typedef struct xnn_subgraph* xnn_subgraph_t; + +enum xnn_status xnn_create_subgraph( + uint32_t external_value_ids, + uint32_t flags, + xnn_subgraph_t* subgraph_out); + +enum xnn_status xnn_delete_subgraph( + xnn_subgraph_t subgraph); + +#define XNN_VALUE_FLAG_EXTERNAL_INPUT 0x00000001 +#define XNN_VALUE_FLAG_EXTERNAL_OUTPUT 0x00000002 + +#define XNN_INVALID_VALUE_ID UINT32_MAX + +enum xnn_datatype { + xnn_datatype_invalid = 0, + xnn_datatype_fp32 = 1, + xnn_datatype_fp16 = 2, +}; + +/// Define a tensor-type Value and add it to a subgraph. +/// +/// @param datatype - type of tensor elements. +/// @param num_dims - number of dimensions in the shape. +/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL. +/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized, +/// this pointer must be NULL. +/// @param external_id - external ID for the Value. 
The ID must be within the range of reserved Value IDs specified in +/// subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be created +/// for the Value. +/// @param subgraph - subgraph that will own the created value. +/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. +enum xnn_status xnn_define_tensor_value( + xnn_subgraph_t subgraph, + enum xnn_datatype datatype, + size_t num_dims, + const size_t* dims, + const void* data, + uint32_t external_id, + uint32_t flags, + uint32_t* id_out); + +/// Define a 2D Convolution node and add it to a subgraph. +/// +/// @param input_padding_top - implicit zero-padding above 2D input data. +/// @param input_padding_right - implicit zero-padding to the right of 2D input data. +/// @param input_padding_bottom - implicit zero-padding below 2D input data. +/// @param input_padding_left - implicit zero-padding to the left of 2D input data. +/// @param kernel_height - kernel (filter) height. +/// @param kernel_width - kernel (filter) width. +/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride). +/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride). +/// @param dilation_height - dilation of kernel elements along the height dimension. +/// @param dilation_width - dilation of kernel elements along the width dimension. +/// @param groups - number of convolution groups. +/// @param group_input_channels - number of input channels per group. +/// @param group_output_channels - number of output channels per group. +/// @param output_min - lower bound for clipping output values. +/// @param output_max - upper bound for clipping output values. +/// @param input_id - input tensor ID. Must be a 4D tensor with [N, IH, IW, groups * group_input_channels] dimensions. +/// @param filter_id - filter tensor ID. 
Must be a 4D tensor with +/// [groups * group_output_channels, kernel_height, kernel_width, group_input_channels] dimensions. +/// @param bias_id - bias tensor ID. Must be a 1D tensor with [groups * group_output_channels] dimensions. +/// @param output_id - output tensor ID. Must be a 4D tensor with [N, OH, OW, groups * group_output_channels] dimensions. +enum xnn_status xnn_define_convolution_2d( + xnn_subgraph_t subgraph, + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t subsampling_height, + uint32_t subsampling_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t groups, + size_t group_input_channels, + size_t group_output_channels, + float output_min, + float output_max, + uint32_t input_id, + uint32_t filter_id, + uint32_t bias_id, + uint32_t output_id, + uint32_t flags); + +/// Define a 2D Depthwise Convolution node and add it to a subgraph. +/// +/// @param input_padding_top - implicit zero-padding above 2D input data. +/// @param input_padding_right - implicit zero-padding to the right of 2D input data. +/// @param input_padding_bottom - implicit zero-padding below 2D input data. +/// @param input_padding_left - implicit zero-padding to the left of 2D input data. +/// @param kernel_height - kernel (filter) height. +/// @param kernel_width - kernel (filter) width. +/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride). +/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride). +/// @param dilation_height - dilation of kernel elements along the height dimension. +/// @param dilation_width - dilation of kernel elements along the width dimension. +/// @param depth_multiplier - ratio of output channels to input channels. +/// @param input_channels - number of input channels. 
+/// @param output_min - lower bound for clipping output values. +/// @param output_max - upper bound for clipping output values. +/// @param input_id - input tensor. Must be a 4D tensor with [N, IH, IW, input_channels] dimensions. +/// @param filter_id - filter tensor. Must be a 4D tensor with +/// [1, kernel_height, kernel_width, input_channels * depth_multiplier] dimensions. +/// @param bias_id - bias tensor. Must be a 1D tensor with [input_channels * depth_multiplier] dimensions. +/// @param output_id - output tensor. Must be a 4D tensor with [N, OH, OW, input_channels * depth_multiplier] dimensions. +enum xnn_status xnn_define_depthwise_convolution_2d( + xnn_subgraph_t subgraph, + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t subsampling_height, + uint32_t subsampling_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t depth_multiplier, + size_t input_channels, + float output_min, + float output_max, + uint32_t input_id, + uint32_t filter_id, + uint32_t bias_id, + uint32_t output_id, + uint32_t flags); + +typedef struct xnn_runtime* xnn_runtime_t; + +enum xnn_status xnn_create_runtime( + xnn_subgraph_t subgraph, + xnn_runtime_t* runtime_out); + +struct xnn_external_value { + uint32_t id; + void* data; +}; + +enum xnn_status xnn_setup_runtime( + xnn_runtime_t runtime, + size_t num_external_values, + const struct xnn_external_value* external_values); + +enum xnn_status xnn_invoke_runtime( + xnn_runtime_t runtime); + +enum xnn_status xnn_delete_runtime( + xnn_runtime_t runtime); + typedef struct xnn_operator* xnn_operator_t; enum xnn_status xnn_run_operator( diff --git a/src/runtime.c b/src/runtime.c new file mode 100644 index 000000000..d95abad20 --- /dev/null +++ b/src/runtime.c @@ -0,0 +1,263 @@ +// Copyright 2020 Google LLC +// +// This source code is licensed under the BSD-style license found in the 
+// LICENSE file in the root directory of this source tree. + +#include <math.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> + +#include <xnnpack.h> +#include <xnnpack/allocator.h> +#include <xnnpack/log.h> +#include <xnnpack/math.h> +#include <xnnpack/operator.h> +#include <xnnpack/params.h> +#include <xnnpack/subgraph.h> + + +enum xnn_status xnn_create_runtime( + xnn_subgraph_t subgraph, + xnn_runtime_t* runtime_out) +{ + struct xnn_runtime* runtime = NULL; + enum xnn_status status = xnn_status_uninitialized; + + if (!xnn_params.initialized) { + xnn_log_error("failed to create runtime: XNNPACK is not initialized"); + goto error; + } + + status = xnn_status_out_of_memory; + + runtime = xnn_allocate_zero_memory(sizeof(struct xnn_runtime)); + if (runtime == NULL) { + xnn_log_error("failed to allocate %zu bytes for runtime descriptor", sizeof(struct xnn_runtime)); + goto error; + } + + runtime->ops = xnn_allocate_zero_memory(sizeof(struct xnn_operator_data) * subgraph->num_nodes); + if (runtime->ops == NULL) { + xnn_log_error("failed to allocate %zu bytes for opdata descriptors", + sizeof(struct xnn_operator_data) * subgraph->num_nodes); + goto error; + } + runtime->num_ops = subgraph->num_nodes; + + struct xnn_value* values = subgraph->values; + for (size_t i = 0; i < subgraph->num_nodes; i++) { + const struct xnn_node* node = subgraph->nodes + i; + switch (node->type) { + case xnn_node_type_convolution_2d: + status = xnn_create_convolution2d_nhwc_f32( + node->params.convolution_2d.input_padding_top, + node->params.convolution_2d.input_padding_right, + node->params.convolution_2d.input_padding_bottom, + node->params.convolution_2d.input_padding_left, + node->params.convolution_2d.kernel_height, + node->params.convolution_2d.kernel_width, + node->params.convolution_2d.subsampling_height, + node->params.convolution_2d.subsampling_width, + node->params.convolution_2d.dilation_height, + node->params.convolution_2d.dilation_width, + 
node->params.convolution_2d.groups, + node->params.convolution_2d.group_input_channels, + node->params.convolution_2d.group_output_channels, + node->params.convolution_2d.group_input_channels * node->params.convolution_2d.groups /* input_pixel_stride */, + node->params.convolution_2d.group_output_channels * node->params.convolution_2d.groups /* output_pixel_stride */, + values[node->inputs.convolution_2d.filter].data, + values[node->inputs.convolution_2d.bias].data, + node->params.convolution_2d.output_min, + node->params.convolution_2d.output_max, + node->flags, + &runtime->ops[i].op); + if (status != xnn_status_success) { + goto error; + } + runtime->ops[i].batch_size = subgraph->values[node->inputs.raw[0]].shape.dim[0]; + runtime->ops[i].input_height = subgraph->values[node->inputs.raw[0]].shape.dim[1]; + runtime->ops[i].input_width = subgraph->values[node->inputs.raw[0]].shape.dim[2]; + runtime->ops[i].inputs[0] = node->inputs.raw[0]; + runtime->ops[i].outputs[0] = node->outputs.raw[0]; + break; + case xnn_node_type_depthwise_convolution_2d: + status = xnn_create_convolution2d_nhwc_f32( + node->params.depthwise_convolution_2d.input_padding_top, + node->params.depthwise_convolution_2d.input_padding_right, + node->params.depthwise_convolution_2d.input_padding_bottom, + node->params.depthwise_convolution_2d.input_padding_left, + node->params.depthwise_convolution_2d.kernel_height, + node->params.depthwise_convolution_2d.kernel_width, + node->params.depthwise_convolution_2d.subsampling_height, + node->params.depthwise_convolution_2d.subsampling_width, + node->params.depthwise_convolution_2d.dilation_height, + node->params.depthwise_convolution_2d.dilation_width, + node->params.depthwise_convolution_2d.input_channels /* groups */, + 1 /* group_input_channels */, + node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */, + node->params.depthwise_convolution_2d.input_channels /* input_pixel_stride */, + 
node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_pixel_stride */, + values[node->inputs.convolution_2d.filter].data, + values[node->inputs.convolution_2d.bias].data, + node->params.depthwise_convolution_2d.output_min, + node->params.depthwise_convolution_2d.output_max, + node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION, + &runtime->ops[i].op); + if (status != xnn_status_success) { + goto error; + } + runtime->ops[i].batch_size = subgraph->values[node->inputs.raw[0]].shape.dim[0]; + runtime->ops[i].input_height = subgraph->values[node->inputs.raw[0]].shape.dim[1]; + runtime->ops[i].input_width = subgraph->values[node->inputs.raw[0]].shape.dim[2]; + runtime->ops[i].inputs[0] = node->inputs.raw[0]; + runtime->ops[i].outputs[0] = node->outputs.raw[0]; + break; + case xnn_node_type_invalid: + xnn_log_fatal("unexpected node type %d in node #%zu", node->type, i); + XNN_UNREACHABLE; + break; + } + } + + runtime->blobs = xnn_allocate_zero_memory(sizeof(struct xnn_blob) * subgraph->num_values); + if (runtime->blobs == NULL) { + xnn_log_error("failed to allocate %zu bytes for blob descriptors", + sizeof(struct xnn_blob) * subgraph->num_values); + goto error; + } + runtime->num_blobs = subgraph->num_values; + + size_t buffer_size = 0; + for (size_t i = 0; i < subgraph->num_values; i++) { + const struct xnn_value* value = &subgraph->values[i]; + struct xnn_blob* blob = &runtime->blobs[i]; + if (values->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) { + blob->size = xnn_tensor_get_size(subgraph, i); + if (value->data == NULL) { + if ((value->flags & (XNN_VALUE_FLAG_EXTERNAL_INPUT | XNN_VALUE_FLAG_EXTERNAL_OUTPUT)) == 0) { + // Value is purely internal to the runtime, and must be allocated in its workspace. 
+ buffer_size = round_up_po2(buffer_size + blob->size, XNN_EXTRA_BYTES); + } else { + // Value is non-static and external to the runtime: must be specified via a call to xnn_setup_runtime. + blob->external = true; + } + } + } + } + + runtime->workspace = xnn_allocate_simd_memory(buffer_size); + if (runtime->workspace == NULL) { + xnn_log_error("failed to allocate %zu bytes to runtime workspace", buffer_size); + goto error; + } + + size_t buffer_offset = 0; + for (size_t i = 0; i < subgraph->num_values; i++) { + const struct xnn_value* value = &subgraph->values[i]; + struct xnn_blob* blob = &runtime->blobs[i]; + if (values->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) { + if (value->data == NULL && !blob->external) { + // Value is purely internal to the runtime, allocate it in the workspace. + blob->data = (void*) ((uintptr_t) runtime->workspace + buffer_offset); + buffer_offset = round_up_po2(buffer_offset + blob->size, XNN_EXTRA_BYTES); + } + } + } + + *runtime_out = runtime; + return xnn_status_success; + +error: + xnn_delete_runtime(runtime); + return status; +} + +enum xnn_status xnn_setup_runtime( + xnn_runtime_t runtime, + size_t num_external_values, + const struct xnn_external_value* external_values) +{ + // Validate inputs without changing internal state. + // This ensures that runtime stays in consistent state in case validation fails midway. 
+ for (size_t i = 0; i < num_external_values; i++) { + const struct xnn_external_value* external_value = &external_values[i]; + const uint32_t value_id = external_value->id; + if (value_id >= runtime->num_blobs) { + xnn_log_error("failed to setup runtime: out-of-bounds ID %" PRIu32 " in external value #%zu", + value_id, i); + return xnn_status_invalid_parameter; + } + + const struct xnn_blob* blob = &runtime->blobs[value_id]; + if (!blob->external) { + xnn_log_error("failed to setup runtime: Value %" PRIu32 " is not external", value_id); + return xnn_status_invalid_parameter; + } + } + + // Apply runtime state changes. + for (size_t i = 0; i < num_external_values; i++) { + const struct xnn_external_value* external_value = &external_values[i]; + const uint32_t value_id = external_value->id; + struct xnn_blob* blob = &runtime->blobs[value_id]; + blob->data = external_value->data; + } + + for (size_t i = 0; i < runtime->num_ops; i++) { + const struct xnn_operator_data* op = &runtime->ops[i]; + enum xnn_status status = xnn_status_success; + switch (op->op->type) { + case xnn_operator_type_convolution_nhwc_f32: + status = xnn_setup_convolution2d_nhwc_f32( + op->op, + op->batch_size, + op->input_height, + op->input_width, + runtime->blobs[op->inputs[0]].data, + runtime->blobs[op->outputs[0]].data, + NULL /* threadpool */); + break; + default: + xnn_log_fatal("unexpected operator type %d in operator #%zu", op->op->type, i); + XNN_UNREACHABLE; + } + if (status != xnn_status_success) { + xnn_log_error("failed to setup runtime: error in operator #%zu", i); + return status; + } + } + + return xnn_status_success; +} + +enum xnn_status xnn_invoke_runtime( + xnn_runtime_t runtime) +{ + for (size_t i = 0; i < runtime->num_ops; i++) { + const enum xnn_status status = xnn_run_operator(runtime->ops[i].op, NULL /* thread pool */); + if (status != xnn_status_success) { + return status; + } + } + return xnn_status_success; +} + +enum xnn_status xnn_delete_runtime( + xnn_runtime_t 
runtime) +{ + if (runtime != NULL) { + if (runtime->ops != NULL) { + for (size_t i = 0; i < runtime->num_ops; i++) { + xnn_delete_operator(runtime->ops[i].op); + } + xnn_release_memory(runtime->ops); + + xnn_release_memory(runtime->blobs); + xnn_release_memory(runtime->workspace); + } + xnn_release_memory(runtime); + } + return xnn_status_success; +} diff --git a/src/subgraph.c b/src/subgraph.c new file mode 100644 index 000000000..1c0a565e8 --- /dev/null +++ b/src/subgraph.c @@ -0,0 +1,425 @@ +// Copyright 2020 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include <math.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> + +#include <xnnpack.h> +#include <xnnpack/allocator.h> +#include <xnnpack/log.h> +#include <xnnpack/math.h> +#include <xnnpack/params.h> +#include <xnnpack/subgraph.h> + + +enum xnn_status xnn_create_subgraph( + uint32_t external_value_ids, + uint32_t flags, + xnn_subgraph_t* subgraph_out) +{ + struct xnn_subgraph* subgraph = NULL; + enum xnn_status status = xnn_status_uninitialized; + + if (!xnn_params.initialized) { + xnn_log_error("failed to create subgraph: XNNPACK is not initialized"); + goto error; + } + + status = xnn_status_out_of_memory; + + subgraph = xnn_allocate_zero_memory(sizeof(struct xnn_subgraph)); + if (subgraph == NULL) { + xnn_log_error("failed to allocate %zu bytes for subgraph descriptor", sizeof(struct xnn_subgraph)); + goto error; + } + + subgraph->external_value_ids = external_value_ids; + + subgraph->values = xnn_allocate_zero_memory(external_value_ids * sizeof(struct xnn_value)); + if (subgraph->values == NULL) { + xnn_log_error("failed to allocate %zu bytes for subgraph values", external_value_ids * sizeof(struct xnn_value)); + goto error; + } + for (size_t i = 0; i < external_value_ids; i++) { + subgraph->values[i].id = i; + } + subgraph->num_values = external_value_ids; + 
subgraph->num_reserved_values = external_value_ids; + + *subgraph_out = subgraph; + return xnn_status_success; + +error: + xnn_delete_subgraph(subgraph); + return status; +} + + +struct xnn_value* xnn_subgraph_new_internal_value(xnn_subgraph_t subgraph) +{ + struct xnn_value* values = subgraph->values; + const size_t size = subgraph->num_values; + const size_t capacity = subgraph->num_reserved_values; + if (capacity < size + 1) { + const size_t new_capacity = max(min(capacity * 2, capacity + 512), capacity + 64); + assert(new_capacity >= size + 1); + values = xnn_reallocate_memory(values, new_capacity * sizeof(struct xnn_value)); + if (values == NULL) { + xnn_log_error("failed to allocate %zu bytes for subgraph values", + capacity * sizeof(struct xnn_value)); + return values; + } + + memset(values + size, 0, (new_capacity - size) * sizeof(struct xnn_value)); + subgraph->num_reserved_values = new_capacity; + subgraph->values = values; + } + subgraph->num_values = size + 1; + struct xnn_value* new_value = values + size; + new_value->id = size; + return new_value; +} + +struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph) +{ + struct xnn_node* nodes = subgraph->nodes; + const size_t size = subgraph->num_nodes; + const size_t capacity = subgraph->num_reserved_nodes; + + if (capacity < size + 1) { + const size_t new_capacity = max(min(capacity * 2, capacity + 512), capacity + 64); + assert(new_capacity >= size + 1); + nodes = xnn_reallocate_memory(nodes, new_capacity * sizeof(struct xnn_node)); + if (nodes == NULL) { + xnn_log_error("failed to allocate %zu bytes for subgraph nodes", + capacity * sizeof(struct xnn_node)); + return nodes; + } + + memset(nodes + size, 0, (new_capacity - size) * sizeof(struct xnn_node)); + subgraph->num_reserved_nodes = new_capacity; + subgraph->nodes = nodes; + } + subgraph->num_nodes = size + 1; + struct xnn_node* new_node = nodes + size; + new_node->id = size; + return new_node; +} + +enum xnn_status xnn_define_convolution_2d( 
+ xnn_subgraph_t subgraph, + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t subsampling_height, + uint32_t subsampling_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t groups, + size_t group_input_channels, + size_t group_output_channels, + float output_min, + float output_max, + uint32_t input_id, + uint32_t filter_id, + uint32_t bias_id, + uint32_t output_id, + uint32_t flags) +{ + if (!xnn_params.initialized) { + xnn_log_error("failed to define Convolution operator: XNNPACK is not initialized"); + return xnn_status_uninitialized; + } + + if (kernel_width == 0 || kernel_height == 0) { + xnn_log_error( + "failed to define Convolution operator with %" PRIu32 "x%" PRIu32 " kernel: kernel dimensions must be non-zero", + kernel_width, kernel_height); + return xnn_status_invalid_parameter; + } + + if (subsampling_width == 0 || subsampling_height == 0) { + xnn_log_error( + "failed to define Convolution operator with %" PRIu32 "x%" PRIu32 " subsampling: " + "subsampling dimensions must be non-zero", + subsampling_width, subsampling_height); + return xnn_status_invalid_parameter; + } + + if (dilation_width == 0 || dilation_height == 0) { + xnn_log_error( + "failed to define Convolution operator with %" PRIu32 "x%" PRIu32 " dilation: " + "dilation dimensions must be non-zero", + dilation_width, dilation_height); + return xnn_status_invalid_parameter; + } + + if (groups == 0) { + xnn_log_error( + "failed to define Convolution operator with %" PRIu32 " groups: number of groups must be non-zero", groups); + return xnn_status_invalid_parameter; + } + + if (group_input_channels == 0) { + xnn_log_error( + "failed to define Convolution operator with %zu input channels per group: " + "number of channels must be non-zero", + group_input_channels); + return xnn_status_invalid_parameter; + } + + if (group_output_channels == 
0) { + xnn_log_error( + "failed to define Convolution operator with %zu output channels per group: " + "number of channels must be non-zero", + group_output_channels); + return xnn_status_invalid_parameter; + } + + if (isnan(output_min)) { + xnn_log_error( + "failed to define Convolution operator with NaN output lower bound: lower bound must be non-NaN"); + return xnn_status_invalid_parameter; + } + + if (isnan(output_max)) { + xnn_log_error( + "failed to define Convolution operator with NaN output upper bound: upper bound must be non-NaN"); + return xnn_status_invalid_parameter; + } + + if (output_min >= output_max) { + xnn_log_error( + "failed to define Convolution operator with [%.7g, %.7g] output range: " + "lower bound must be below upper bound", + output_min, output_max); + return xnn_status_invalid_parameter; + } + + if (input_id >= subgraph->num_values) { + xnn_log_error( + "failed to define Convolution operator with input ID #%" PRIu32 ": invalid Value ID", + input_id); + return xnn_status_invalid_parameter; + } + + if (filter_id >= subgraph->num_values) { + xnn_log_error( + "failed to define Convolution operator with filter ID #%" PRIu32 ": invalid Value ID", + filter_id); + return xnn_status_invalid_parameter; + } + + if (bias_id >= subgraph->num_values) { + xnn_log_error( + "failed to define Convolution operator with bias ID #%" PRIu32 ": invalid Value ID", + bias_id); + return xnn_status_invalid_parameter; + } + + if (output_id >= subgraph->num_values) { + xnn_log_error( + "failed to define Convolution operator with output ID #%" PRIu32 ": invalid Value ID", + output_id); + return xnn_status_invalid_parameter; + } + + struct xnn_node* node = xnn_subgraph_new_node(subgraph); + if (node == NULL) { + return xnn_status_out_of_memory; + } + + node->type = xnn_node_type_convolution_2d; + node->params.convolution_2d.input_padding_top = input_padding_top; + node->params.convolution_2d.input_padding_right = input_padding_right; + 
node->params.convolution_2d.input_padding_bottom = input_padding_bottom; + node->params.convolution_2d.input_padding_left = input_padding_left; + node->params.convolution_2d.kernel_height = kernel_height; + node->params.convolution_2d.kernel_width = kernel_width; + node->params.convolution_2d.subsampling_height = subsampling_height; + node->params.convolution_2d.subsampling_width = subsampling_width; + node->params.convolution_2d.dilation_height = dilation_height; + node->params.convolution_2d.dilation_width = dilation_width; + node->params.convolution_2d.groups = groups; + node->params.convolution_2d.group_input_channels = group_input_channels; + node->params.convolution_2d.group_output_channels = group_output_channels; + node->params.convolution_2d.output_min = output_min; + node->params.convolution_2d.output_max = output_max; + node->num_inputs = 3; + node->inputs.raw[0] = input_id; + node->inputs.raw[1] = filter_id; + node->inputs.raw[2] = bias_id; + node->num_outputs = 1; + node->outputs.raw[0] = output_id; + node->flags = flags; + + return xnn_status_success; +}; + +enum xnn_status xnn_define_depthwise_convolution_2d( + xnn_subgraph_t subgraph, + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t subsampling_height, + uint32_t subsampling_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t depth_multiplier, + size_t input_channels, + float output_min, + float output_max, + uint32_t input_id, + uint32_t filter_id, + uint32_t bias_id, + uint32_t output_id, + uint32_t flags) +{ + if (!xnn_params.initialized) { + xnn_log_error("failed to define Depthwise Convolution operator: XNNPACK is not initialized"); + return xnn_status_uninitialized; + } + + if (kernel_width == 0 || kernel_height == 0) { + xnn_log_error( + "failed to define Depthwise Convolution operator with %" PRIu32 "x%" PRIu32 " kernel: kernel 
dimensions must be non-zero", + kernel_width, kernel_height); + return xnn_status_invalid_parameter; + } + + if (subsampling_width == 0 || subsampling_height == 0) { + xnn_log_error( + "failed to define Depthwise Convolution operator with %" PRIu32 "x%" PRIu32 " subsampling: " + "subsampling dimensions must be non-zero", + subsampling_width, subsampling_height); + return xnn_status_invalid_parameter; + } + + if (dilation_width == 0 || dilation_height == 0) { + xnn_log_error( + "failed to define Depthwise Convolution operator with %" PRIu32 "x%" PRIu32 " dilation: " + "dilation dimensions must be non-zero", + dilation_width, dilation_height); + return xnn_status_invalid_parameter; + } + + if (depth_multiplier == 0) { + xnn_log_error( + "failed to define Depthwise Convolution operator with %" PRIu32 " depth multiplier: " + "depth multiplier must be non-zero", + depth_multiplier); + return xnn_status_invalid_parameter; + } + + if (input_channels == 0) { + xnn_log_error( + "failed to define Depthwise Convolution operator with %zu input channels: " + "number of channels must be non-zero", + input_channels); + return xnn_status_invalid_parameter; + } + + if (isnan(output_min)) { + xnn_log_error( + "failed to define Depthwise Convolution operator with NaN output lower bound: lower bound must be non-NaN"); + return xnn_status_invalid_parameter; + } + + if (isnan(output_max)) { + xnn_log_error( + "failed to define Depthwise Convolution operator with NaN output upper bound: upper bound must be non-NaN"); + return xnn_status_invalid_parameter; + } + + if (output_min >= output_max) { + xnn_log_error( + "failed to define Depthwise Convolution operator with [%.7g, %.7g] output range: " + "lower bound must be below upper bound", + output_min, output_max); + return xnn_status_invalid_parameter; + } + + if (input_id >= subgraph->num_values) { + xnn_log_error( + "failed to define Depthwise Convolution operator with input ID #%" PRIu32 ": invalid Value ID", + input_id); + return 
xnn_status_invalid_parameter; + } + + if (filter_id >= subgraph->num_values) { + xnn_log_error( + "failed to define Depthwise Convolution operator with filter ID #%" PRIu32 ": invalid Value ID", + filter_id); + return xnn_status_invalid_parameter; + } + + if (bias_id >= subgraph->num_values) { + xnn_log_error( + "failed to define Depthwise Convolution operator with bias ID #%" PRIu32 ": invalid Value ID", + bias_id); + return xnn_status_invalid_parameter; + } + + if (output_id >= subgraph->num_values) { + xnn_log_error( + "failed to define Depthwise Convolution operator with output ID #%" PRIu32 ": invalid Value ID", + output_id); + return xnn_status_invalid_parameter; + } + + struct xnn_node* node = xnn_subgraph_new_node(subgraph); + if (node == NULL) { + return xnn_status_out_of_memory; + } + + node->type = xnn_node_type_depthwise_convolution_2d; + node->params.depthwise_convolution_2d.input_padding_top = input_padding_top; + node->params.depthwise_convolution_2d.input_padding_right = input_padding_right; + node->params.depthwise_convolution_2d.input_padding_bottom = input_padding_bottom; + node->params.depthwise_convolution_2d.input_padding_left = input_padding_left; + node->params.depthwise_convolution_2d.kernel_height = kernel_height; + node->params.depthwise_convolution_2d.kernel_width = kernel_width; + node->params.depthwise_convolution_2d.subsampling_height = subsampling_height; + node->params.depthwise_convolution_2d.subsampling_width = subsampling_width; + node->params.depthwise_convolution_2d.dilation_height = dilation_height; + node->params.depthwise_convolution_2d.dilation_width = dilation_width; + node->params.depthwise_convolution_2d.depth_multiplier = depth_multiplier; + node->params.depthwise_convolution_2d.input_channels = input_channels; + node->params.depthwise_convolution_2d.output_min = output_min; + node->params.depthwise_convolution_2d.output_max = output_max; + node->num_inputs = 3; + node->inputs.raw[0] = input_id; + node->inputs.raw[1] = 
filter_id; + node->inputs.raw[2] = bias_id; + node->num_outputs = 1; + node->outputs.raw[0] = output_id; + node->flags = flags; + + return xnn_status_success; +}; + +enum xnn_status xnn_delete_subgraph( + xnn_subgraph_t subgraph) +{ + if (subgraph != NULL) { + memset(subgraph->nodes, 0, sizeof(struct xnn_node) * subgraph->num_nodes); + xnn_release_memory(subgraph->nodes); + + memset(subgraph->values, 0, sizeof(struct xnn_value) * subgraph->num_values); + xnn_release_memory(subgraph->values); + + memset(subgraph, 0, sizeof(struct xnn_subgraph)); + xnn_release_memory(subgraph); + } + return xnn_status_success; +} diff --git a/src/tensor.c b/src/tensor.c new file mode 100644 index 000000000..8730e4502 --- /dev/null +++ b/src/tensor.c @@ -0,0 +1,101 @@ +// Copyright 2020 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> + +#include <xnnpack.h> +#include <xnnpack/allocator.h> +#include <xnnpack/log.h> +#include <xnnpack/params.h> +#include <xnnpack/subgraph.h> + + +enum xnn_status xnn_define_tensor_value( + xnn_subgraph_t subgraph, + enum xnn_datatype datatype, + size_t num_dims, + const size_t* dims, + const void* data, + uint32_t external_id, + uint32_t flags, + uint32_t* id_out) +{ + if (!xnn_params.initialized) { + xnn_log_error("failed to create Dense Tensor value: XNNPACK is not initialized"); + return xnn_status_uninitialized; + } + + if (external_id != XNN_INVALID_VALUE_ID && external_id >= subgraph->external_value_ids) { + xnn_log_error( + "failed to create Dense Tensor value: " + "external ID %" PRIu32 " exceeds the number of reserved external IDs in subgraph (%" PRIu32 ")", + external_id, subgraph->external_value_ids); + return xnn_status_invalid_parameter; + } + + if (num_dims > XNN_MAX_TENSOR_DIMS) { + xnn_log_error("failed to create Dense Tensor value: num of dimensions 
exceeds XNNPACK limit (%d)", + XNN_MAX_TENSOR_DIMS); + return xnn_status_unsupported_parameter; + } + + switch (datatype) { + case xnn_datatype_fp32: + case xnn_datatype_fp16: + break; + default: + xnn_log_error("failed to create Dense Tensor value: invalid data type (%d)", datatype); + return xnn_status_unsupported_parameter; + } + + struct xnn_value* value = subgraph->values + external_id; + if (external_id == XNN_INVALID_VALUE_ID) { + value = xnn_subgraph_new_internal_value(subgraph); + if (value == NULL) { + return xnn_status_out_of_memory; + } + } + value->type = xnn_value_type_dense_tensor; + value->datatype = datatype; + value->shape.num_dims = num_dims; + memcpy(value->shape.dim, dims, num_dims * sizeof(size_t)); + value->flags = flags; + value->data = data; + + *id_out = value->id; + return xnn_status_success; +} + +size_t xnn_tensor_get_size( + xnn_subgraph_t subgraph, + uint32_t value_id) +{ + assert(value_id < subgraph->num_values); + + const struct xnn_value* value = subgraph->values + value_id; + assert(value->type == xnn_value_type_dense_tensor); + assert(value->datatype != xnn_datatype_invalid); + + size_t size = 0; + switch (value->datatype) { + case xnn_datatype_fp16: + size = 2; + break; + case xnn_datatype_fp32: + size = 4; + break; + case xnn_datatype_invalid: + XNN_UNREACHABLE; + } + + for (size_t i = 0; i < value->shape.num_dims; i++) { + size *= value->shape.dim[i]; + } + + return size; +} diff --git a/src/xnnpack/allocator.h b/src/xnnpack/allocator.h index fdcfce5bf..a39387416 100644 --- a/src/xnnpack/allocator.h +++ b/src/xnnpack/allocator.h @@ -25,6 +25,14 @@ inline static void* xnn_allocate_memory(size_t memory_size) { return xnn_params.allocator.allocate(xnn_params.allocator.context, memory_size); } +inline static void* xnn_allocate_zero_memory(size_t memory_size) { + void* memory_pointer = xnn_params.allocator.allocate(xnn_params.allocator.context, memory_size); + if (memory_pointer != NULL) { + memset(memory_pointer, 0, memory_size); 
+ } + return memory_pointer; +} + inline static void* xnn_reallocate_memory(void* memory_pointer, size_t memory_size) { return xnn_params.allocator.reallocate(xnn_params.allocator.context, memory_pointer, memory_size); } diff --git a/src/xnnpack/subgraph.h b/src/xnnpack/subgraph.h new file mode 100644 index 000000000..76fee6215 --- /dev/null +++ b/src/xnnpack/subgraph.h @@ -0,0 +1,168 @@ +// Copyright 2020 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include <stddef.h> +#include <stdint.h> + +#include <xnnpack.h> + +#define XNN_MAX_INPUTS 3 +#define XNN_MAX_OUTPUTS 1 + +#define XNN_MAX_RUNTIME_INPUTS 2 +#define XNN_MAX_RUNTIME_OUTPUTS 1 + +struct xnn_shape { + size_t num_dims; + size_t dim[XNN_MAX_TENSOR_DIMS]; +}; + +enum xnn_value_type { + xnn_value_type_invalid = 0, + xnn_value_type_dense_tensor = 1, +}; + +/// Abstraction for a collections of elements produced and consumed by nodes. +struct xnn_value { + /// Unique ID for the value. + uint32_t id; + /// Type of the collection of elements. + /// + /// Currently only dense tensors are supported. + /// Other types (e.g. sparse tensors) might be supported in the future. + enum xnn_value_type type; + /// Type of elements in the collection. + enum xnn_datatype datatype; + /// Tensor shape. + struct xnn_shape shape; + /// Binary features of the tensor. Supported values are any combination of: + /// - XNN_VALUE_FLAG_EXTERNAL_INPUT + /// - XNN_VALUE_FLAG_EXTERNAL_OUTPUT + uint32_t flags; + /// Static initialization data. Must be null for non-static values. + const void* data; +}; + +struct xnn_blob { + /// Size in bytes. + size_t size; + /// Data pointer. 
+ void* data; + bool external; +}; + +enum xnn_node_type { + xnn_node_type_invalid = 0, + xnn_node_type_convolution_2d, + xnn_node_type_depthwise_convolution_2d, +}; + +struct xnn_node { + enum xnn_node_type type; + uint32_t id; + /// Static parameters of the operator node. + union { + struct { + uint32_t input_padding_top; + uint32_t input_padding_right; + uint32_t input_padding_bottom; + uint32_t input_padding_left; + uint32_t kernel_height; + uint32_t kernel_width; + uint32_t subsampling_height; + uint32_t subsampling_width; + uint32_t dilation_height; + uint32_t dilation_width; + uint32_t groups; + size_t group_input_channels; + size_t group_output_channels; + float output_min; + float output_max; + } convolution_2d; + struct { + uint32_t input_padding_top; + uint32_t input_padding_right; + uint32_t input_padding_bottom; + uint32_t input_padding_left; + uint32_t kernel_height; + uint32_t kernel_width; + uint32_t subsampling_height; + uint32_t subsampling_width; + uint32_t dilation_height; + uint32_t dilation_width; + uint32_t depth_multiplier; + size_t input_channels; + float output_min; + float output_max; + } depthwise_convolution_2d; + } params; + /// Value IDs for node inputs. + union { + uint32_t raw[XNN_MAX_INPUTS]; + struct { + uint32_t input; + uint32_t filter; + uint32_t bias; + } convolution_2d; + } inputs; + uint32_t num_inputs; + /// Value IDs for node outputs. + union { + struct { + uint32_t output; + } convolution_2d; + uint32_t raw[XNN_MAX_OUTPUTS]; + } outputs; + uint32_t num_outputs; + uint32_t flags; +}; + +struct xnn_operator_data { + xnn_operator_t op; + size_t batch_size; + size_t input_height; + size_t input_width; + uint32_t inputs[XNN_MAX_RUNTIME_INPUTS]; + uint32_t outputs[XNN_MAX_RUNTIME_OUTPUTS]; +}; + +struct xnn_subgraph { + /// Number of Value IDs reserved for communication with external graph representation. + /// Values created during subgraph transformation avoid using IDs in [0, reserved_value_ids-1] range. 
+ uint32_t external_value_ids; + + uint32_t num_reserved_values; + uint32_t num_values; + struct xnn_value* values; + + uint32_t num_reserved_nodes; + uint32_t num_nodes; + struct xnn_node* nodes; +}; + +/// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values. +struct xnn_runtime { + uint32_t num_external_values; + + /// List of operators in the execution plan, in execution order. + struct xnn_operator_data* ops; + /// Number of operators in the execution plan. + size_t num_ops; + + struct xnn_blob* blobs; + size_t num_blobs; + + void* workspace; +}; + +struct xnn_value* xnn_subgraph_new_internal_value(xnn_subgraph_t subgraph); + +struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph); + +size_t xnn_tensor_get_size( + xnn_subgraph_t subgraph, + uint32_t value_id); |