author    Marat Dukhan <maratek@google.com>    2020-02-03 12:23:01 -0800
committer XNNPACK Team <xnnpack-github-robot@google.com>    2020-02-03 12:23:31 -0800
commit    1d75a544d2bfcfd2904b7c20915ffbcbe79b8efd (patch)
tree      4fef811ef32b6507cb47488bb4f65ca0e40a7345
parent    03bc407c1a603b150796ece01ba9385fc6b465b3 (diff)
download  XNNPACK-1d75a544d2bfcfd2904b7c20915ffbcbe79b8efd.tar.gz
Subgraph API
Entry point for delegation of TFLite subgraphs.

PiperOrigin-RevId: 292977451
-rw-r--r--  BUILD.bazel                 4
-rw-r--r--  include/xnnpack.h         153
-rw-r--r--  src/runtime.c             263
-rw-r--r--  src/subgraph.c            425
-rw-r--r--  src/tensor.c              101
-rw-r--r--  src/xnnpack/allocator.h     8
-rw-r--r--  src/xnnpack/subgraph.h    168
7 files changed, 1122 insertions, 0 deletions
diff --git a/BUILD.bazel b/BUILD.bazel
index 619a16745..d8916440f 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -1474,6 +1474,7 @@ INTERNAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [
"src/xnnpack/params-init.h",
"src/xnnpack/requantization-stubs.h",
"src/xnnpack/requantization.h",
+ "src/xnnpack/subgraph.h",
]
ACCURACY_EVAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [
@@ -1818,6 +1819,9 @@ xnnpack_cc_library(
srcs = OPERATOR_SRCS + [
"src/memory.c",
"src/operator-delete.c",
+ "src/runtime.c",
+ "src/subgraph.c",
+ "src/tensor.c",
],
hdrs = INTERNAL_HDRS + LOGGING_HDRS,
copts = xnnpack_std_copts() + LOGGING_COPTS + [
diff --git a/include/xnnpack.h b/include/xnnpack.h
index 1511e6f13..1ee7827df 100644
--- a/include/xnnpack.h
+++ b/include/xnnpack.h
@@ -127,6 +127,159 @@ enum xnn_status xnn_initialize(const struct xnn_allocator* allocator);
/// @retval xnn_status_success - deinitialization call succeeded.
enum xnn_status xnn_deinitialize(void);
+/// Subgraph is an abstract representation of a neural network model.
+typedef struct xnn_subgraph* xnn_subgraph_t;
+
+/// Create an empty Subgraph object.
+///
+/// @param external_value_ids - number of Value IDs to reserve for communication with the external graph
+///                             representation. Values with IDs in the [0, external_value_ids-1] range can be used
+///                             as external inputs and outputs of the subgraph.
+/// @param flags - binary features of the subgraph. No supported flags are currently defined.
+/// @param subgraph_out - pointer to the variable that will be initialized with a handle to the Subgraph object
+///                       upon successful return.
+enum xnn_status xnn_create_subgraph(
+  uint32_t external_value_ids,
+  uint32_t flags,
+  xnn_subgraph_t* subgraph_out);
+
+/// Destroy a Subgraph object and release the memory associated with it.
+enum xnn_status xnn_delete_subgraph(
+  xnn_subgraph_t subgraph);
+
+#define XNN_VALUE_FLAG_EXTERNAL_INPUT 0x00000001
+#define XNN_VALUE_FLAG_EXTERNAL_OUTPUT 0x00000002
+
+#define XNN_INVALID_VALUE_ID UINT32_MAX
+
+enum xnn_datatype {
+ xnn_datatype_invalid = 0,
+ xnn_datatype_fp32 = 1,
+ xnn_datatype_fp16 = 2,
+};
+
+/// Define a tensor-type Value and add it to a subgraph.
+///
+/// @param datatype - type of tensor elements.
+/// @param num_dims - number of dimensions in the shape.
+/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
+/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
+///               this pointer must be NULL.
+/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified at
+///                      subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
+///                      created for the Value.
+/// @param flags - binary features of the Value. Supported values are any combination of
+///                XNN_VALUE_FLAG_EXTERNAL_INPUT and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
+/// @param subgraph - subgraph that will own the created Value.
+/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return.
+enum xnn_status xnn_define_tensor_value(
+ xnn_subgraph_t subgraph,
+ enum xnn_datatype datatype,
+ size_t num_dims,
+ const size_t* dims,
+ const void* data,
+ uint32_t external_id,
+ uint32_t flags,
+ uint32_t* id_out);
+
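A usage sketch of this entry point (the subgraph handle, all shapes, and the kFilterData buffer are hypothetical): an external input is bound to a reserved external ID, while a statically initialized filter receives an internal ID.

// Sketch: define an external fp32 input (reserved external ID 0) and a
// static fp32 filter (internal ID). Shapes and kFilterData are hypothetical.
uint32_t input_id = XNN_INVALID_VALUE_ID;
const size_t input_dims[4] = {1, 224, 224, 3};
enum xnn_status status = xnn_define_tensor_value(
  subgraph, xnn_datatype_fp32, 4, input_dims, /*data=*/NULL,
  /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id);

uint32_t filter_id = XNN_INVALID_VALUE_ID;
const size_t filter_dims[4] = {32, 3, 3, 3};
status = xnn_define_tensor_value(
  subgraph, xnn_datatype_fp32, 4, filter_dims, /*data=*/kFilterData,
  XNN_INVALID_VALUE_ID, /*flags=*/0, &filter_id);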
+/// Define a 2D Convolution node and add it to a subgraph.
+///
+/// @param input_padding_top - implicit zero-padding above 2D input data.
+/// @param input_padding_right - implicit zero-padding to the right of 2D input data.
+/// @param input_padding_bottom - implicit zero-padding below 2D input data.
+/// @param input_padding_left - implicit zero-padding to the left of 2D input data.
+/// @param kernel_height - kernel (filter) height.
+/// @param kernel_width - kernel (filter) width.
+/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
+/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
+/// @param dilation_height - dilation of kernel elements along the height dimension.
+/// @param dilation_width - dilation of kernel elements along the width dimension.
+/// @param groups - number of convolution groups.
+/// @param group_input_channels - number of input channels per group.
+/// @param group_output_channels - number of output channels per group.
+/// @param output_min - lower bound for clipping output values.
+/// @param output_max - upper bound for clipping output values.
+/// @param input_id - input tensor ID. Must be a 4D tensor with [N, IH, IW, groups * group_input_channels] dimensions.
+/// @param filter_id - filter tensor ID. Must be a 4D tensor with
+/// [groups * group_output_channels, kernel_height, kernel_width, group_input_channels] dimensions.
+/// @param bias_id - bias tensor ID. Must be a 1D tensor with [groups * group_output_channels] dimensions.
+/// @param output_id - output tensor ID. Must be a 4D tensor with [N, OH, OW, groups * group_output_channels] dimensions.
+enum xnn_status xnn_define_convolution_2d(
+ xnn_subgraph_t subgraph,
+ uint32_t input_padding_top,
+ uint32_t input_padding_right,
+ uint32_t input_padding_bottom,
+ uint32_t input_padding_left,
+ uint32_t kernel_height,
+ uint32_t kernel_width,
+ uint32_t subsampling_height,
+ uint32_t subsampling_width,
+ uint32_t dilation_height,
+ uint32_t dilation_width,
+ uint32_t groups,
+ size_t group_input_channels,
+ size_t group_output_channels,
+ float output_min,
+ float output_max,
+ uint32_t input_id,
+ uint32_t filter_id,
+ uint32_t bias_id,
+ uint32_t output_id,
+ uint32_t flags);
+
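A hedged sketch of a matching node definition, reusing the hypothetical input_id and filter_id from the tensor example above and assuming bias_id and output_id were defined the same way:

// Sketch: 3x3 convolution, stride 2, padding 1, ReLU6-style clipping.
// All IDs and dimensions carry over from the hypothetical tensor sketch.
status = xnn_define_convolution_2d(
  subgraph,
  /*input_padding_top=*/1, /*input_padding_right=*/1,
  /*input_padding_bottom=*/1, /*input_padding_left=*/1,
  /*kernel_height=*/3, /*kernel_width=*/3,
  /*subsampling_height=*/2, /*subsampling_width=*/2,
  /*dilation_height=*/1, /*dilation_width=*/1,
  /*groups=*/1, /*group_input_channels=*/3, /*group_output_channels=*/32,
  /*output_min=*/0.0f, /*output_max=*/6.0f,
  input_id, filter_id, bias_id, output_id, /*flags=*/0);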
+/// Define a 2D Depthwise Convolution node and add it to a subgraph.
+///
+/// @param input_padding_top - implicit zero-padding above 2D input data.
+/// @param input_padding_right - implicit zero-padding to the right of 2D input data.
+/// @param input_padding_bottom - implicit zero-padding below 2D input data.
+/// @param input_padding_left - implicit zero-padding to the left of 2D input data.
+/// @param kernel_height - kernel (filter) height.
+/// @param kernel_width - kernel (filter) width.
+/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
+/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
+/// @param dilation_height - dilation of kernel elements along the height dimension.
+/// @param dilation_width - dilation of kernel elements along the width dimension.
+/// @param depth_multiplier - ratio of output channels to input channels.
+/// @param input_channels - number of input channels.
+/// @param output_min - lower bound for clipping output values.
+/// @param output_max - upper bound for clipping output values.
+/// @param input_id - input tensor. Must be a 4D tensor with [N, IH, IW, input_channels] dimensions.
+/// @param filter_id - filter tensor. Must be a 4D tensor with
+/// [1, kernel_height, kernel_width, input_channels * depth_multiplier] dimensions.
+/// @param bias_id - bias tensor. Must be a 1D tensor with [input_channels * depth_multiplier] dimensions.
+/// @param output_id - output tensor. Must be a 4D tensor with [N, OH, OW, input_channels * depth_multiplier] dimensions.
+enum xnn_status xnn_define_depthwise_convolution_2d(
+ xnn_subgraph_t subgraph,
+ uint32_t input_padding_top,
+ uint32_t input_padding_right,
+ uint32_t input_padding_bottom,
+ uint32_t input_padding_left,
+ uint32_t kernel_height,
+ uint32_t kernel_width,
+ uint32_t subsampling_height,
+ uint32_t subsampling_width,
+ uint32_t dilation_height,
+ uint32_t dilation_width,
+ uint32_t depth_multiplier,
+ size_t input_channels,
+ float output_min,
+ float output_max,
+ uint32_t input_id,
+ uint32_t filter_id,
+ uint32_t bias_id,
+ uint32_t output_id,
+ uint32_t flags);
+
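For the depthwise variant, the runtime lowers the node to a grouped convolution with groups == input_channels (see src/runtime.c below). A sketch with hypothetical value IDs:

// Sketch: 3x3 depthwise convolution with depth multiplier 1, i.e. one
// output channel per input channel. All IDs are hypothetical placeholders.
status = xnn_define_depthwise_convolution_2d(
  subgraph,
  /*input_padding top/right/bottom/left=*/1, 1, 1, 1,
  /*kernel_height=*/3, /*kernel_width=*/3,
  /*subsampling=*/1, 1, /*dilation=*/1, 1,
  /*depth_multiplier=*/1, /*input_channels=*/32,
  /*output_min=*/0.0f, /*output_max=*/6.0f,
  dw_input_id, dw_filter_id, dw_bias_id, dw_output_id, /*flags=*/0);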
+/// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values.
+typedef struct xnn_runtime* xnn_runtime_t;
+
+/// Create a Runtime object from a subgraph.
+///
+/// @param subgraph - subgraph to create the runtime from. The subgraph is not modified by this call.
+/// @param runtime_out - pointer to the variable that will be initialized with a handle to the Runtime object upon
+///                      successful return.
+enum xnn_status xnn_create_runtime(
+  xnn_subgraph_t subgraph,
+  xnn_runtime_t* runtime_out);
+
+/// Binding of an external Value ID to a caller-managed data pointer.
+struct xnn_external_value {
+  uint32_t id;
+  void* data;
+};
+
+/// Set up a Runtime by binding data pointers to its external Values.
+enum xnn_status xnn_setup_runtime(
+  xnn_runtime_t runtime,
+  size_t num_external_values,
+  const struct xnn_external_value* external_values);
+
+/// Execute all operators in the Runtime's execution plan.
+enum xnn_status xnn_invoke_runtime(
+  xnn_runtime_t runtime);
+
+/// Destroy a Runtime object and release the memory associated with it.
+enum xnn_status xnn_delete_runtime(
+  xnn_runtime_t runtime);
+
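These declarations compose into a create/setup/invoke/delete lifecycle. A sketch, assuming external IDs 0 and 1 were flagged as input and output and that input_buffer/output_buffer are caller-owned:

// Sketch of the runtime lifecycle; buffers and IDs are hypothetical.
xnn_runtime_t runtime = NULL;
status = xnn_create_runtime(subgraph, &runtime);

const struct xnn_external_value externals[2] = {
  {.id = 0, .data = input_buffer},
  {.id = 1, .data = output_buffer},
};
status = xnn_setup_runtime(runtime, 2, externals);
status = xnn_invoke_runtime(runtime);

xnn_delete_runtime(runtime);
xnn_delete_subgraph(subgraph);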
typedef struct xnn_operator* xnn_operator_t;
enum xnn_status xnn_run_operator(
diff --git a/src/runtime.c b/src/runtime.c
new file mode 100644
index 000000000..d95abad20
--- /dev/null
+++ b/src/runtime.c
@@ -0,0 +1,263 @@
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <inttypes.h>
+#include <math.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <xnnpack.h>
+#include <xnnpack/allocator.h>
+#include <xnnpack/log.h>
+#include <xnnpack/math.h>
+#include <xnnpack/operator.h>
+#include <xnnpack/params.h>
+#include <xnnpack/subgraph.h>
+
+
+enum xnn_status xnn_create_runtime(
+ xnn_subgraph_t subgraph,
+ xnn_runtime_t* runtime_out)
+{
+ struct xnn_runtime* runtime = NULL;
+ enum xnn_status status = xnn_status_uninitialized;
+
+ if (!xnn_params.initialized) {
+ xnn_log_error("failed to create runtime: XNNPACK is not initialized");
+ goto error;
+ }
+
+ status = xnn_status_out_of_memory;
+
+ runtime = xnn_allocate_zero_memory(sizeof(struct xnn_runtime));
+ if (runtime == NULL) {
+ xnn_log_error("failed to allocate %zu bytes for runtime descriptor", sizeof(struct xnn_runtime));
+ goto error;
+ }
+
+ runtime->ops = xnn_allocate_zero_memory(sizeof(struct xnn_operator_data) * subgraph->num_nodes);
+ if (runtime->ops == NULL) {
+ xnn_log_error("failed to allocate %zu bytes for opdata descriptors",
+ sizeof(struct xnn_operator_data) * subgraph->num_nodes);
+ goto error;
+ }
+ runtime->num_ops = subgraph->num_nodes;
+
+ struct xnn_value* values = subgraph->values;
+ for (size_t i = 0; i < subgraph->num_nodes; i++) {
+ const struct xnn_node* node = subgraph->nodes + i;
+ switch (node->type) {
+ case xnn_node_type_convolution_2d:
+ status = xnn_create_convolution2d_nhwc_f32(
+ node->params.convolution_2d.input_padding_top,
+ node->params.convolution_2d.input_padding_right,
+ node->params.convolution_2d.input_padding_bottom,
+ node->params.convolution_2d.input_padding_left,
+ node->params.convolution_2d.kernel_height,
+ node->params.convolution_2d.kernel_width,
+ node->params.convolution_2d.subsampling_height,
+ node->params.convolution_2d.subsampling_width,
+ node->params.convolution_2d.dilation_height,
+ node->params.convolution_2d.dilation_width,
+ node->params.convolution_2d.groups,
+ node->params.convolution_2d.group_input_channels,
+ node->params.convolution_2d.group_output_channels,
+ node->params.convolution_2d.group_input_channels * node->params.convolution_2d.groups /* input_pixel_stride */,
+ node->params.convolution_2d.group_output_channels * node->params.convolution_2d.groups /* output_pixel_stride */,
+ values[node->inputs.convolution_2d.filter].data,
+ values[node->inputs.convolution_2d.bias].data,
+ node->params.convolution_2d.output_min,
+ node->params.convolution_2d.output_max,
+ node->flags,
+ &runtime->ops[i].op);
+ if (status != xnn_status_success) {
+ goto error;
+ }
+ runtime->ops[i].batch_size = subgraph->values[node->inputs.raw[0]].shape.dim[0];
+ runtime->ops[i].input_height = subgraph->values[node->inputs.raw[0]].shape.dim[1];
+ runtime->ops[i].input_width = subgraph->values[node->inputs.raw[0]].shape.dim[2];
+ runtime->ops[i].inputs[0] = node->inputs.raw[0];
+ runtime->ops[i].outputs[0] = node->outputs.raw[0];
+ break;
+ case xnn_node_type_depthwise_convolution_2d:
+ status = xnn_create_convolution2d_nhwc_f32(
+ node->params.depthwise_convolution_2d.input_padding_top,
+ node->params.depthwise_convolution_2d.input_padding_right,
+ node->params.depthwise_convolution_2d.input_padding_bottom,
+ node->params.depthwise_convolution_2d.input_padding_left,
+ node->params.depthwise_convolution_2d.kernel_height,
+ node->params.depthwise_convolution_2d.kernel_width,
+ node->params.depthwise_convolution_2d.subsampling_height,
+ node->params.depthwise_convolution_2d.subsampling_width,
+ node->params.depthwise_convolution_2d.dilation_height,
+ node->params.depthwise_convolution_2d.dilation_width,
+ node->params.depthwise_convolution_2d.input_channels /* groups */,
+ 1 /* group_input_channels */,
+ node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */,
+ node->params.depthwise_convolution_2d.input_channels /* input_pixel_stride */,
+ node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_pixel_stride */,
+ values[node->inputs.convolution_2d.filter].data,
+ values[node->inputs.convolution_2d.bias].data,
+ node->params.depthwise_convolution_2d.output_min,
+ node->params.depthwise_convolution_2d.output_max,
+ node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION,
+ &runtime->ops[i].op);
+ if (status != xnn_status_success) {
+ goto error;
+ }
+ runtime->ops[i].batch_size = subgraph->values[node->inputs.raw[0]].shape.dim[0];
+ runtime->ops[i].input_height = subgraph->values[node->inputs.raw[0]].shape.dim[1];
+ runtime->ops[i].input_width = subgraph->values[node->inputs.raw[0]].shape.dim[2];
+ runtime->ops[i].inputs[0] = node->inputs.raw[0];
+ runtime->ops[i].outputs[0] = node->outputs.raw[0];
+ break;
+ case xnn_node_type_invalid:
+ xnn_log_fatal("unexpected node type %d in node #%zu", node->type, i);
+ XNN_UNREACHABLE;
+ break;
+ }
+ }
+
+ runtime->blobs = xnn_allocate_zero_memory(sizeof(struct xnn_blob) * subgraph->num_values);
+ if (runtime->blobs == NULL) {
+ xnn_log_error("failed to allocate %zu bytes for blob descriptors",
+ sizeof(struct xnn_blob) * subgraph->num_values);
+ goto error;
+ }
+ runtime->num_blobs = subgraph->num_values;
+
+ size_t buffer_size = 0;
+ for (size_t i = 0; i < subgraph->num_values; i++) {
+ const struct xnn_value* value = &subgraph->values[i];
+ struct xnn_blob* blob = &runtime->blobs[i];
+    if (value->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) {
+ blob->size = xnn_tensor_get_size(subgraph, i);
+ if (value->data == NULL) {
+ if ((value->flags & (XNN_VALUE_FLAG_EXTERNAL_INPUT | XNN_VALUE_FLAG_EXTERNAL_OUTPUT)) == 0) {
+ // Value is purely internal to the runtime, and must be allocated in its workspace.
+ buffer_size = round_up_po2(buffer_size + blob->size, XNN_EXTRA_BYTES);
+ } else {
+ // Value is non-static and external to the runtime: must be specified via a call to xnn_setup_runtime.
+ blob->external = true;
+ }
+ }
+ }
+ }
+
+ runtime->workspace = xnn_allocate_simd_memory(buffer_size);
+ if (runtime->workspace == NULL) {
+    xnn_log_error("failed to allocate %zu bytes for runtime workspace", buffer_size);
+ goto error;
+ }
+
+ size_t buffer_offset = 0;
+ for (size_t i = 0; i < subgraph->num_values; i++) {
+ const struct xnn_value* value = &subgraph->values[i];
+ struct xnn_blob* blob = &runtime->blobs[i];
+    if (value->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) {
+ if (value->data == NULL && !blob->external) {
+ // Value is purely internal to the runtime, allocate it in the workspace.
+ blob->data = (void*) ((uintptr_t) runtime->workspace + buffer_offset);
+ buffer_offset = round_up_po2(buffer_offset + blob->size, XNN_EXTRA_BYTES);
+ }
+ }
+ }
+
+ *runtime_out = runtime;
+ return xnn_status_success;
+
+error:
+ xnn_delete_runtime(runtime);
+ return status;
+}
+
+enum xnn_status xnn_setup_runtime(
+ xnn_runtime_t runtime,
+ size_t num_external_values,
+ const struct xnn_external_value* external_values)
+{
+ // Validate inputs without changing internal state.
+ // This ensures that runtime stays in consistent state in case validation fails midway.
+ for (size_t i = 0; i < num_external_values; i++) {
+ const struct xnn_external_value* external_value = &external_values[i];
+ const uint32_t value_id = external_value->id;
+ if (value_id >= runtime->num_blobs) {
+ xnn_log_error("failed to setup runtime: out-of-bounds ID %" PRIu32 " in external value #%zu",
+ value_id, i);
+ return xnn_status_invalid_parameter;
+ }
+
+ const struct xnn_blob* blob = &runtime->blobs[value_id];
+ if (!blob->external) {
+ xnn_log_error("failed to setup runtime: Value %" PRIu32 " is not external", value_id);
+ return xnn_status_invalid_parameter;
+ }
+ }
+
+ // Apply runtime state changes.
+ for (size_t i = 0; i < num_external_values; i++) {
+ const struct xnn_external_value* external_value = &external_values[i];
+ const uint32_t value_id = external_value->id;
+ struct xnn_blob* blob = &runtime->blobs[value_id];
+ blob->data = external_value->data;
+ }
+
+ for (size_t i = 0; i < runtime->num_ops; i++) {
+ const struct xnn_operator_data* op = &runtime->ops[i];
+ enum xnn_status status = xnn_status_success;
+ switch (op->op->type) {
+ case xnn_operator_type_convolution_nhwc_f32:
+ status = xnn_setup_convolution2d_nhwc_f32(
+ op->op,
+ op->batch_size,
+ op->input_height,
+ op->input_width,
+ runtime->blobs[op->inputs[0]].data,
+ runtime->blobs[op->outputs[0]].data,
+ NULL /* threadpool */);
+ break;
+ default:
+ xnn_log_fatal("unexpected operator type %d in operator #%zu", op->op->type, i);
+ XNN_UNREACHABLE;
+ }
+ if (status != xnn_status_success) {
+ xnn_log_error("failed to setup runtime: error in operator #%zu", i);
+ return status;
+ }
+ }
+
+ return xnn_status_success;
+}
+
+enum xnn_status xnn_invoke_runtime(
+ xnn_runtime_t runtime)
+{
+ for (size_t i = 0; i < runtime->num_ops; i++) {
+ const enum xnn_status status = xnn_run_operator(runtime->ops[i].op, NULL /* thread pool */);
+ if (status != xnn_status_success) {
+ return status;
+ }
+ }
+ return xnn_status_success;
+}
+
+enum xnn_status xnn_delete_runtime(
+ xnn_runtime_t runtime)
+{
+ if (runtime != NULL) {
+ if (runtime->ops != NULL) {
+ for (size_t i = 0; i < runtime->num_ops; i++) {
+ xnn_delete_operator(runtime->ops[i].op);
+ }
+ xnn_release_memory(runtime->ops);
+
+ xnn_release_memory(runtime->blobs);
+ xnn_release_memory(runtime->workspace);
+ }
+ xnn_release_memory(runtime);
+ }
+ return xnn_status_success;
+}
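Workspace packing in xnn_create_runtime above advances a running byte offset with round_up_po2, so each internal blob starts at an aligned offset past the previous one. A minimal sketch of that arithmetic, assuming the usual power-of-2 round-up definition (the real helper lives in src/xnnpack/math.h):

// Minimal sketch: round n up to a multiple of q, where q is a power of 2.
static inline size_t round_up_po2_sketch(size_t n, size_t q) {
  return (n + q - 1) & ~(q - 1);
}
// E.g. with a 16-byte granule: round_up_po2_sketch(100, 16) == 112, so the
// next blob begins at a 16-byte-aligned offset past the 100-byte blob.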
diff --git a/src/subgraph.c b/src/subgraph.c
new file mode 100644
index 000000000..1c0a565e8
--- /dev/null
+++ b/src/subgraph.c
@@ -0,0 +1,425 @@
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <inttypes.h>
+#include <math.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <xnnpack.h>
+#include <xnnpack/allocator.h>
+#include <xnnpack/log.h>
+#include <xnnpack/math.h>
+#include <xnnpack/params.h>
+#include <xnnpack/subgraph.h>
+
+
+enum xnn_status xnn_create_subgraph(
+ uint32_t external_value_ids,
+ uint32_t flags,
+ xnn_subgraph_t* subgraph_out)
+{
+ struct xnn_subgraph* subgraph = NULL;
+ enum xnn_status status = xnn_status_uninitialized;
+
+ if (!xnn_params.initialized) {
+ xnn_log_error("failed to create subgraph: XNNPACK is not initialized");
+ goto error;
+ }
+
+ status = xnn_status_out_of_memory;
+
+ subgraph = xnn_allocate_zero_memory(sizeof(struct xnn_subgraph));
+ if (subgraph == NULL) {
+ xnn_log_error("failed to allocate %zu bytes for subgraph descriptor", sizeof(struct xnn_subgraph));
+ goto error;
+ }
+
+ subgraph->external_value_ids = external_value_ids;
+
+ subgraph->values = xnn_allocate_zero_memory(external_value_ids * sizeof(struct xnn_value));
+ if (subgraph->values == NULL) {
+ xnn_log_error("failed to allocate %zu bytes for subgraph values", external_value_ids * sizeof(struct xnn_value));
+ goto error;
+ }
+ for (size_t i = 0; i < external_value_ids; i++) {
+ subgraph->values[i].id = i;
+ }
+ subgraph->num_values = external_value_ids;
+ subgraph->num_reserved_values = external_value_ids;
+
+ *subgraph_out = subgraph;
+ return xnn_status_success;
+
+error:
+ xnn_delete_subgraph(subgraph);
+ return status;
+}
+
+
+struct xnn_value* xnn_subgraph_new_internal_value(xnn_subgraph_t subgraph)
+{
+ struct xnn_value* values = subgraph->values;
+ const size_t size = subgraph->num_values;
+ const size_t capacity = subgraph->num_reserved_values;
+ if (capacity < size + 1) {
+ const size_t new_capacity = max(min(capacity * 2, capacity + 512), capacity + 64);
+ assert(new_capacity >= size + 1);
+ values = xnn_reallocate_memory(values, new_capacity * sizeof(struct xnn_value));
+ if (values == NULL) {
+      xnn_log_error("failed to allocate %zu bytes for subgraph values",
+        new_capacity * sizeof(struct xnn_value));
+ return values;
+ }
+
+ memset(values + size, 0, (new_capacity - size) * sizeof(struct xnn_value));
+ subgraph->num_reserved_values = new_capacity;
+ subgraph->values = values;
+ }
+ subgraph->num_values = size + 1;
+ struct xnn_value* new_value = values + size;
+ new_value->id = size;
+ return new_value;
+}
+
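For example, this growth policy takes a capacity of 0 to 64, 100 to 200, and 1000 to 1512: doubling for small arrays, then switching to fixed 512-element increments once doubling would over-allocate.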
+struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph)
+{
+ struct xnn_node* nodes = subgraph->nodes;
+ const size_t size = subgraph->num_nodes;
+ const size_t capacity = subgraph->num_reserved_nodes;
+
+ if (capacity < size + 1) {
+ const size_t new_capacity = max(min(capacity * 2, capacity + 512), capacity + 64);
+ assert(new_capacity >= size + 1);
+ nodes = xnn_reallocate_memory(nodes, new_capacity * sizeof(struct xnn_node));
+ if (nodes == NULL) {
+      xnn_log_error("failed to allocate %zu bytes for subgraph nodes",
+        new_capacity * sizeof(struct xnn_node));
+ return nodes;
+ }
+
+ memset(nodes + size, 0, (new_capacity - size) * sizeof(struct xnn_node));
+ subgraph->num_reserved_nodes = new_capacity;
+ subgraph->nodes = nodes;
+ }
+ subgraph->num_nodes = size + 1;
+ struct xnn_node* new_node = nodes + size;
+ new_node->id = size;
+ return new_node;
+}
+
+enum xnn_status xnn_define_convolution_2d(
+ xnn_subgraph_t subgraph,
+ uint32_t input_padding_top,
+ uint32_t input_padding_right,
+ uint32_t input_padding_bottom,
+ uint32_t input_padding_left,
+ uint32_t kernel_height,
+ uint32_t kernel_width,
+ uint32_t subsampling_height,
+ uint32_t subsampling_width,
+ uint32_t dilation_height,
+ uint32_t dilation_width,
+ uint32_t groups,
+ size_t group_input_channels,
+ size_t group_output_channels,
+ float output_min,
+ float output_max,
+ uint32_t input_id,
+ uint32_t filter_id,
+ uint32_t bias_id,
+ uint32_t output_id,
+ uint32_t flags)
+{
+ if (!xnn_params.initialized) {
+ xnn_log_error("failed to define Convolution operator: XNNPACK is not initialized");
+ return xnn_status_uninitialized;
+ }
+
+ if (kernel_width == 0 || kernel_height == 0) {
+ xnn_log_error(
+ "failed to define Convolution operator with %" PRIu32 "x%" PRIu32 " kernel: kernel dimensions must be non-zero",
+ kernel_width, kernel_height);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (subsampling_width == 0 || subsampling_height == 0) {
+ xnn_log_error(
+ "failed to define Convolution operator with %" PRIu32 "x%" PRIu32 " subsampling: "
+ "subsampling dimensions must be non-zero",
+ subsampling_width, subsampling_height);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (dilation_width == 0 || dilation_height == 0) {
+ xnn_log_error(
+ "failed to define Convolution operator with %" PRIu32 "x%" PRIu32 " dilation: "
+ "dilation dimensions must be non-zero",
+ dilation_width, dilation_height);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (groups == 0) {
+ xnn_log_error(
+ "failed to define Convolution operator with %" PRIu32 " groups: number of groups must be non-zero", groups);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (group_input_channels == 0) {
+ xnn_log_error(
+ "failed to define Convolution operator with %zu input channels per group: "
+ "number of channels must be non-zero",
+ group_input_channels);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (group_output_channels == 0) {
+ xnn_log_error(
+ "failed to define Convolution operator with %zu output channels per group: "
+ "number of channels must be non-zero",
+ group_output_channels);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (isnan(output_min)) {
+ xnn_log_error(
+ "failed to define Convolution operator with NaN output lower bound: lower bound must be non-NaN");
+ return xnn_status_invalid_parameter;
+ }
+
+ if (isnan(output_max)) {
+ xnn_log_error(
+ "failed to define Convolution operator with NaN output upper bound: upper bound must be non-NaN");
+ return xnn_status_invalid_parameter;
+ }
+
+ if (output_min >= output_max) {
+ xnn_log_error(
+ "failed to define Convolution operator with [%.7g, %.7g] output range: "
+ "lower bound must be below upper bound",
+ output_min, output_max);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (input_id >= subgraph->num_values) {
+ xnn_log_error(
+ "failed to define Convolution operator with input ID #%" PRIu32 ": invalid Value ID",
+ input_id);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (filter_id >= subgraph->num_values) {
+ xnn_log_error(
+ "failed to define Convolution operator with filter ID #%" PRIu32 ": invalid Value ID",
+ filter_id);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (bias_id >= subgraph->num_values) {
+ xnn_log_error(
+ "failed to define Convolution operator with bias ID #%" PRIu32 ": invalid Value ID",
+ bias_id);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (output_id >= subgraph->num_values) {
+ xnn_log_error(
+ "failed to define Convolution operator with output ID #%" PRIu32 ": invalid Value ID",
+ output_id);
+ return xnn_status_invalid_parameter;
+ }
+
+ struct xnn_node* node = xnn_subgraph_new_node(subgraph);
+ if (node == NULL) {
+ return xnn_status_out_of_memory;
+ }
+
+ node->type = xnn_node_type_convolution_2d;
+ node->params.convolution_2d.input_padding_top = input_padding_top;
+ node->params.convolution_2d.input_padding_right = input_padding_right;
+ node->params.convolution_2d.input_padding_bottom = input_padding_bottom;
+ node->params.convolution_2d.input_padding_left = input_padding_left;
+ node->params.convolution_2d.kernel_height = kernel_height;
+ node->params.convolution_2d.kernel_width = kernel_width;
+ node->params.convolution_2d.subsampling_height = subsampling_height;
+ node->params.convolution_2d.subsampling_width = subsampling_width;
+ node->params.convolution_2d.dilation_height = dilation_height;
+ node->params.convolution_2d.dilation_width = dilation_width;
+ node->params.convolution_2d.groups = groups;
+ node->params.convolution_2d.group_input_channels = group_input_channels;
+ node->params.convolution_2d.group_output_channels = group_output_channels;
+ node->params.convolution_2d.output_min = output_min;
+ node->params.convolution_2d.output_max = output_max;
+ node->num_inputs = 3;
+ node->inputs.raw[0] = input_id;
+ node->inputs.raw[1] = filter_id;
+ node->inputs.raw[2] = bias_id;
+ node->num_outputs = 1;
+ node->outputs.raw[0] = output_id;
+ node->flags = flags;
+
+ return xnn_status_success;
+}
+
+enum xnn_status xnn_define_depthwise_convolution_2d(
+ xnn_subgraph_t subgraph,
+ uint32_t input_padding_top,
+ uint32_t input_padding_right,
+ uint32_t input_padding_bottom,
+ uint32_t input_padding_left,
+ uint32_t kernel_height,
+ uint32_t kernel_width,
+ uint32_t subsampling_height,
+ uint32_t subsampling_width,
+ uint32_t dilation_height,
+ uint32_t dilation_width,
+ uint32_t depth_multiplier,
+ size_t input_channels,
+ float output_min,
+ float output_max,
+ uint32_t input_id,
+ uint32_t filter_id,
+ uint32_t bias_id,
+ uint32_t output_id,
+ uint32_t flags)
+{
+ if (!xnn_params.initialized) {
+ xnn_log_error("failed to define Depthwise Convolution operator: XNNPACK is not initialized");
+ return xnn_status_uninitialized;
+ }
+
+ if (kernel_width == 0 || kernel_height == 0) {
+ xnn_log_error(
+ "failed to define Depthwise Convolution operator with %" PRIu32 "x%" PRIu32 " kernel: kernel dimensions must be non-zero",
+ kernel_width, kernel_height);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (subsampling_width == 0 || subsampling_height == 0) {
+ xnn_log_error(
+ "failed to define Depthwise Convolution operator with %" PRIu32 "x%" PRIu32 " subsampling: "
+ "subsampling dimensions must be non-zero",
+ subsampling_width, subsampling_height);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (dilation_width == 0 || dilation_height == 0) {
+ xnn_log_error(
+ "failed to define Depthwise Convolution operator with %" PRIu32 "x%" PRIu32 " dilation: "
+ "dilation dimensions must be non-zero",
+ dilation_width, dilation_height);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (depth_multiplier == 0) {
+ xnn_log_error(
+ "failed to define Depthwise Convolution operator with %" PRIu32 " depth multiplier: "
+ "depth multiplier must be non-zero",
+ depth_multiplier);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (input_channels == 0) {
+ xnn_log_error(
+ "failed to define Depthwise Convolution operator with %zu input channels: "
+ "number of channels must be non-zero",
+ input_channels);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (isnan(output_min)) {
+ xnn_log_error(
+ "failed to define Depthwise Convolution operator with NaN output lower bound: lower bound must be non-NaN");
+ return xnn_status_invalid_parameter;
+ }
+
+ if (isnan(output_max)) {
+ xnn_log_error(
+ "failed to define Depthwise Convolution operator with NaN output upper bound: upper bound must be non-NaN");
+ return xnn_status_invalid_parameter;
+ }
+
+ if (output_min >= output_max) {
+ xnn_log_error(
+ "failed to define Depthwise Convolution operator with [%.7g, %.7g] output range: "
+ "lower bound must be below upper bound",
+ output_min, output_max);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (input_id >= subgraph->num_values) {
+ xnn_log_error(
+ "failed to define Depthwise Convolution operator with input ID #%" PRIu32 ": invalid Value ID",
+ input_id);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (filter_id >= subgraph->num_values) {
+ xnn_log_error(
+ "failed to define Depthwise Convolution operator with filter ID #%" PRIu32 ": invalid Value ID",
+ filter_id);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (bias_id >= subgraph->num_values) {
+ xnn_log_error(
+ "failed to define Depthwise Convolution operator with bias ID #%" PRIu32 ": invalid Value ID",
+ bias_id);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (output_id >= subgraph->num_values) {
+ xnn_log_error(
+ "failed to define Depthwise Convolution operator with output ID #%" PRIu32 ": invalid Value ID",
+ output_id);
+ return xnn_status_invalid_parameter;
+ }
+
+ struct xnn_node* node = xnn_subgraph_new_node(subgraph);
+ if (node == NULL) {
+ return xnn_status_out_of_memory;
+ }
+
+ node->type = xnn_node_type_depthwise_convolution_2d;
+ node->params.depthwise_convolution_2d.input_padding_top = input_padding_top;
+ node->params.depthwise_convolution_2d.input_padding_right = input_padding_right;
+ node->params.depthwise_convolution_2d.input_padding_bottom = input_padding_bottom;
+ node->params.depthwise_convolution_2d.input_padding_left = input_padding_left;
+ node->params.depthwise_convolution_2d.kernel_height = kernel_height;
+ node->params.depthwise_convolution_2d.kernel_width = kernel_width;
+ node->params.depthwise_convolution_2d.subsampling_height = subsampling_height;
+ node->params.depthwise_convolution_2d.subsampling_width = subsampling_width;
+ node->params.depthwise_convolution_2d.dilation_height = dilation_height;
+ node->params.depthwise_convolution_2d.dilation_width = dilation_width;
+ node->params.depthwise_convolution_2d.depth_multiplier = depth_multiplier;
+ node->params.depthwise_convolution_2d.input_channels = input_channels;
+ node->params.depthwise_convolution_2d.output_min = output_min;
+ node->params.depthwise_convolution_2d.output_max = output_max;
+ node->num_inputs = 3;
+ node->inputs.raw[0] = input_id;
+ node->inputs.raw[1] = filter_id;
+ node->inputs.raw[2] = bias_id;
+ node->num_outputs = 1;
+ node->outputs.raw[0] = output_id;
+ node->flags = flags;
+
+ return xnn_status_success;
+}
+
+enum xnn_status xnn_delete_subgraph(
+ xnn_subgraph_t subgraph)
+{
+ if (subgraph != NULL) {
+ memset(subgraph->nodes, 0, sizeof(struct xnn_node) * subgraph->num_nodes);
+ xnn_release_memory(subgraph->nodes);
+
+ memset(subgraph->values, 0, sizeof(struct xnn_value) * subgraph->num_values);
+ xnn_release_memory(subgraph->values);
+
+ memset(subgraph, 0, sizeof(struct xnn_subgraph));
+ xnn_release_memory(subgraph);
+ }
+ return xnn_status_success;
+}
diff --git a/src/tensor.c b/src/tensor.c
new file mode 100644
index 000000000..8730e4502
--- /dev/null
+++ b/src/tensor.c
@@ -0,0 +1,101 @@
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <xnnpack.h>
+#include <xnnpack/allocator.h>
+#include <xnnpack/log.h>
+#include <xnnpack/params.h>
+#include <xnnpack/subgraph.h>
+
+
+enum xnn_status xnn_define_tensor_value(
+ xnn_subgraph_t subgraph,
+ enum xnn_datatype datatype,
+ size_t num_dims,
+ const size_t* dims,
+ const void* data,
+ uint32_t external_id,
+ uint32_t flags,
+ uint32_t* id_out)
+{
+ if (!xnn_params.initialized) {
+ xnn_log_error("failed to create Dense Tensor value: XNNPACK is not initialized");
+ return xnn_status_uninitialized;
+ }
+
+ if (external_id != XNN_INVALID_VALUE_ID && external_id >= subgraph->external_value_ids) {
+ xnn_log_error(
+ "failed to create Dense Tensor value: "
+ "external ID %" PRIu32 " exceeds the number of reserved external IDs in subgraph (%" PRIu32 ")",
+ external_id, subgraph->external_value_ids);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (num_dims > XNN_MAX_TENSOR_DIMS) {
+    xnn_log_error("failed to create Dense Tensor value: number of dimensions exceeds XNNPACK limit (%d)",
+ XNN_MAX_TENSOR_DIMS);
+ return xnn_status_unsupported_parameter;
+ }
+
+ switch (datatype) {
+ case xnn_datatype_fp32:
+ case xnn_datatype_fp16:
+ break;
+ default:
+ xnn_log_error("failed to create Dense Tensor value: invalid data type (%d)", datatype);
+ return xnn_status_unsupported_parameter;
+ }
+
+  struct xnn_value* value;
+  if (external_id != XNN_INVALID_VALUE_ID) {
+    value = subgraph->values + external_id;
+  } else {
+    value = xnn_subgraph_new_internal_value(subgraph);
+    if (value == NULL) {
+      return xnn_status_out_of_memory;
+    }
+  }
+ value->type = xnn_value_type_dense_tensor;
+ value->datatype = datatype;
+ value->shape.num_dims = num_dims;
+ memcpy(value->shape.dim, dims, num_dims * sizeof(size_t));
+ value->flags = flags;
+ value->data = data;
+
+ *id_out = value->id;
+ return xnn_status_success;
+}
+
+size_t xnn_tensor_get_size(
+ xnn_subgraph_t subgraph,
+ uint32_t value_id)
+{
+ assert(value_id < subgraph->num_values);
+
+ const struct xnn_value* value = subgraph->values + value_id;
+ assert(value->type == xnn_value_type_dense_tensor);
+ assert(value->datatype != xnn_datatype_invalid);
+
+ size_t size = 0;
+ switch (value->datatype) {
+ case xnn_datatype_fp16:
+ size = 2;
+ break;
+ case xnn_datatype_fp32:
+ size = 4;
+ break;
+ case xnn_datatype_invalid:
+ XNN_UNREACHABLE;
+ }
+
+ for (size_t i = 0; i < value->shape.num_dims; i++) {
+ size *= value->shape.dim[i];
+ }
+
+ return size;
+}
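For example, an fp32 tensor with shape [1, 224, 224, 3] occupies 4 × 1 × 224 × 224 × 3 = 602,112 bytes; for a zero-dimensional tensor the loop does not run and the size degenerates to the element size.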
diff --git a/src/xnnpack/allocator.h b/src/xnnpack/allocator.h
index fdcfce5bf..a39387416 100644
--- a/src/xnnpack/allocator.h
+++ b/src/xnnpack/allocator.h
@@ -25,6 +25,14 @@ inline static void* xnn_allocate_memory(size_t memory_size) {
return xnn_params.allocator.allocate(xnn_params.allocator.context, memory_size);
}
+inline static void* xnn_allocate_zero_memory(size_t memory_size) {
+ void* memory_pointer = xnn_params.allocator.allocate(xnn_params.allocator.context, memory_size);
+ if (memory_pointer != NULL) {
+ memset(memory_pointer, 0, memory_size);
+ }
+ return memory_pointer;
+}
+
inline static void* xnn_reallocate_memory(void* memory_pointer, size_t memory_size) {
return xnn_params.allocator.reallocate(xnn_params.allocator.context, memory_pointer, memory_size);
}
diff --git a/src/xnnpack/subgraph.h b/src/xnnpack/subgraph.h
new file mode 100644
index 000000000..76fee6215
--- /dev/null
+++ b/src/xnnpack/subgraph.h
@@ -0,0 +1,168 @@
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <xnnpack.h>
+
+#define XNN_MAX_INPUTS 3
+#define XNN_MAX_OUTPUTS 1
+
+#define XNN_MAX_RUNTIME_INPUTS 2
+#define XNN_MAX_RUNTIME_OUTPUTS 1
+
+struct xnn_shape {
+ size_t num_dims;
+ size_t dim[XNN_MAX_TENSOR_DIMS];
+};
+
+enum xnn_value_type {
+ xnn_value_type_invalid = 0,
+ xnn_value_type_dense_tensor = 1,
+};
+
+/// Abstraction for a collection of elements produced and consumed by Nodes.
+struct xnn_value {
+ /// Unique ID for the value.
+ uint32_t id;
+ /// Type of the collection of elements.
+ ///
+ /// Currently only dense tensors are supported.
+ /// Other types (e.g. sparse tensors) might be supported in the future.
+ enum xnn_value_type type;
+ /// Type of elements in the collection.
+ enum xnn_datatype datatype;
+ /// Tensor shape.
+ struct xnn_shape shape;
+ /// Binary features of the tensor. Supported values are any combination of:
+ /// - XNN_VALUE_FLAG_EXTERNAL_INPUT
+ /// - XNN_VALUE_FLAG_EXTERNAL_OUTPUT
+ uint32_t flags;
+ /// Static initialization data. Must be null for non-static values.
+ const void* data;
+};
+
+struct xnn_blob {
+ /// Size in bytes.
+ size_t size;
+ /// Data pointer.
+ void* data;
+ bool external;
+};
+
+enum xnn_node_type {
+ xnn_node_type_invalid = 0,
+ xnn_node_type_convolution_2d,
+ xnn_node_type_depthwise_convolution_2d,
+};
+
+struct xnn_node {
+ enum xnn_node_type type;
+ uint32_t id;
+ /// Static parameters of the operator node.
+ union {
+ struct {
+ uint32_t input_padding_top;
+ uint32_t input_padding_right;
+ uint32_t input_padding_bottom;
+ uint32_t input_padding_left;
+ uint32_t kernel_height;
+ uint32_t kernel_width;
+ uint32_t subsampling_height;
+ uint32_t subsampling_width;
+ uint32_t dilation_height;
+ uint32_t dilation_width;
+ uint32_t groups;
+ size_t group_input_channels;
+ size_t group_output_channels;
+ float output_min;
+ float output_max;
+ } convolution_2d;
+ struct {
+ uint32_t input_padding_top;
+ uint32_t input_padding_right;
+ uint32_t input_padding_bottom;
+ uint32_t input_padding_left;
+ uint32_t kernel_height;
+ uint32_t kernel_width;
+ uint32_t subsampling_height;
+ uint32_t subsampling_width;
+ uint32_t dilation_height;
+ uint32_t dilation_width;
+ uint32_t depth_multiplier;
+ size_t input_channels;
+ float output_min;
+ float output_max;
+ } depthwise_convolution_2d;
+ } params;
+ /// Value IDs for node inputs.
+ union {
+ uint32_t raw[XNN_MAX_INPUTS];
+ struct {
+ uint32_t input;
+ uint32_t filter;
+ uint32_t bias;
+ } convolution_2d;
+ } inputs;
+ uint32_t num_inputs;
+ /// Value IDs for node outputs.
+ union {
+ struct {
+ uint32_t output;
+ } convolution_2d;
+ uint32_t raw[XNN_MAX_OUTPUTS];
+ } outputs;
+ uint32_t num_outputs;
+ uint32_t flags;
+};
+
+struct xnn_operator_data {
+ xnn_operator_t op;
+ size_t batch_size;
+ size_t input_height;
+ size_t input_width;
+ uint32_t inputs[XNN_MAX_RUNTIME_INPUTS];
+ uint32_t outputs[XNN_MAX_RUNTIME_OUTPUTS];
+};
+
+struct xnn_subgraph {
+ /// Number of Value IDs reserved for communication with external graph representation.
+/// Values created during subgraph transformation avoid using IDs in the [0, external_value_ids-1] range.
+ uint32_t external_value_ids;
+
+ uint32_t num_reserved_values;
+ uint32_t num_values;
+ struct xnn_value* values;
+
+ uint32_t num_reserved_nodes;
+ uint32_t num_nodes;
+ struct xnn_node* nodes;
+};
+
+/// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values.
+struct xnn_runtime {
+ uint32_t num_external_values;
+
+ /// List of operators in the execution plan, in execution order.
+ struct xnn_operator_data* ops;
+ /// Number of operators in the execution plan.
+ size_t num_ops;
+
+ struct xnn_blob* blobs;
+ size_t num_blobs;
+
+ void* workspace;
+};
+
+struct xnn_value* xnn_subgraph_new_internal_value(xnn_subgraph_t subgraph);
+
+struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph);
+
+size_t xnn_tensor_get_size(
+ xnn_subgraph_t subgraph,
+ uint32_t value_id);
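The inputs/outputs unions are meant to alias: raw[i] and the i-th named field occupy the same storage, which is what lets src/runtime.c read node->inputs.raw[1] for the filter that xnn_define_convolution_2d stored positionally. A sketch of the invariant, assuming the usual C layout where the three uint32_t struct members overlay raw[0..2]:

// Sketch: positional and named views of a Convolution node's inputs alias.
// Assumes node was populated by xnn_define_convolution_2d.
assert(node->inputs.raw[0] == node->inputs.convolution_2d.input);
assert(node->inputs.raw[1] == node->inputs.convolution_2d.filter);
assert(node->inputs.raw[2] == node->inputs.convolution_2d.bias);
assert(node->outputs.raw[0] == node->outputs.convolution_2d.output);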