diff options
-rw-r--r-- | BUILD.bazel | 4 | ||||
-rw-r--r-- | include/xnnpack.h | 153 | ||||
-rw-r--r-- | src/runtime.c | 263 | ||||
-rw-r--r-- | src/subgraph.c | 425 | ||||
-rw-r--r-- | src/tensor.c | 101 | ||||
-rw-r--r-- | src/xnnpack/allocator.h | 8 | ||||
-rw-r--r-- | src/xnnpack/subgraph.h | 168 |
7 files changed, 1122 insertions, 0 deletions
diff --git a/BUILD.bazel b/BUILD.bazel index 619a16745..d8916440f 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1474,6 +1474,7 @@ INTERNAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [ "src/xnnpack/params-init.h", "src/xnnpack/requantization-stubs.h", "src/xnnpack/requantization.h", + "src/xnnpack/subgraph.h", ] ACCURACY_EVAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [ @@ -1818,6 +1819,9 @@ xnnpack_cc_library( srcs = OPERATOR_SRCS + [ "src/memory.c", "src/operator-delete.c", + "src/runtime.c", + "src/subgraph.c", + "src/tensor.c", ], hdrs = INTERNAL_HDRS + LOGGING_HDRS, copts = xnnpack_std_copts() + LOGGING_COPTS + [ diff --git a/include/xnnpack.h b/include/xnnpack.h index 1511e6f13..1ee7827df 100644 --- a/include/xnnpack.h +++ b/include/xnnpack.h @@ -127,6 +127,159 @@ enum xnn_status xnn_initialize(const struct xnn_allocator* allocator); /// @retval xnn_status_success - deinitialization call succeeded. enum xnn_status xnn_deinitialize(void); +typedef struct xnn_subgraph* xnn_subgraph_t; + +enum xnn_status xnn_create_subgraph( + uint32_t external_value_ids, + uint32_t flags, + xnn_subgraph_t* subgraph_out); + +enum xnn_status xnn_delete_subgraph( + xnn_subgraph_t subgraph); + +#define XNN_VALUE_FLAG_EXTERNAL_INPUT 0x00000001 +#define XNN_VALUE_FLAG_EXTERNAL_OUTPUT 0x00000002 + +#define XNN_INVALID_VALUE_ID UINT32_MAX + +enum xnn_datatype { + xnn_datatype_invalid = 0, + xnn_datatype_fp32 = 1, + xnn_datatype_fp16 = 2, +}; + +/// Define a tensor-type Value and add it to a subgraph. +/// +/// @param datatype - type of tensor elements. +/// @param num_dims - number of dimensions in the shape. +/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL. +/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized, +/// this pointer must be NULL. +/// @param external_id - external ID for the Value. 
The ID must be within the range of reserved Value IDs specified in +/// subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be created +/// for the Value. +/// @param subgraph - subgraph that will own the created value. +/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. +enum xnn_status xnn_define_tensor_value( + xnn_subgraph_t subgraph, + enum xnn_datatype datatype, + size_t num_dims, + const size_t* dims, + const void* data, + uint32_t external_id, + uint32_t flags, + uint32_t* id_out); + +/// Define a 2D Convolution node and add it to a subgraph. +/// +/// @param input_padding_top - implicit zero-padding above 2D input data. +/// @param input_padding_right - implicit zero-padding to the right of 2D input data. +/// @param input_padding_bottom - implicit zero-padding below 2D input data. +/// @param input_padding_left - implicit zero-padding to the left of 2D input data. +/// @param kernel_height - kernel (filter) height. +/// @param kernel_width - kernel (filter) width. +/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride). +/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride). +/// @param dilation_height - dilation of kernel elements along the height dimension. +/// @param dilation_width - dilation of kernel elements along the width dimension. +/// @param groups - number of convolution groups. +/// @param group_input_channels - number of input channels per group. +/// @param group_output_channels - number of output channels per group. +/// @param output_min - lower bound for clipping output values. +/// @param output_max - upper bound for clipping output values. +/// @param input_id - input tensor ID. Must be a 4D tensor with [N, IH, IW, groups * group_input_channels] dimensions. +/// @param filter_id - filter tensor ID. 
Must be a 4D tensor with +/// [groups * group_output_channels, kernel_height, kernel_width, group_input_channels] dimensions. +/// @param bias_id - bias tensor ID. Must be a 1D tensor with [groups * group_output_channels] dimensions. +/// @param output_id - output tensor ID. Must be a 4D tensor with [N, OH, OW, groups * group_output_channels] dimensions. +enum xnn_status xnn_define_convolution_2d( + xnn_subgraph_t subgraph, + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t subsampling_height, + uint32_t subsampling_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t groups, + size_t group_input_channels, + size_t group_output_channels, + float output_min, + float output_max, + uint32_t input_id, + uint32_t filter_id, + uint32_t bias_id, + uint32_t output_id, + uint32_t flags); + +/// Define a 2D Depthwise Convolution node and add it to a subgraph. +/// +/// @param input_padding_top - implicit zero-padding above 2D input data. +/// @param input_padding_right - implicit zero-padding to the right of 2D input data. +/// @param input_padding_bottom - implicit zero-padding below 2D input data. +/// @param input_padding_left - implicit zero-padding to the left of 2D input data. +/// @param kernel_height - kernel (filter) height. +/// @param kernel_width - kernel (filter) width. +/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride). +/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride). +/// @param dilation_height - dilation of kernel elements along the height dimension. +/// @param dilation_width - dilation of kernel elements along the width dimension. +/// @param depth_multiplier - ratio of output channels to input channels. +/// @param input_channels - number of input channels. 
+/// @param output_min - lower bound for clipping output values. +/// @param output_max - upper bound for clipping output values. +/// @param input_id - input tensor. Must be a 4D tensor with [N, IH, IW, input_channels] dimensions. +/// @param filter_id - filter tensor. Must be a 4D tensor with +/// [1, kernel_height, kernel_width, input_channels * depth_multiplier] dimensions. +/// @param bias_id - bias tensor. Must be a 1D tensor with [input_channels * depth_multiplier] dimensions. +/// @param output_id - output tensor. Must be a 4D tensor with [N, OH, OW, input_channels * depth_multiplier] dimensions. +enum xnn_status xnn_define_depthwise_convolution_2d( + xnn_subgraph_t subgraph, + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t subsampling_height, + uint32_t subsampling_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t depth_multiplier, + size_t input_channels, + float output_min, + float output_max, + uint32_t input_id, + uint32_t filter_id, + uint32_t bias_id, + uint32_t output_id, + uint32_t flags); + +typedef struct xnn_runtime* xnn_runtime_t; + +enum xnn_status xnn_create_runtime( + xnn_subgraph_t subgraph, + xnn_runtime_t* runtime_out); + +struct xnn_external_value { + uint32_t id; + void* data; +}; + +enum xnn_status xnn_setup_runtime( + xnn_runtime_t runtime, + size_t num_external_values, + const struct xnn_external_value* external_values); + +enum xnn_status xnn_invoke_runtime( + xnn_runtime_t runtime); + +enum xnn_status xnn_delete_runtime( + xnn_runtime_t runtime); + typedef struct xnn_operator* xnn_operator_t; enum xnn_status xnn_run_operator( diff --git a/src/runtime.c b/src/runtime.c new file mode 100644 index 000000000..d95abad20 --- /dev/null +++ b/src/runtime.c @@ -0,0 +1,263 @@ +// Copyright 2020 Google LLC +// +// This source code is licensed under the BSD-style license found in the 
+// LICENSE file in the root directory of this source tree. + +#include <math.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> + +#include <xnnpack.h> +#include <xnnpack/allocator.h> +#include <xnnpack/log.h> +#include <xnnpack/math.h> +#include <xnnpack/operator.h> +#include <xnnpack/params.h> +#include <xnnpack/subgraph.h> + + +enum xnn_status xnn_create_runtime( + xnn_subgraph_t subgraph, + xnn_runtime_t* runtime_out) +{ + struct xnn_runtime* runtime = NULL; + enum xnn_status status = xnn_status_uninitialized; + + if (!xnn_params.initialized) { + xnn_log_error("failed to create runtime: XNNPACK is not initialized"); + goto error; + } + + status = xnn_status_out_of_memory; + + runtime = xnn_allocate_zero_memory(sizeof(struct xnn_runtime)); + if (runtime == NULL) { + xnn_log_error("failed to allocate %zu bytes for runtime descriptor", sizeof(struct xnn_runtime)); + goto error; + } + + runtime->ops = xnn_allocate_zero_memory(sizeof(struct xnn_operator_data) * subgraph->num_nodes); + if (runtime->ops == NULL) { + xnn_log_error("failed to allocate %zu bytes for opdata descriptors", + sizeof(struct xnn_operator_data) * subgraph->num_nodes); + goto error; + } + runtime->num_ops = subgraph->num_nodes; + + struct xnn_value* values = subgraph->values; + for (size_t i = 0; i < subgraph->num_nodes; i++) { + const struct xnn_node* node = subgraph->nodes + i; + switch (node->type) { + case xnn_node_type_convolution_2d: + status = xnn_create_convolution2d_nhwc_f32( + node->params.convolution_2d.input_padding_top, + node->params.convolution_2d.input_padding_right, + node->params.convolution_2d.input_padding_bottom, + node->params.convolution_2d.input_padding_left, + node->params.convolution_2d.kernel_height, + node->params.convolution_2d.kernel_width, + node->params.convolution_2d.subsampling_height, + node->params.convolution_2d.subsampling_width, + node->params.convolution_2d.dilation_height, + node->params.convolution_2d.dilation_width, + 
node->params.convolution_2d.groups, + node->params.convolution_2d.group_input_channels, + node->params.convolution_2d.group_output_channels, + node->params.convolution_2d.group_input_channels * node->params.convolution_2d.groups /* input_pixel_stride */, + node->params.convolution_2d.group_output_channels * node->params.convolution_2d.groups /* output_pixel_stride */, + values[node->inputs.convolution_2d.filter].data, + values[node->inputs.convolution_2d.bias].data, + node->params.convolution_2d.output_min, + node->params.convolution_2d.output_max, + node->flags, + &runtime->ops[i].op); + if (status != xnn_status_success) { + goto error; + } + runtime->ops[i].batch_size = subgraph->values[node->inputs.raw[0]].shape.dim[0]; + runtime->ops[i].input_height = subgraph->values[node->inputs.raw[0]].shape.dim[1]; + runtime->ops[i].input_width = subgraph->values[node->inputs.raw[0]].shape.dim[2]; + runtime->ops[i].inputs[0] = node->inputs.raw[0]; + runtime->ops[i].outputs[0] = node->outputs.raw[0]; + break; + case xnn_node_type_depthwise_convolution_2d: + status = xnn_create_convolution2d_nhwc_f32( + node->params.depthwise_convolution_2d.input_padding_top, + node->params.depthwise_convolution_2d.input_padding_right, + node->params.depthwise_convolution_2d.input_padding_bottom, + node->params.depthwise_convolution_2d.input_padding_left, + node->params.depthwise_convolution_2d.kernel_height, + node->params.depthwise_convolution_2d.kernel_width, + node->params.depthwise_convolution_2d.subsampling_height, + node->params.depthwise_convolution_2d.subsampling_width, + node->params.depthwise_convolution_2d.dilation_height, + node->params.depthwise_convolution_2d.dilation_width, + node->params.depthwise_convolution_2d.input_channels /* groups */, + 1 /* group_input_channels */, + node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */, + node->params.depthwise_convolution_2d.input_channels /* input_pixel_stride */, + 
node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_pixel_stride */, + values[node->inputs.convolution_2d.filter].data, + values[node->inputs.convolution_2d.bias].data, + node->params.depthwise_convolution_2d.output_min, + node->params.depthwise_convolution_2d.output_max, + node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION, + &runtime->ops[i].op); + if (status != xnn_status_success) { + goto error; + } + runtime->ops[i].batch_size = subgraph->values[node->inputs.raw[0]].shape.dim[0]; + runtime->ops[i].input_height = subgraph->values[node->inputs.raw[0]].shape.dim[1]; + runtime->ops[i].input_width = subgraph->values[node->inputs.raw[0]].shape.dim[2]; + runtime->ops[i].inputs[0] = node->inputs.raw[0]; + runtime->ops[i].outputs[0] = node->outputs.raw[0]; + break; + case xnn_node_type_invalid: + xnn_log_fatal("unexpected node type %d in node #%zu", node->type, i); + XNN_UNREACHABLE; + break; + } + } + + runtime->blobs = xnn_allocate_zero_memory(sizeof(struct xnn_blob) * subgraph->num_values); + if (runtime->blobs == NULL) { + xnn_log_error("failed to allocate %zu bytes for blob descriptors", + sizeof(struct xnn_blob) * subgraph->num_values); + goto error; + } + runtime->num_blobs = subgraph->num_values; + + size_t buffer_size = 0; + for (size_t i = 0; i < subgraph->num_values; i++) { + const struct xnn_value* value = &subgraph->values[i]; + struct xnn_blob* blob = &runtime->blobs[i]; + if (values->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) { + blob->size = xnn_tensor_get_size(subgraph, i); + if (value->data == NULL) { + if ((value->flags & (XNN_VALUE_FLAG_EXTERNAL_INPUT | XNN_VALUE_FLAG_EXTERNAL_OUTPUT)) == 0) { + // Value is purely internal to the runtime, and must be allocated in its workspace. 
+ buffer_size = round_up_po2(buffer_size + blob->size, XNN_EXTRA_BYTES); + } else { + // Value is non-static and external to the runtime: must be specified via a call to xnn_setup_runtime. + blob->external = true; + } + } + } + } + + runtime->workspace = xnn_allocate_simd_memory(buffer_size); + if (runtime->workspace == NULL) { + xnn_log_error("failed to allocate %zu bytes to runtime workspace", buffer_size); + goto error; + } + + size_t buffer_offset = 0; + for (size_t i = 0; i < subgraph->num_values; i++) { + const struct xnn_value* value = &subgraph->values[i]; + struct xnn_blob* blob = &runtime->blobs[i]; + if (values->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) { + if (value->data == NULL && !blob->external) { + // Value is purely internal to the runtime, allocate it in the workspace. + blob->data = (void*) ((uintptr_t) runtime->workspace + buffer_offset); + buffer_offset = round_up_po2(buffer_offset + blob->size, XNN_EXTRA_BYTES); + } + } + } + + *runtime_out = runtime; + return xnn_status_success; + +error: + xnn_delete_runtime(runtime); + return status; +} + +enum xnn_status xnn_setup_runtime( + xnn_runtime_t runtime, + size_t num_external_values, + const struct xnn_external_value* external_values) +{ + // Validate inputs without changing internal state. + // This ensures that runtime stays in consistent state in case validation fails midway. 
+ for (size_t i = 0; i < num_external_values; i++) { + const struct xnn_external_value* external_value = &external_values[i]; + const uint32_t value_id = external_value->id; + if (value_id >= runtime->num_blobs) { + xnn_log_error("failed to setup runtime: out-of-bounds ID %" PRIu32 " in external value #%zu", + value_id, i); + return xnn_status_invalid_parameter; + } + + const struct xnn_blob* blob = &runtime->blobs[value_id]; + if (!blob->external) { + xnn_log_error("failed to setup runtime: Value %" PRIu32 " is not external", value_id); + return xnn_status_invalid_parameter; + } + } + + // Apply runtime state changes. + for (size_t i = 0; i < num_external_values; i++) { + const struct xnn_external_value* external_value = &external_values[i]; + const uint32_t value_id = external_value->id; + struct xnn_blob* blob = &runtime->blobs[value_id]; + blob->data = external_value->data; + } + + for (size_t i = 0; i < runtime->num_ops; i++) { + const struct xnn_operator_data* op = &runtime->ops[i]; + enum xnn_status status = xnn_status_success; + switch (op->op->type) { + case xnn_operator_type_convolution_nhwc_f32: + status = xnn_setup_convolution2d_nhwc_f32( + op->op, + op->batch_size, + op->input_height, + op->input_width, + runtime->blobs[op->inputs[0]].data, + runtime->blobs[op->outputs[0]].data, + NULL /* threadpool */); + break; + default: + xnn_log_fatal("unexpected operator type %d in operator #%zu", op->op->type, i); + XNN_UNREACHABLE; + } + if (status != xnn_status_success) { + xnn_log_error("failed to setup runtime: error in operator #%zu", i); + return status; + } + } + + return xnn_status_success; +} + +enum xnn_status xnn_invoke_runtime( + xnn_runtime_t runtime) +{ + for (size_t i = 0; i < runtime->num_ops; i++) { + const enum xnn_status status = xnn_run_operator(runtime->ops[i].op, NULL /* thread pool */); + if (status != xnn_status_success) { + return status; + } + } + return xnn_status_success; +} + +enum xnn_status xnn_delete_runtime( + xnn_runtime_t 
runtime) +{ + if (runtime != NULL) { + if (runtime->ops != NULL) { + for (size_t i = 0; i < runtime->num_ops; i++) { + xnn_delete_operator(runtime->ops[i].op); + } + xnn_release_memory(runtime->ops); + + xnn_release_memory(runtime->blobs); + xnn_release_memory(runtime->workspace); + } + xnn_release_memory(runtime); + } + return xnn_status_success; +} diff --git a/src/subgraph.c b/src/subgraph.c new file mode 100644 index 000000000..1c0a565e8 --- /dev/null +++ b/src/subgraph.c @@ -0,0 +1,425 @@ +// Copyright 2020 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include <math.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> + +#include <xnnpack.h> +#include <xnnpack/allocator.h> +#include <xnnpack/log.h> +#include <xnnpack/math.h> +#include <xnnpack/params.h> +#include <xnnpack/subgraph.h> + + +enum xnn_status xnn_create_subgraph( + uint32_t external_value_ids, + uint32_t flags, + xnn_subgraph_t* subgraph_out) +{ + struct xnn_subgraph* subgraph = NULL; + enum xnn_status status = xnn_status_uninitialized; + + if (!xnn_params.initialized) { + xnn_log_error("failed to create subgraph: XNNPACK is not initialized"); + goto error; + } + + status = xnn_status_out_of_memory; + + subgraph = xnn_allocate_zero_memory(sizeof(struct xnn_subgraph)); + if (subgraph == NULL) { + xnn_log_error("failed to allocate %zu bytes for subgraph descriptor", sizeof(struct xnn_subgraph)); + goto error; + } + + subgraph->external_value_ids = external_value_ids; + + subgraph->values = xnn_allocate_zero_memory(external_value_ids * sizeof(struct xnn_value)); + if (subgraph->values == NULL) { + xnn_log_error("failed to allocate %zu bytes for subgraph values", external_value_ids * sizeof(struct xnn_value)); + goto error; + } + for (size_t i = 0; i < external_value_ids; i++) { + subgraph->values[i].id = i; + } + subgraph->num_values = external_value_ids; + 
subgraph->num_reserved_values = external_value_ids; + + *subgraph_out = subgraph; + return xnn_status_success; + +error: + xnn_delete_subgraph(subgraph); + return status; +} + + +struct xnn_value* xnn_subgraph_new_internal_value(xnn_subgraph_t subgraph) +{ + struct xnn_value* values = subgraph->values; + const size_t size = subgraph->num_values; + const size_t capacity = subgraph->num_reserved_values; + if (capacity < size + 1) { + const size_t new_capacity = max(min(capacity * 2, capacity + 512), capacity + 64); + assert(new_capacity >= size + 1); + values = xnn_reallocate_memory(values, new_capacity * sizeof(struct xnn_value)); + if (values == NULL) { + xnn_log_error("failed to allocate %zu bytes for subgraph values", + capacity * sizeof(struct xnn_value)); + return values; + } + + memset(values + size, 0, (new_capacity - size) * sizeof(struct xnn_value)); + subgraph->num_reserved_values = new_capacity; + subgraph->values = values; + } + subgraph->num_values = size + 1; + struct xnn_value* new_value = values + size; + new_value->id = size; + return new_value; +} + +struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph) +{ + struct xnn_node* nodes = subgraph->nodes; + const size_t size = subgraph->num_nodes; + const size_t capacity = subgraph->num_reserved_nodes; + + if (capacity < size + 1) { + const size_t new_capacity = max(min(capacity * 2, capacity + 512), capacity + 64); + assert(new_capacity >= size + 1); + nodes = xnn_reallocate_memory(nodes, new_capacity * sizeof(struct xnn_node)); + if (nodes == NULL) { + xnn_log_error("failed to allocate %zu bytes for subgraph nodes", + capacity * sizeof(struct xnn_node)); + return nodes; + } + + memset(nodes + size, 0, (new_capacity - size) * sizeof(struct xnn_node)); + subgraph->num_reserved_nodes = new_capacity; + subgraph->nodes = nodes; + } + subgraph->num_nodes = size + 1; + struct xnn_node* new_node = nodes + size; + new_node->id = size; + return new_node; +} + +enum xnn_status xnn_define_convolution_2d( 
+ xnn_subgraph_t subgraph, + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t subsampling_height, + uint32_t subsampling_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t groups, + size_t group_input_channels, + size_t group_output_channels, + float output_min, + float output_max, + uint32_t input_id, + uint32_t filter_id, + uint32_t bias_id, + uint32_t output_id, + uint32_t flags) +{ + if (!xnn_params.initialized) { + xnn_log_error("failed to define Convolution operator: XNNPACK is not initialized"); + return xnn_status_uninitialized; + } + + if (kernel_width == 0 || kernel_height == 0) { + xnn_log_error( + "failed to define Convolution operator with %" PRIu32 "x%" PRIu32 " kernel: kernel dimensions must be non-zero", + kernel_width, kernel_height); + return xnn_status_invalid_parameter; + } + + if (subsampling_width == 0 || subsampling_height == 0) { + xnn_log_error( + "failed to define Convolution operator with %" PRIu32 "x%" PRIu32 " subsampling: " + "subsampling dimensions must be non-zero", + subsampling_width, subsampling_height); + return xnn_status_invalid_parameter; + } + + if (dilation_width == 0 || dilation_height == 0) { + xnn_log_error( + "failed to define Convolution operator with %" PRIu32 "x%" PRIu32 " dilation: " + "dilation dimensions must be non-zero", + dilation_width, dilation_height); + return xnn_status_invalid_parameter; + } + + if (groups == 0) { + xnn_log_error( + "failed to define Convolution operator with %" PRIu32 " groups: number of groups must be non-zero", groups); + return xnn_status_invalid_parameter; + } + + if (group_input_channels == 0) { + xnn_log_error( + "failed to define Convolution operator with %zu input channels per group: " + "number of channels must be non-zero", + group_input_channels); + return xnn_status_invalid_parameter; + } + + if (group_output_channels == 
0) { + xnn_log_error( + "failed to define Convolution operator with %zu output channels per group: " + "number of channels must be non-zero", + group_output_channels); + return xnn_status_invalid_parameter; + } + + if (isnan(output_min)) { + xnn_log_error( + "failed to define Convolution operator with NaN output lower bound: lower bound must be non-NaN"); + return xnn_status_invalid_parameter; + } + + if (isnan(output_max)) { + xnn_log_error( + "failed to define Convolution operator with NaN output upper bound: upper bound must be non-NaN"); + return xnn_status_invalid_parameter; + } + + if (output_min >= output_max) { + xnn_log_error( + "failed to define Convolution operator with [%.7g, %.7g] output range: " + "lower bound must be below upper bound", + output_min, output_max); + return xnn_status_invalid_parameter; + } + + if (input_id >= subgraph->num_values) { + xnn_log_error( + "failed to define Convolution operator with input ID #%" PRIu32 ": invalid Value ID", + input_id); + return xnn_status_invalid_parameter; + } + + if (filter_id >= subgraph->num_values) { + xnn_log_error( + "failed to define Convolution operator with filter ID #%" PRIu32 ": invalid Value ID", + filter_id); + return xnn_status_invalid_parameter; + } + + if (bias_id >= subgraph->num_values) { + xnn_log_error( + "failed to define Convolution operator with bias ID #%" PRIu32 ": invalid Value ID", + bias_id); + return xnn_status_invalid_parameter; + } + + if (output_id >= subgraph->num_values) { + xnn_log_error( + "failed to define Convolution operator with output ID #%" PRIu32 ": invalid Value ID", + output_id); + return xnn_status_invalid_parameter; + } + + struct xnn_node* node = xnn_subgraph_new_node(subgraph); + if (node == NULL) { + return xnn_status_out_of_memory; + } + + node->type = xnn_node_type_convolution_2d; + node->params.convolution_2d.input_padding_top = input_padding_top; + node->params.convolution_2d.input_padding_right = input_padding_right; + 
node->params.convolution_2d.input_padding_bottom = input_padding_bottom; + node->params.convolution_2d.input_padding_left = input_padding_left; + node->params.convolution_2d.kernel_height = kernel_height; + node->params.convolution_2d.kernel_width = kernel_width; + node->params.convolution_2d.subsampling_height = subsampling_height; + node->params.convolution_2d.subsampling_width = subsampling_width; + node->params.convolution_2d.dilation_height = dilation_height; + node->params.convolution_2d.dilation_width = dilation_width; + node->params.convolution_2d.groups = groups; + node->params.convolution_2d.group_input_channels = group_input_channels; + node->params.convolution_2d.group_output_channels = group_output_channels; + node->params.convolution_2d.output_min = output_min; + node->params.convolution_2d.output_max = output_max; + node->num_inputs = 3; + node->inputs.raw[0] = input_id; + node->inputs.raw[1] = filter_id; + node->inputs.raw[2] = bias_id; + node->num_outputs = 1; + node->outputs.raw[0] = output_id; + node->flags = flags; + + return xnn_status_success; +}; + +enum xnn_status xnn_define_depthwise_convolution_2d( + xnn_subgraph_t subgraph, + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t subsampling_height, + uint32_t subsampling_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t depth_multiplier, + size_t input_channels, + float output_min, + float output_max, + uint32_t input_id, + uint32_t filter_id, + uint32_t bias_id, + uint32_t output_id, + uint32_t flags) +{ + if (!xnn_params.initialized) { + xnn_log_error("failed to define Depthwise Convolution operator: XNNPACK is not initialized"); + return xnn_status_uninitialized; + } + + if (kernel_width == 0 || kernel_height == 0) { + xnn_log_error( + "failed to define Depthwise Convolution operator with %" PRIu32 "x%" PRIu32 " kernel: kernel 
dimensions must be non-zero", + kernel_width, kernel_height); + return xnn_status_invalid_parameter; + } + + if (subsampling_width == 0 || subsampling_height == 0) { + xnn_log_error( + "failed to define Depthwise Convolution operator with %" PRIu32 "x%" PRIu32 " subsampling: " + "subsampling dimensions must be non-zero", + subsampling_width, subsampling_height); + return xnn_status_invalid_parameter; + } + + if (dilation_width == 0 || dilation_height == 0) { + xnn_log_error( + "failed to define Depthwise Convolution operator with %" PRIu32 "x%" PRIu32 " dilation: " + "dilation dimensions must be non-zero", + dilation_width, dilation_height); + return xnn_status_invalid_parameter; + } + + if (depth_multiplier == 0) { + xnn_log_error( + "failed to define Depthwise Convolution operator with %" PRIu32 " depth multiplier: " + "depth multiplier must be non-zero", + depth_multiplier); + return xnn_status_invalid_parameter; + } + + if (input_channels == 0) { + xnn_log_error( + "failed to define Depthwise Convolution operator with %zu input channels: " + "number of channels must be non-zero", + input_channels); + return xnn_status_invalid_parameter; + } + + if (isnan(output_min)) { + xnn_log_error( + "failed to define Depthwise Convolution operator with NaN output lower bound: lower bound must be non-NaN"); + return xnn_status_invalid_parameter; + } + + if (isnan(output_max)) { + xnn_log_error( + "failed to define Depthwise Convolution operator with NaN output upper bound: upper bound must be non-NaN"); + return xnn_status_invalid_parameter; + } + + if (output_min >= output_max) { + xnn_log_error( + "failed to define Depthwise Convolution operator with [%.7g, %.7g] output range: " + "lower bound must be below upper bound", + output_min, output_max); + return xnn_status_invalid_parameter; + } + + if (input_id >= subgraph->num_values) { + xnn_log_error( + "failed to define Depthwise Convolution operator with input ID #%" PRIu32 ": invalid Value ID", + input_id); + return 
xnn_status_invalid_parameter; + } + + if (filter_id >= subgraph->num_values) { + xnn_log_error( + "failed to define Depthwise Convolution operator with filter ID #%" PRIu32 ": invalid Value ID", + filter_id); + return xnn_status_invalid_parameter; + } + + if (bias_id >= subgraph->num_values) { + xnn_log_error( + "failed to define Depthwise Convolution operator with bias ID #%" PRIu32 ": invalid Value ID", + bias_id); + return xnn_status_invalid_parameter; + } + + if (output_id >= subgraph->num_values) { + xnn_log_error( + "failed to define Depthwise Convolution operator with output ID #%" PRIu32 ": invalid Value ID", + output_id); + return xnn_status_invalid_parameter; + } + + struct xnn_node* node = xnn_subgraph_new_node(subgraph); + if (node == NULL) { + return xnn_status_out_of_memory; + } + + node->type = xnn_node_type_depthwise_convolution_2d; + node->params.depthwise_convolution_2d.input_padding_top = input_padding_top; + node->params.depthwise_convolution_2d.input_padding_right = input_padding_right; + node->params.depthwise_convolution_2d.input_padding_bottom = input_padding_bottom; + node->params.depthwise_convolution_2d.input_padding_left = input_padding_left; + node->params.depthwise_convolution_2d.kernel_height = kernel_height; + node->params.depthwise_convolution_2d.kernel_width = kernel_width; + node->params.depthwise_convolution_2d.subsampling_height = subsampling_height; + node->params.depthwise_convolution_2d.subsampling_width = subsampling_width; + node->params.depthwise_convolution_2d.dilation_height = dilation_height; + node->params.depthwise_convolution_2d.dilation_width = dilation_width; + node->params.depthwise_convolution_2d.depth_multiplier = depth_multiplier; + node->params.depthwise_convolution_2d.input_channels = input_channels; + node->params.depthwise_convolution_2d.output_min = output_min; + node->params.depthwise_convolution_2d.output_max = output_max; + node->num_inputs = 3; + node->inputs.raw[0] = input_id; + node->inputs.raw[1] = 
filter_id; + node->inputs.raw[2] = bias_id; + node->num_outputs = 1; + node->outputs.raw[0] = output_id; + node->flags = flags; + + return xnn_status_success; +}; + +enum xnn_status xnn_delete_subgraph( + xnn_subgraph_t subgraph) +{ + if (subgraph != NULL) { + memset(subgraph->nodes, 0, sizeof(struct xnn_node) * subgraph->num_nodes); + xnn_release_memory(subgraph->nodes); + + memset(subgraph->values, 0, sizeof(struct xnn_value) * subgraph->num_values); + xnn_release_memory(subgraph->values); + + memset(subgraph, 0, sizeof(struct xnn_subgraph)); + xnn_release_memory(subgraph); + } + return xnn_status_success; +} diff --git a/src/tensor.c b/src/tensor.c new file mode 100644 index 000000000..8730e4502 --- /dev/null +++ b/src/tensor.c @@ -0,0 +1,101 @@ +// Copyright 2020 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> + +#include <xnnpack.h> +#include <xnnpack/allocator.h> +#include <xnnpack/log.h> +#include <xnnpack/params.h> +#include <xnnpack/subgraph.h> + + +enum xnn_status xnn_define_tensor_value( + xnn_subgraph_t subgraph, + enum xnn_datatype datatype, + size_t num_dims, + const size_t* dims, + const void* data, + uint32_t external_id, + uint32_t flags, + uint32_t* id_out) +{ + if (!xnn_params.initialized) { + xnn_log_error("failed to create Dense Tensor value: XNNPACK is not initialized"); + return xnn_status_uninitialized; + } + + if (external_id != XNN_INVALID_VALUE_ID && external_id >= subgraph->external_value_ids) { + xnn_log_error( + "failed to create Dense Tensor value: " + "external ID %" PRIu32 " exceeds the number of reserved external IDs in subgraph (%" PRIu32 ")", + external_id, subgraph->external_value_ids); + return xnn_status_invalid_parameter; + } + + if (num_dims > XNN_MAX_TENSOR_DIMS) { + xnn_log_error("failed to create Dense Tensor value: num of dimensions 
exceeds XNNPACK limit (%d)", + XNN_MAX_TENSOR_DIMS); + return xnn_status_unsupported_parameter; + } + + switch (datatype) { + case xnn_datatype_fp32: + case xnn_datatype_fp16: + break; + default: + xnn_log_error("failed to create Dense Tensor value: invalid data type (%d)", datatype); + return xnn_status_unsupported_parameter; + } + + struct xnn_value* value = subgraph->values + external_id; + if (external_id == XNN_INVALID_VALUE_ID) { + value = xnn_subgraph_new_internal_value(subgraph); + if (value == NULL) { + return xnn_status_out_of_memory; + } + } + value->type = xnn_value_type_dense_tensor; + value->datatype = datatype; + value->shape.num_dims = num_dims; + memcpy(value->shape.dim, dims, num_dims * sizeof(size_t)); + value->flags = flags; + value->data = data; + + *id_out = value->id; + return xnn_status_success; +} + +size_t xnn_tensor_get_size( + xnn_subgraph_t subgraph, + uint32_t value_id) +{ + assert(value_id < subgraph->num_values); + + const struct xnn_value* value = subgraph->values + value_id; + assert(value->type == xnn_value_type_dense_tensor); + assert(value->datatype != xnn_datatype_invalid); + + size_t size = 0; + switch (value->datatype) { + case xnn_datatype_fp16: + size = 2; + break; + case xnn_datatype_fp32: + size = 4; + break; + case xnn_datatype_invalid: + XNN_UNREACHABLE; + } + + for (size_t i = 0; i < value->shape.num_dims; i++) { + size *= value->shape.dim[i]; + } + + return size; +} diff --git a/src/xnnpack/allocator.h b/src/xnnpack/allocator.h index fdcfce5bf..a39387416 100644 --- a/src/xnnpack/allocator.h +++ b/src/xnnpack/allocator.h @@ -25,6 +25,14 @@ inline static void* xnn_allocate_memory(size_t memory_size) { return xnn_params.allocator.allocate(xnn_params.allocator.context, memory_size); } +inline static void* xnn_allocate_zero_memory(size_t memory_size) { + void* memory_pointer = xnn_params.allocator.allocate(xnn_params.allocator.context, memory_size); + if (memory_pointer != NULL) { + memset(memory_pointer, 0, memory_size); 
+ } + return memory_pointer; +} + inline static void* xnn_reallocate_memory(void* memory_pointer, size_t memory_size) { return xnn_params.allocator.reallocate(xnn_params.allocator.context, memory_pointer, memory_size); } diff --git a/src/xnnpack/subgraph.h b/src/xnnpack/subgraph.h new file mode 100644 index 000000000..76fee6215 --- /dev/null +++ b/src/xnnpack/subgraph.h @@ -0,0 +1,168 @@ +// Copyright 2020 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include <stddef.h> +#include <stdint.h> + +#include <xnnpack.h> + +#define XNN_MAX_INPUTS 3 +#define XNN_MAX_OUTPUTS 1 + +#define XNN_MAX_RUNTIME_INPUTS 2 +#define XNN_MAX_RUNTIME_OUTPUTS 1 + +struct xnn_shape { + size_t num_dims; + size_t dim[XNN_MAX_TENSOR_DIMS]; +}; + +enum xnn_value_type { + xnn_value_type_invalid = 0, + xnn_value_type_dense_tensor = 1, +}; + +/// Abstraction for a collections of elements produced and consumed by nodes. +struct xnn_value { + /// Unique ID for the value. + uint32_t id; + /// Type of the collection of elements. + /// + /// Currently only dense tensors are supported. + /// Other types (e.g. sparse tensors) might be supported in the future. + enum xnn_value_type type; + /// Type of elements in the collection. + enum xnn_datatype datatype; + /// Tensor shape. + struct xnn_shape shape; + /// Binary features of the tensor. Supported values are any combination of: + /// - XNN_VALUE_FLAG_EXTERNAL_INPUT + /// - XNN_VALUE_FLAG_EXTERNAL_OUTPUT + uint32_t flags; + /// Static initialization data. Must be null for non-static values. + const void* data; +}; + +struct xnn_blob { + /// Size in bytes. + size_t size; + /// Data pointer. 
+ void* data; + bool external; +}; + +enum xnn_node_type { + xnn_node_type_invalid = 0, + xnn_node_type_convolution_2d, + xnn_node_type_depthwise_convolution_2d, +}; + +struct xnn_node { + enum xnn_node_type type; + uint32_t id; + /// Static parameters of the operator node. + union { + struct { + uint32_t input_padding_top; + uint32_t input_padding_right; + uint32_t input_padding_bottom; + uint32_t input_padding_left; + uint32_t kernel_height; + uint32_t kernel_width; + uint32_t subsampling_height; + uint32_t subsampling_width; + uint32_t dilation_height; + uint32_t dilation_width; + uint32_t groups; + size_t group_input_channels; + size_t group_output_channels; + float output_min; + float output_max; + } convolution_2d; + struct { + uint32_t input_padding_top; + uint32_t input_padding_right; + uint32_t input_padding_bottom; + uint32_t input_padding_left; + uint32_t kernel_height; + uint32_t kernel_width; + uint32_t subsampling_height; + uint32_t subsampling_width; + uint32_t dilation_height; + uint32_t dilation_width; + uint32_t depth_multiplier; + size_t input_channels; + float output_min; + float output_max; + } depthwise_convolution_2d; + } params; + /// Value IDs for node inputs. + union { + uint32_t raw[XNN_MAX_INPUTS]; + struct { + uint32_t input; + uint32_t filter; + uint32_t bias; + } convolution_2d; + } inputs; + uint32_t num_inputs; + /// Value IDs for node outputs. + union { + struct { + uint32_t output; + } convolution_2d; + uint32_t raw[XNN_MAX_OUTPUTS]; + } outputs; + uint32_t num_outputs; + uint32_t flags; +}; + +struct xnn_operator_data { + xnn_operator_t op; + size_t batch_size; + size_t input_height; + size_t input_width; + uint32_t inputs[XNN_MAX_RUNTIME_INPUTS]; + uint32_t outputs[XNN_MAX_RUNTIME_OUTPUTS]; +}; + +struct xnn_subgraph { + /// Number of Value IDs reserved for communication with external graph representation. + /// Values created during subgraph transformation avoid using IDs in [0, reserved_value_ids-1] range. 
+ uint32_t external_value_ids; + + uint32_t num_reserved_values; + uint32_t num_values; + struct xnn_value* values; + + uint32_t num_reserved_nodes; + uint32_t num_nodes; + struct xnn_node* nodes; +}; + +/// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values. +struct xnn_runtime { + uint32_t num_external_values; + + /// List of operators in the execution plan, in execution order. + struct xnn_operator_data* ops; + /// Number of operators in the execution plan. + size_t num_ops; + + struct xnn_blob* blobs; + size_t num_blobs; + + void* workspace; +}; + +struct xnn_value* xnn_subgraph_new_internal_value(xnn_subgraph_t subgraph); + +struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph); + +size_t xnn_tensor_get_size( + xnn_subgraph_t subgraph, + uint32_t value_id); |