diff options
author | Miao Wang <miaowang@google.com> | 2018-06-07 13:58:42 -0700 |
---|---|---|
committer | android-build-team Robot <android-build-team-robot@google.com> | 2018-06-12 01:48:54 +0000 |
commit | 829a18d4c706c5562931ad05d1d62c3aac9fb618 (patch) | |
tree | dab27d8c5172542187a1997f62dbd15922df20ae /nn | |
parent | 5770d3343d5a149fac43c0b6803c71eeb9b4cf14 (diff) | |
download | ml-829a18d4c706c5562931ad05d1d62c3aac9fb618.tar.gz |
Make fully_connected op use reference implementation in certain cases.
- The optimized implementation will generate incorrect results when
batch_size > 1 and the number of input elements is the square of
batch_size.
- This change makes fully_connected fall back to the reference
implementation in these cases.
- The bug only affects float32 path.
Bug: 80425683
Bug: 109805089
Test: mma
Test: NeuralNetworksTest_static --gtest_filter=*fully*
Merged-In: Ib1aa9fb47e536c64756b4ff3904def5bb4fe0f48
Change-Id: Ib1aa9fb47e536c64756b4ff3904def5bb4fe0f48
(cherry picked from commit c78d7cc2580720eca5c91595b87615a213c0a99d)
(cherry picked from commit 5d40f5ebc7e29bd048348de261fc703be55b4db5)
Diffstat (limited to 'nn')
5 files changed, 123 insertions, 7 deletions
diff --git a/nn/common/operations/FullyConnected.cpp b/nn/common/operations/FullyConnected.cpp index 4d2008d93..4e2deff78 100644 --- a/nn/common/operations/FullyConnected.cpp +++ b/nn/common/operations/FullyConnected.cpp @@ -18,6 +18,7 @@ #include "CpuOperationUtils.h" #include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" namespace android { namespace nn { @@ -36,13 +37,25 @@ bool fullyConnectedFloat32(const float* inputData, const Shape& inputShape, CalculateActivationRangeFloat(activation, &output_activation_min, &output_activation_max); - tflite::optimized_ops::FullyConnected( - inputData, convertShapeToDims(inputShape), - weightsData, convertShapeToDims(weightsShape), - biasData, convertShapeToDims(biasShape), - output_activation_min, output_activation_max, - outputData, convertShapeToDims(outputShape)); - + // b/80425683, optimized implementation produces incorrect results when the + // number of input elements is the squre of batch_size. 
+ uint32_t batch_size = getSizeOfDimension(outputShape, 0); + uint32_t input_n_elements = getNumberOfElements(inputShape); + if (batch_size * batch_size == input_n_elements) { + tflite::reference_ops::FullyConnected( + inputData, convertShapeToDims(inputShape), + weightsData, convertShapeToDims(weightsShape), + biasData, convertShapeToDims(biasShape), + output_activation_min, output_activation_max, + outputData, convertShapeToDims(outputShape)); + } else { + tflite::optimized_ops::FullyConnected( + inputData, convertShapeToDims(inputShape), + weightsData, convertShapeToDims(weightsShape), + biasData, convertShapeToDims(biasShape), + output_activation_min, output_activation_max, + outputData, convertShapeToDims(outputShape)); + } return true; } diff --git a/nn/runtime/test/generated/examples/fully_connected_float_3.example.cpp b/nn/runtime/test/generated/examples/fully_connected_float_3.example.cpp new file mode 100644 index 000000000..14ee46d5f --- /dev/null +++ b/nn/runtime/test/generated/examples/fully_connected_float_3.example.cpp @@ -0,0 +1,22 @@ +// Generated file (from: fully_connected_float_3.mod.py). Do not edit +// Begin of an example +{ +//Input(s) +{ // See tools/test_generator/include/TestHarness.h:MixedTyped + // int -> FLOAT32 map + {{0, {1, 2, 2, 1}}}, + // int -> INT32 map + {}, + // int -> QUANT8_ASYMM map + {} +}, +//Output(s) +{ // See tools/test_generator/include/TestHarness.h:MixedTyped + // int -> FLOAT32 map + {{0, {11, 9}}}, + // int -> INT32 map + {}, + // int -> QUANT8_ASYMM map + {} +} +}, // End of an example diff --git a/nn/runtime/test/generated/models/fully_connected_float_3.model.cpp b/nn/runtime/test/generated/models/fully_connected_float_3.model.cpp new file mode 100644 index 000000000..15275251f --- /dev/null +++ b/nn/runtime/test/generated/models/fully_connected_float_3.model.cpp @@ -0,0 +1,32 @@ +// Generated file (from: fully_connected_float_3.mod.py). 
Do not edit +void CreateModel(Model *model) { + OperandType type4(Type::INT32, {}); + OperandType type1(Type::TENSOR_FLOAT32, {1, 2}); + OperandType type2(Type::TENSOR_FLOAT32, {1}); + OperandType type3(Type::TENSOR_FLOAT32, {2, 1}); + OperandType type0(Type::TENSOR_FLOAT32, {2, 2}); + // Phase 1, operands + auto op1 = model->addOperand(&type0); + auto op2 = model->addOperand(&type1); + auto b0 = model->addOperand(&type2); + auto op3 = model->addOperand(&type3); + auto act = model->addOperand(&type4); + // Phase 2, operations + static float op2_init[] = {2.0f, 4.0f}; + model->setOperandValue(op2, op2_init, sizeof(float) * 2); + static float b0_init[] = {1.0f}; + model->setOperandValue(b0, b0_init, sizeof(float) * 1); + static int32_t act_init[] = {0}; + model->setOperandValue(act, act_init, sizeof(int32_t) * 1); + model->addOperation(ANEURALNETWORKS_FULLY_CONNECTED, {op1, op2, b0, act}, {op3}); + // Phase 3, inputs and outputs + model->identifyInputsAndOutputs( + {op1}, + {op3}); + assert(model->isValid()); +} + +bool is_ignored(int i) { + static std::set<int> ignore = {}; + return ignore.find(i) != ignore.end(); +} diff --git a/nn/runtime/test/generated/tests/fully_connected_float_3.mod.py.cpp b/nn/runtime/test/generated/tests/fully_connected_float_3.mod.py.cpp new file mode 100644 index 000000000..eef737ef1 --- /dev/null +++ b/nn/runtime/test/generated/tests/fully_connected_float_3.mod.py.cpp @@ -0,0 +1,17 @@ +// DO NOT EDIT; +// Generated by ml/nn/runtime/test/specs/generate_test.sh +#include "../../TestGenerated.h" + +namespace fully_connected_float_3 { +std::vector<MixedTypedExample> examples = { +// Generated fully_connected_float_3 test +#include "generated/examples/fully_connected_float_3.example.cpp" +}; +// Generated model constructor +#include "generated/models/fully_connected_float_3.model.cpp" +} // namespace fully_connected_float_3 +TEST_F(GeneratedTests, fully_connected_float_3) { + execute(fully_connected_float_3::CreateModel, + 
fully_connected_float_3::is_ignored, + fully_connected_float_3::examples); +} diff --git a/nn/runtime/test/specs/V1_0/fully_connected_float_3.mod.py b/nn/runtime/test/specs/V1_0/fully_connected_float_3.mod.py new file mode 100644 index 000000000..804f81266 --- /dev/null +++ b/nn/runtime/test/specs/V1_0/fully_connected_float_3.mod.py @@ -0,0 +1,32 @@ +# +# Copyright (C) 2018 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +model = Model() +in0 = Input("op1", "TENSOR_FLOAT32", "{2, 2}") +weights = Parameter("op2", "TENSOR_FLOAT32", "{1, 2}", [2, 4]) +bias = Parameter("b0", "TENSOR_FLOAT32", "{1}", [1]) +out0 = Output("op3", "TENSOR_FLOAT32", "{2, 1}") +act = Int32Scalar("act", 0) +model = model.Operation("FULLY_CONNECTED", in0, weights, bias, act).To(out0) + +# Example 1. Input in operand 0, +input0 = {in0: # input 0 + [1, 2, 2, 1]} +output0 = {out0: # output 0 + [11, 9]} + +# Instantiate an example +Example((input0, output0)) |