summaryrefslogtreecommitdiff
path: root/nn
diff options
context:
space:
mode:
authorPrzemysław Szczepaniak <pszczepaniak@google.com>2020-05-05 18:39:17 +0100
committerPrzemyslaw Szczepaniak <pszczepaniak@google.com>2020-05-11 15:27:49 +0000
commitef9c23ee28a3f36e494b3e9fc2aeba1cc284ae3c (patch)
tree09f1631b1a1972d3f64e68ea999c508ffe2f665c /nn
parent6a8e397336cb494c488951183cc2d591deb8fe13 (diff)
downloadml-ef9c23ee28a3f36e494b3e9fc2aeba1cc284ae3c.tar.gz
Add 1.3 support to TestPartitioning.cpp
Bug: 141704706 Test: NeuralNetworksTest_static on coral Change-Id: Icd2ee06877f6790a9dd610c501908dc110959972
Diffstat (limited to 'nn')
-rw-r--r--nn/runtime/test/TestPartitioning.cpp124
1 files changed, 96 insertions, 28 deletions
diff --git a/nn/runtime/test/TestPartitioning.cpp b/nn/runtime/test/TestPartitioning.cpp
index 3bde4cf90..7b4205ac8 100644
--- a/nn/runtime/test/TestPartitioning.cpp
+++ b/nn/runtime/test/TestPartitioning.cpp
@@ -86,6 +86,13 @@
// MINIMUM, POW, or PRELU. These operations take no activation
// function, so we only get 4 operation kinds, for which we
// use operation encodings 16..19.
+// - There is another collection of operations (each of which has one input
+// and one output):
+// - Single operation available at driver version V1_3 or
+// later. It is represented in the graph as HARD_SWISH.
+// These operations take no activation function, for which we
+// use operation encodings 20..20.
+
// When we instantiate a device for testing purposes, we specify what subset of
// those operations the device is able to execute.
//
@@ -204,6 +211,11 @@ const uint32_t kFirstEncodingPRELU = kFirstEncodingPOW + 1;
const uint32_t kFirstEncodingV1_2 = kFirstEncodingMAXIMUM;
const uint32_t kLastEncodingV1_2 = kFirstEncodingPRELU;
+// V1_3 operations
+const uint32_t kFirstEncodingHARD_SWISH = kLastEncodingV1_2 + 1;
+const uint32_t kFirstEncodingV1_3 = kFirstEncodingHARD_SWISH;
+const uint32_t kLastEncodingV1_3 = kFirstEncodingHARD_SWISH;
+
const std::map<OperationType, uint32_t> operationToFirstEncoding = {
{OperationType::ADD, kFirstEncodingADD},
{OperationType::MUL, kFirstEncodingMUL},
@@ -213,6 +225,7 @@ const std::map<OperationType, uint32_t> operationToFirstEncoding = {
{OperationType::MINIMUM, kFirstEncodingMINIMUM},
{OperationType::POW, kFirstEncodingPOW},
{OperationType::PRELU, kFirstEncodingPRELU},
+ {OperationType::HARD_SWISH, kFirstEncodingHARD_SWISH},
};
// Sorted in reverse order (std::greater) so that we can use map::lower_bound to
@@ -227,6 +240,7 @@ const std::map<uint32_t, std::pair<uint32_t, bool>, std::greater<>> firstEncodin
{kFirstEncodingMINIMUM, {ANEURALNETWORKS_MINIMUM, false}},
{kFirstEncodingPOW, {ANEURALNETWORKS_POW, false}},
{kFirstEncodingPRELU, {ANEURALNETWORKS_PRELU, false}},
+ {kFirstEncodingHARD_SWISH, {ANEURALNETWORKS_HARD_SWISH, false}},
};
// Look up the operation with the specified index in a graph, and return the
@@ -664,6 +678,16 @@ class PartitioningModel : private WrapperModel {
return addOperation2To1(operation + kFirstEncodingV1_2, input0, input1, dimensionedOutput);
}
+ // Create a V1_3 operation with one input and one output, specifying the
+ // operation kind (where 0 is the first V1_3 operation) and the input
+ // operand index.
+ // Returns the output operand index.
+ uint32_t addOperation1To1V1_3(uint32_t operation, const uint32_t input0,
+ Dimensioned dimensionedOutput = Dimensioned::YES) {
+ CHECK_LE(operation, kLastEncodingV1_3 - kFirstEncodingV1_3);
+ return addOperation1To1(operation + kFirstEncodingV1_3, input0, dimensionedOutput);
+ }
+
// Create an OEM operation with one input and one output,
// specifying the input operand index. Returns the output operand
// index.
@@ -725,6 +749,20 @@ class PartitioningModel : private WrapperModel {
}
}
+ // Create an operation with one input and one output, specifying
+ // the operation kind and the input operand index.
+ // Returns the output operand index.
+ uint32_t addOperation1To1(uint32_t operation, const uint32_t input0,
+ Dimensioned dimensionedOutput = Dimensioned::YES) {
+ auto it = firstEncodingToOperation.lower_bound(operation);
+ CHECK(it != firstEncodingToOperation.end());
+ ANeuralNetworksOperationType type = it->second.first;
+
+ uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
+ addOperation(type, {input0}, {output});
+ return output;
+ }
+
// Create a scalar integer operand of the specified value, and
// return the corresponding operand index.
uint32_t addIntOperand(int32_t value) {
@@ -850,10 +888,11 @@ class PartitioningTest : public ::testing::Test {
}
DeviceSpecification(const std::string& name, float perf, HalVersion halVersion,
uint32_t operationMaskV1_0, uint32_t operationMaskV1_1 = 0,
- uint32_t operationMaskV1_2 = 0)
- : DeviceSpecification(name, perf, perf,
- makeOperationMask(halVersion, operationMaskV1_0,
- operationMaskV1_1, operationMaskV1_2)) {
+ uint32_t operationMaskV1_2 = 0, uint32_t operationMaskV1_3 = 0)
+ : DeviceSpecification(
+ name, perf, perf,
+ makeOperationMask(halVersion, operationMaskV1_0, operationMaskV1_1,
+ operationMaskV1_2, operationMaskV1_3)) {
mHalVersion = halVersion;
}
@@ -886,7 +925,11 @@ class PartitioningTest : public ::testing::Test {
// This is used by a DeviceSpecification constructor to build a mask of
// operations to be supported by the device.
static uint32_t makeOperationMask(HalVersion halVersion, uint32_t operationMaskV1_0,
- uint32_t operationMaskV1_1, uint32_t operationMaskV1_2) {
+ uint32_t operationMaskV1_1, uint32_t operationMaskV1_2,
+ uint32_t operationMaskV1_3) {
+ if (halVersion < HalVersion::V1_3) {
+ CHECK(!operationMaskV1_3);
+ }
if (halVersion < HalVersion::V1_2) {
CHECK(!operationMaskV1_2);
}
@@ -900,9 +943,12 @@ class PartitioningTest : public ::testing::Test {
maskOfWidth(kLastEncodingV1_1 - kFirstEncodingV1_1 + 1);
static const uint32_t kOperationMaskV1_2 =
maskOfWidth(kLastEncodingV1_2 - kFirstEncodingV1_2 + 1);
+ static const uint32_t kOperationMaskV1_3 =
+ maskOfWidth(kLastEncodingV1_3 - kFirstEncodingV1_3 + 1);
return ((operationMaskV1_0 & kOperationMaskV1_0) << kFirstEncodingV1_0) |
((operationMaskV1_1 & kOperationMaskV1_1) << kFirstEncodingV1_1) |
- ((operationMaskV1_2 & kOperationMaskV1_2) << kFirstEncodingV1_2);
+ ((operationMaskV1_2 & kOperationMaskV1_2) << kFirstEncodingV1_2) |
+ ((operationMaskV1_3 & kOperationMaskV1_3) << kFirstEncodingV1_3);
}
};
static std::vector<std::shared_ptr<Device>> makeDevices(
@@ -1394,36 +1440,39 @@ TEST_F(PartitioningTest, SliceModel) {
uint32_t opnd3 = model.addOperation2To1V1_0(1, opnd0, opnd1);
uint32_t opnd4 = model.addOperation2To1V1_1(0, opnd0, opnd1);
uint32_t opnd5 = model.addOperation2To1V1_2(0, opnd2, opnd3);
- model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4, opnd5});
+ uint32_t opnd6 = model.addOperation1To1V1_3(0, opnd2);
+ model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4, opnd5, opnd6});
model.finish();
ASSERT_TRUE(model.isValid());
- // Simple partition (V1_0, V1_1, V1_2 devices are available; V1_2 has best perf).
+ // Simple partition (V1_0, V1_1, V1_2, V1_3 devices are available; V1_3 has best perf).
// No need to compare the original model to the model from the plan -- we
// didn't actually do any partitioning.
const auto devicesA = makeDevices({{"V1_0", 0.8, HalVersion::V1_0, ~0U},
{"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
- {"V1_2", 0.6, HalVersion::V1_2, ~0U, ~0U, ~0U}});
+ {"V1_2", 0.6, HalVersion::V1_2, ~0U, ~0U, ~0U},
+ {"V1_3", 0.5, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
ExecutionPlan planA;
ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
ExecutePriority::DEFAULT, {}, &planA),
ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
- ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "V1_2");
+ ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "V1_3");
- // Compound partition (V1_0, V1_1, V1_2 devices are available, in decreasing
- // order of performance; model is distributed across all three devices).
+ // Compound partition (V1_0, V1_1, V1_2, V1_3 devices are available, in
+ // decreasing order of performance; model is distributed across all four
+ // devices).
const auto devicesB = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U},
{"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
- {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U}});
+ {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U},
+ {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
ExecutionPlan planB;
ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
ExecutePriority::DEFAULT, {}, &planB),
ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
const auto& stepsB = planB.forTest_compoundGetSteps();
- ASSERT_EQ(stepsB.size(), size_t(3));
+ ASSERT_EQ(stepsB.size(), size_t(4));
{
// Build a model to compare against the step model from stepsB[0].
PartitioningModel modelB0;
@@ -1465,25 +1514,44 @@ TEST_F(PartitioningTest, SliceModel) {
// Build a model to compare against the step model from stepsB[2].
PartitioningModel modelB2;
uint32_t b2Opnd0 = modelB2.addFloatOperand();
- uint32_t b2Opnd1 = modelB2.addFloatOperand();
- uint32_t b2Opnd2 = modelB2.addOperation2To1V1_2(0, b2Opnd0, b2Opnd1);
+ uint32_t b2Opnd1 = modelB2.addOperation1To1V1_3(0, b2Opnd0);
// Note: In the partitioning algorithm, temps that are
// step model inputs precede model outputs that are step model
- // inputs. In the original model "model", opnd3 is a temp and
- // opnd2 is a model output; so in the step model "modelB2", the
- // corresponding inputs b2Opnd1 and b2Opnd0 must appear in
- // that order.
- modelB2.identifyInputsAndOutputs({b2Opnd1, b2Opnd0}, {b2Opnd2});
+ // inputs.
+ modelB2.identifyInputsAndOutputs({b2Opnd0}, {b2Opnd1});
modelB2.finish();
ASSERT_TRUE(modelB2.isValid());
ASSERT_NO_FATAL_FAILURE(
- compare(stepsB[2], &modelB2, devicesB[2], RemapVectorType{}, // modelInputs
- RemapVectorType{{opnd5, b2Opnd2}}, // modelOutputs
- RemapVectorType{{opnd3, b2Opnd1}}, // tempsAsStepModelInputs
+ compare(stepsB[2], &modelB2, devicesB[3], RemapVectorType{}, // modelInputs
+ RemapVectorType{{opnd6, b2Opnd1}}, // modelOutputs
+ RemapVectorType{}, // tempsAsStepModelInputs
StepModelOutputSetType{}, // tempsAsStepModelOutputs
RemapVectorType{{opnd2, b2Opnd0}})); // outputsAsStepModelInputs
}
+ {
+ // Build a model to compare against the step model from stepsB[3].
+ PartitioningModel modelB3;
+ uint32_t b3Opnd0 = modelB3.addFloatOperand();
+ uint32_t b3Opnd1 = modelB3.addFloatOperand();
+ uint32_t b3Opnd2 = modelB3.addOperation2To1V1_2(0, b3Opnd0, b3Opnd1);
+ // Note: In the partitioning algorithm, temps that are
+ // step model inputs precede model outputs that are step model
+ // inputs. In the original model "model", opnd3 is a temp and
+ // opnd2 is a model output; so in the step model "modelB3", the
+ // corresponding inputs b3Opnd1 and b3Opnd0 must appear in
+ // that order.
+ modelB3.identifyInputsAndOutputs({b3Opnd1, b3Opnd0}, {b3Opnd2});
+ modelB3.finish();
+ ASSERT_TRUE(modelB3.isValid());
+
+ ASSERT_NO_FATAL_FAILURE(
+ compare(stepsB[3], &modelB3, devicesB[2], RemapVectorType{}, // modelInputs
+ RemapVectorType{{opnd5, b3Opnd2}}, // modelOutputs
+ RemapVectorType{{opnd3, b3Opnd1}}, // tempsAsStepModelInputs
+ StepModelOutputSetType{}, // tempsAsStepModelOutputs
+ RemapVectorType{{opnd2, b3Opnd0}})); // outputsAsStepModelInputs
+ }
// TODO: Make sure this still works when we have multiple devices
// of same version available for slicing. An easy (?) choice would
@@ -1494,25 +1562,25 @@ TEST_F(PartitioningTest, SliceModel) {
TEST_F(PartitioningTest, SliceModelToEmpty) {
PartitioningModel model;
uint32_t opnd0 = model.addFloatOperand();
- uint32_t opnd1 = model.addFloatOperand();
- uint32_t opnd2 = model.addOperation2To1V1_2(0, opnd0, opnd1);
- model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
+ uint32_t opnd1 = model.addOperation1To1V1_3(0, opnd0);
+ model.identifyInputsAndOutputs({opnd0}, {opnd1});
model.finish();
ASSERT_TRUE(model.isValid());
- // Only the V1_2 device can handle any operations in the model.
+ // Only the V1_3 device can handle any operations in the model.
// No need to compare the original model to the model from the plan -- we
// didn't actually do any partitioning.
const auto devices = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U},
{"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
- {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U}});
+ {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U},
+ {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
ExecutionPlan plan;
ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
ExecutePriority::DEFAULT, {}, &plan),
ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
ASSERT_NE(plan.forTest_simpleGetDevice().get(), nullptr);
- ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "V1_2");
+ ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "V1_3");
}
TEST_F(PartitioningTest, Cpu) {