Unknown dimension tests for partitioning and hvx

More tests for unknown dimensions:
- more extensive tests for partitioning
- change existing unknown dimension test to use quantized tensors so that it uses the hvx driver when available

Added logging to device selection to make it easier to debug why hvx was not getting chosen.

TODO: parametrize unknown dimensions test for both floats and quantized tensors so that it can be used to test other drivers as well

Test: NeuralNetworksTest_static
Bug: 72448000
Bug: 77234888
Change-Id: Id9768cbd871a04d968627411cff90cfe6e73d84a
author: Mika Raento <mikie@google.com> 2018-04-26 18:03:42 +0100
committer: Mika Raento <mikie@google.com> 2018-05-08 20:03:22 +0100
commit: bb255b6e87dc343eb90dec998be1cf153106ab65 (patch)
tree: 655a670cf5236fb68176fc8e3d8c144cc3919e31
parent: 35e4e2c28a589de70ced656c18df14d6d899237f (diff)
download: ml-bb255b6e87dc343eb90dec998be1cf153106ab65.tar.gz
5 files changed, 92 insertions, 30 deletions
diff --git a/nn/runtime/ExecutionPlan.cpp b/nn/runtime/ExecutionPlan.cpp
index 384de7840..c8f755069 100644
--- a/nn/runtime/ExecutionPlan.cpp
+++ b/nn/runtime/ExecutionPlan.cpp
@@ -749,6 +749,10 @@ const std::vector<std::shared_ptr<ExecutionStep>>& ExecutionPlan::forTest_compou
     return compound()->mSteps;
 }
 
+bool ExecutionPlan::forTest_hasSubModelOutputsOfUnknownSize() const {
+    return mBody->hasSubModelOutputsOfUnknownSize();
+}
+
 void ExecutionPlan::SimpleBody::dump() const {
     VLOG(COMPILATION) << "SIMPLE for " << (mDevice == nullptr ? "CPU" : mDevice->getName());
 }
@@ -944,8 +948,8 @@ int ModelBuilder::findBestDeviceForEachOperation(
         int bestChoice = -1;
         float bestPerfVal = 0.0;  // Do not check bestPerfVal if bestChoice < 0.
         for (size_t deviceIndex = 0; deviceIndex < nonCpuDeviceCount; deviceIndex++) {
+            const auto& device = devices[deviceIndex];
             if (canDo[deviceIndex].check(operationIndex)) {
-                const auto& device = devices[deviceIndex];
                 const PerformanceInfo perf = getPerformanceInfo(device, operationIndex);
                 const float perfVal =
                             (preference == ANEURALNETWORKS_PREFER_LOW_POWER ? perf.powerUsage
@@ -954,6 +958,15 @@ int ModelBuilder::findBestDeviceForEachOperation(
                     bestChoice = deviceIndex;
                     bestPerfVal = perfVal;
                 }
+            } else {
+                // Somewhat noisy logging, but this is the only place where the
+                // user of NNAPI can get feedback on why an operation was not
+                // run on a specific device.
+                // Logs O(operationCount * nonCpuDeviceCount) times, but
+                // typically nonCpuDeviceCount is very small.
+                VLOG(COMPILATION) << "Device " << device->getName()
+                                  << " can't do operation "
+                                  << toString(getOperation(operationIndex).type);
             }
         }
         // If it's the OEM op, we'd better have a device able to do it.
diff --git a/nn/runtime/ExecutionPlan.h b/nn/runtime/ExecutionPlan.h
index 843447275..a2d018c0e 100644
--- a/nn/runtime/ExecutionPlan.h
+++ b/nn/runtime/ExecutionPlan.h
@@ -220,6 +220,7 @@ public:
     Kind forTest_getKind() const;
     std::shared_ptr<const Device> forTest_simpleGetDevice() const;
     const std::vector<std::shared_ptr<ExecutionStep>>& forTest_compoundGetSteps() const;
+    bool forTest_hasSubModelOutputsOfUnknownSize() const;
 
 private:
     void findTempsAsSubModelOutputs();
@@ -228,6 +229,7 @@ private:
         virtual ~Body() {}
         virtual void dump() const = 0;
         virtual int finish(const ModelBuilder* fromModel, int32_t executionPreference) = 0;
+        virtual bool hasSubModelOutputsOfUnknownSize() const = 0;
         bool mSuccessfulFinish = false;
     };
 
@@ -237,6 +239,7 @@ private:
 
         void dump() const override;
         int finish(const ModelBuilder* fromModel, int32_t executionPreference) override;
+        virtual bool hasSubModelOutputsOfUnknownSize() const override { return false; }
 
         std::shared_ptr<Device> mDevice;  // nullptr signifies CPU
         const ModelBuilder* mModel;
@@ -246,6 +249,9 @@ private:
     struct CompoundBody : Body {
         void dump() const override;
         int finish(const ModelBuilder* fromModel, int32_t executionPreference) override;
+        virtual bool hasSubModelOutputsOfUnknownSize() const override {
+            return mHasSubModelOutputOfUnknownSize;
+        }
 
         // TODO: Some of the data is working state information that
         // shouldn't be needed after we've constructed but not
diff --git a/nn/runtime/test/TestPartitioning.cpp b/nn/runtime/test/TestPartitioning.cpp
index 9b989b716..b653603cb 100644
--- a/nn/runtime/test/TestPartitioning.cpp
+++ b/nn/runtime/test/TestPartitioning.cpp
@@ -1014,6 +1014,7 @@ TEST_F(PartitioningTest, SetPartitioning) {
     PartitioningCompilation cPWithoutFallback(&model);
     ASSERT_EQ(cPWithoutFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback), Result::NO_ERROR);
     ASSERT_EQ(cPWithoutFallback.finish(devices), Result::OP_FAILED);
+    ASSERT_TRUE(cPWithoutFallback.getExecutionPlan().forTest_hasSubModelOutputsOfUnknownSize());
     ASSERT_EQ(cPWithoutFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::ERROR);
 }
 
diff --git a/nn/runtime/test/TestPartitioningRandom.cpp b/nn/runtime/test/TestPartitioningRandom.cpp
index c302b133f..8613a2b81 100644
--- a/nn/runtime/test/TestPartitioningRandom.cpp
+++ b/nn/runtime/test/TestPartitioningRandom.cpp
@@ -58,6 +58,8 @@
 //     data, randomly assign inputs and outputs to CPU memory or to shared
 //     memory.
 //
+//     Randomly leaves dimensions unset for intermediate operands.
+//
 // (2) Randomly generate drivers based on the sample driver, each of which
 //     executes models on the CPU.  They differ according to which operations
 //     they support.
@@ -124,7 +126,7 @@ static const unsigned kMaxProblemSize = 8;
 static const unsigned kFirstSeed = 0;
 
 // Number of test cases.
-static const unsigned kNumTestCases = 200;
+static const unsigned kNumTestCases = 225;
 
 // Force all graph weights into a single pool (as we recommend to users)
 // or allow them to be distributed across multiple pools (more stress
@@ -548,11 +550,13 @@ TEST_P(RandomPartitioningTest, Test) {
 
     const unsigned problemSize = 1+randUInt(kMaxProblemSize);
     const WrapperOperandType problemType(WrapperType::TENSOR_FLOAT32, { problemSize, problemSize });
+    const WrapperOperandType unknownDimensionsType(WrapperType::TENSOR_FLOAT32, { 0, 0 });
 
     static const WrapperOperandType activationFunctionType(WrapperType::INT32, { });
 
     const unsigned numOperations = 2+randUInt(kMaxNumOperations-1);
     const bool allowDeadOperations = (randFrac() < 0.2);
+    const bool allowUnknownDimensions = (randFrac() < 0.25);
 
     // TODO: The current algorithm builds the graph in a forward
     // direction (i.e., later-generated operations consume outputs
@@ -598,6 +602,11 @@ TEST_P(RandomPartitioningTest, Test) {
     // operations).
     unsigned rootOperationCount = 0;
 
+    // Track if we added operands with unknown dimensions. In this case,
+    // partitioned compilation will fail if such an operand is read in a
+    // different partition than it is written.
+    bool hasUnknownDimensions = false;
+
     // Generate operations.
     for (unsigned i = 0; i < numOperations; i++) {
         const unsigned operationPatternIndex =
@@ -788,7 +797,18 @@ TEST_P(RandomPartitioningTest, Test) {
 
         std::vector<uint32_t> operationOutputs(operationPattern.mNumOutputs);
         std::generate(operationOutputs.begin(), operationOutputs.end(),
-                      [&model, &problemType]{ return model.addOperand(&problemType); });
+                      [&model, &problemType, &unknownDimensionsType, &hasUnknownDimensions,
+                       allowUnknownDimensions, this]{
+                          // 3% unknowns causes ~35% of partitionings to fail
+                          // (determined by commenting out the fallback code,
+                          // running tests and noting number of failures).
+                          if (allowUnknownDimensions && randFrac() < 0.03) {
+                              hasUnknownDimensions = true;
+                              return model.addOperand(&unknownDimensionsType);
+                          } else {
+                              return model.addOperand(&problemType);
+                          }
+                      });
 
         // OPERATION ///////////////////////////////////////////////////////////////////////////////
 
@@ -921,15 +941,32 @@ TEST_P(RandomPartitioningTest, Test) {
     }
 
     // Partitioned compilation.
-    TestCompilation c2(&model);
-    ASSERT_EQ(c2.setPartitioning(DeviceManager::kPartitioningWithoutFallback), Result::NO_ERROR);
-    ASSERT_EQ(c2.finish(devices), Result::NO_ERROR);
+    // For test cases without unknown intermediate operand sizes we require the
+    // partitioning to succeed without CPU fallback. With unknown sizes we
+    // retry with a fallback if the non-fallback partitioning fails and require
+    // the fallback to succeed.
+    TestCompilation cNoFallback(&model);
+    TestCompilation cWithFallback(&model);
+    TestCompilation *c2 = nullptr;
+    ASSERT_EQ(cNoFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
+              Result::NO_ERROR);
+    auto compilationResult = cNoFallback.finish(devices);
+    if (hasUnknownDimensions && compilationResult == Result::OP_FAILED &&
+        cNoFallback.getExecutionPlan().forTest_hasSubModelOutputsOfUnknownSize()) {
+        ASSERT_EQ(cWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
+                  Result::NO_ERROR);
+        ASSERT_EQ(cWithFallback.finish(devices), Result::NO_ERROR);
+        c2 = &cWithFallback;
+    } else {
+        ASSERT_EQ(compilationResult, Result::NO_ERROR);
+        c2 = &cNoFallback;
+    }
 
 #ifdef VERBOSE
     {
         std::cout << "signatures = " << signatures.size()
                   << ", devices = " << devices.size() << std::endl;
-        const ExecutionPlan& plan = c2.getExecutionPlan();
+        const ExecutionPlan& plan = c2->getExecutionPlan();
         switch (plan.forTest_getKind()) {
             case ExecutionPlan::Kind::SIMPLE:
                 std::cout << "plan: simple" << std::endl;
@@ -1035,7 +1072,7 @@ TEST_P(RandomPartitioningTest, Test) {
     // and telling the WrapperExecution about them).
     auto prepareForExecution =
             [&model, &ioDescriptors, &ioMemories,
-             &masterInputs, &masterOutput, problemSize](WrapperExecution *e) {
+             &masterInputs, &masterOutput, problemSize, &problemType](WrapperExecution *e) {
         uint32_t inputIndex = 0, outputIndex = 0;
         for (auto &desc : ioDescriptors) {
             if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
@@ -1051,7 +1088,8 @@ TEST_P(RandomPartitioningTest, Test) {
                               desc.mVector.begin() + problemSize * problemSize,
                               masterOutput);
                     e->setOutput(outputIndex++, desc.mVector.data(),
-                                 desc.mVector.size() * sizeof(float));
+                                 desc.mVector.size() * sizeof(float),
+                                 &problemType.operandType);
                 }
             } else {
                 const WrapperMemory* memory;
@@ -1070,7 +1108,8 @@ TEST_P(RandomPartitioningTest, Test) {
                     std::fill(region,
                               region + problemSize * problemSize,
                               masterOutput);
-                    e->setOutputFromMemory(outputIndex++, memory, offset, length);
+                    e->setOutputFromMemory(outputIndex++, memory, offset, length,
+                                           &problemType.operandType);
                 }
             }
         };
@@ -1119,7 +1158,7 @@ TEST_P(RandomPartitioningTest, Test) {
     }
 
     // Partitioned execution.
-    WrapperExecution e2(&c2);
+    WrapperExecution e2(c2);
     ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e2));
     ASSERT_EQ(e2.compute(), Result::NO_ERROR);
 
diff --git a/nn/runtime/test/TestUnknownDimensions.cpp b/nn/runtime/test/TestUnknownDimensions.cpp
index 28e66f6a7..32aea0a14 100644
--- a/nn/runtime/test/TestUnknownDimensions.cpp
+++ b/nn/runtime/test/TestUnknownDimensions.cpp
@@ -30,7 +30,10 @@ namespace {
 const uint32_t INTENDED_SIZE = 3;
 const uint32_t OTHER_SIZE    = 2;
 const uint32_t UNKNOWN_SIZE  = 0;
-typedef float IntendedMatrix[INTENDED_SIZE][INTENDED_SIZE];
+typedef uint8_t IntendedMatrix[INTENDED_SIZE][INTENDED_SIZE];
+
+// TODO: add a float version of this test for use against drivers that don't
+// support quantized add. b/72448000
 
 // We test three basic scenarios for each tensor dimension:
 //     INTENDED_AT_COMPILE_AND_EXECUTE: set the dimension at compile
@@ -72,13 +75,13 @@ auto constantDimensionValues = testing::Values(
         DimensionKind::UNKNOWN_AT_COMPILE_INTENDED_AT_EXECUTE);
 auto ioValues = testing::Combine(ioDimensionValues, ioDimensionValues);
 auto constantValues = testing::Combine(constantDimensionValues, constantDimensionValues);
-
+auto combinedValues = testing::Combine(ioValues, ioValues, constantValues, ioValues);
 
 class UnknownDimensionsTest : public ::testing::TestWithParam<TestParams> {
 protected:
-    const IntendedMatrix ones = { { 1.f, 1.f, 1.f }, { 1.f, 1.f, 1.f }, { 1.f, 1.f, 1.f } };
-    const IntendedMatrix twos = { { 2.f, 2.f, 2.f }, { 2.f, 2.f, 2.f }, { 2.f, 2.f, 2.f } };
-    const IntendedMatrix fives = { { 5.f, 5.f, 5.f }, { 5.f, 5.f, 5.f }, { 5.f, 5.f, 5.f } };
+    const IntendedMatrix ones = { { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 } };
+    const IntendedMatrix twos = { { 2, 2, 2 }, { 2, 2, 2 }, { 2, 2, 2 } };
+    const IntendedMatrix fives = { { 5, 5, 5 }, { 5, 5, 5 }, { 5, 5, 5 } };
 };
 
 TEST_P(UnknownDimensionsTest, UnknownDimensions) {
@@ -111,9 +114,10 @@ TEST_P(UnknownDimensionsTest, UnknownDimensions) {
     auto addOperand = [&model, &getDimForCompile](OperandParams params,
                                                   std::string* scope = nullptr) {
         OperandType matrixTypeWithPotentiallyUnknownDims(
-                Type::TENSOR_FLOAT32,
+                Type::TENSOR_QUANT8_ASYMM,
                 { getDimForCompile(std::get<0>(params), scope),
-                  getDimForCompile(std::get<1>(params), scope) });
+                  getDimForCompile(std::get<1>(params), scope) },
+                1.0f);
         return model.addOperand(&matrixTypeWithPotentiallyUnknownDims);
     };
     auto inputOpd0 = addOperand(paramsForInput0, &input0Scope);
@@ -160,13 +164,13 @@ TEST_P(UnknownDimensionsTest, UnknownDimensions) {
     Compilation compilation(&model);
     ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
 
-    IntendedMatrix actual = { { -1.f, -1.f, -1.f }, { -1.f, -1.f, -1.f }, { -1.f, -1.f, -1.f } };
+    IntendedMatrix actual = { { 10, 10, 10 }, { 10, 10, 10 }, { 10, 10, 10 } };
     Execution execution(&compilation);
 
-    OperandType matrixTypeIntended(Type::TENSOR_FLOAT32, {INTENDED_SIZE, INTENDED_SIZE});
-    OperandType matrixTypeFirstOther(Type::TENSOR_FLOAT32, {OTHER_SIZE, INTENDED_SIZE});
-    OperandType matrixTypeSecondOther(Type::TENSOR_FLOAT32, {INTENDED_SIZE, OTHER_SIZE});
-    OperandType matrixTypeBothOther(Type::TENSOR_FLOAT32, {OTHER_SIZE, OTHER_SIZE});
+    OperandType matrixTypeIntended(Type::TENSOR_QUANT8_ASYMM, {INTENDED_SIZE, INTENDED_SIZE}, 1.0f);
+    OperandType matrixTypeFirstOther(Type::TENSOR_QUANT8_ASYMM, {OTHER_SIZE, INTENDED_SIZE}, 1.0f);
+    OperandType matrixTypeSecondOther(Type::TENSOR_QUANT8_ASYMM, {INTENDED_SIZE, OTHER_SIZE}, 1.0f);
+    OperandType matrixTypeBothOther(Type::TENSOR_QUANT8_ASYMM, {OTHER_SIZE, OTHER_SIZE}, 1.0f);
     bool allAreIntendedSizeAtExecution = true;
 
     // Helper to return appropriate "type" parameter to setInput/setOutput based
@@ -201,7 +205,7 @@ TEST_P(UnknownDimensionsTest, UnknownDimensions) {
             OTHER_SIZE : INTENDED_SIZE;
         size_t secondDim = (second == DimensionKind::UNKNOWN_AT_COMPILE_OTHER_AT_EXECUTE) ?
             OTHER_SIZE : INTENDED_SIZE;
-        return firstDim * secondDim * sizeof(float);
+        return firstDim * secondDim * sizeof(fives[0][0]);
     };
     ASSERT_EQ(execution.setInput(0, ones, sizeAtSet(paramsForInput0), typeAtSet(paramsForInput0)),
               Result::NO_ERROR);
@@ -220,15 +224,14 @@ TEST_P(UnknownDimensionsTest, UnknownDimensions) {
         return;
     }
 
-    using fvec = std::vector<float>;
+    using qvec = std::vector<uint8_t>;
     constexpr size_t count = sizeof(fives) / sizeof(fives[0][0]);
-    compare(
-        MixedTyped{{{0, fvec{&fives[0][0], &fives[0][0] + count}}}, {}, {}},
-        MixedTyped{{{0, fvec{&actual[0][0], &actual[0][0] + count}}}, {}, {}});
+    Quant8Operands expected_opds{{0, qvec{&fives[0][0], &fives[0][0] + count}}};
+    Quant8Operands actual_opds{{0, qvec{&actual[0][0], &actual[0][0] + count}}};
+    compare(MixedTyped{ {}, {}, expected_opds }, MixedTyped{ {}, {}, actual_opds });
 }
 
 INSTANTIATE_TEST_CASE_P(UnknownCombinationsTest, UnknownDimensionsTest,
-                        testing::Combine(ioValues, ioValues,
-                                         constantValues, ioValues));
+                        combinedValues);
 
 }  // end namespace
author	Mika Raento <mikie@google.com>	2018-04-26 18:03:42 +0100
committer	Mika Raento <mikie@google.com>	2018-05-08 20:03:22 +0100
commit	bb255b6e87dc343eb90dec998be1cf153106ab65 (patch)
tree	655a670cf5236fb68176fc8e3d8c144cc3919e31
parent	35e4e2c28a589de70ced656c18df14d6d899237f (diff)
download	ml-bb255b6e87dc343eb90dec998be1cf153106ab65.tar.gz