5 files changed, 92 insertions, 30 deletions
diff --git a/nn/runtime/ExecutionPlan.cpp b/nn/runtime/ExecutionPlan.cpp
index 384de7840..c8f755069 100644
--- a/nn/runtime/ExecutionPlan.cpp
+++ b/nn/runtime/ExecutionPlan.cpp
@@ -749,6 +749,10 @@ const std::vector<std::shared_ptr<ExecutionStep>>& ExecutionPlan::forTest_compou
     return compound()->mSteps;
 }
 
+bool ExecutionPlan::forTest_hasSubModelOutputsOfUnknownSize() const {  // test-only hook
+    return mBody->hasSubModelOutputsOfUnknownSize();  // NOTE(review): no null check on mBody
+}
+
 void ExecutionPlan::SimpleBody::dump() const {
     VLOG(COMPILATION) << "SIMPLE for " << (mDevice == nullptr ? "CPU" : mDevice->getName());
 }
@@ -944,8 +948,8 @@ int ModelBuilder::findBestDeviceForEachOperation(
         int bestChoice = -1;
         float bestPerfVal = 0.0;  // Do not check bestPerfVal if bestChoice < 0.
         for (size_t deviceIndex = 0; deviceIndex < nonCpuDeviceCount; deviceIndex++) {
+            const auto& device = devices[deviceIndex];
             if (canDo[deviceIndex].check(operationIndex)) {
-                const auto& device = devices[deviceIndex];
                 const PerformanceInfo perf = getPerformanceInfo(device, operationIndex);
                 const float perfVal =
                             (preference == ANEURALNETWORKS_PREFER_LOW_POWER ? perf.powerUsage
@@ -954,6 +958,15 @@ int ModelBuilder::findBestDeviceForEachOperation(
                     bestChoice = deviceIndex;
                     bestPerfVal = perfVal;
                 }
+            } else {
+                // Somewhat noisy logging, but this is the only place where
+                // the user of NNAPI can get feedback on why an operation was
+                // not run on a specific device.
+                // Logs O(operationCount * nonCpuDeviceCount) times, but
+                // typically nonCpuDeviceCount is very small.
+                VLOG(COMPILATION) << "Device " << device->getName()
+                                  << " can't do operation "
+                                  << toString(getOperation(operationIndex).type);
             }
         }
         // If it's the OEM op, we'd better have a device able to do it.
diff --git a/nn/runtime/ExecutionPlan.h b/nn/runtime/ExecutionPlan.h
index 843447275..a2d018c0e 100644
--- a/nn/runtime/ExecutionPlan.h
+++ b/nn/runtime/ExecutionPlan.h
@@ -220,6 +220,7 @@ public:
     Kind forTest_getKind() const;
     std::shared_ptr<const Device> forTest_simpleGetDevice() const;
     const std::vector<std::shared_ptr<ExecutionStep>>& forTest_compoundGetSteps() const;
+    bool forTest_hasSubModelOutputsOfUnknownSize() const;
 
 private:
     void findTempsAsSubModelOutputs();
@@ -228,6 +229,7 @@ private:
         virtual ~Body() {}
         virtual void dump() const = 0;
         virtual int finish(const ModelBuilder* fromModel, int32_t executionPreference) = 0;
+        virtual bool hasSubModelOutputsOfUnknownSize() const = 0;
         bool mSuccessfulFinish = false;
     };
 
@@ -237,6 +239,7 @@ private:
 
         void dump() const override;
         int finish(const ModelBuilder* fromModel, int32_t executionPreference) override;
+        bool hasSubModelOutputsOfUnknownSize() const override { return false; }  // always false
 
         std::shared_ptr<Device> mDevice;  // nullptr signifies CPU
         const ModelBuilder* mModel;
@@ -246,6 +249,9 @@ private:
     struct CompoundBody : Body {
         void dump() const override;
         int finish(const ModelBuilder* fromModel, int32_t executionPreference) override;
+        bool hasSubModelOutputsOfUnknownSize() const override {
+            return mHasSubModelOutputOfUnknownSize;  // exposes the CompoundBody flag as-is
+        }
 
         // TODO: Some of the data is working state information that
         // shouldn't be needed after we've constructed but not
diff --git a/nn/runtime/test/TestPartitioning.cpp b/nn/runtime/test/TestPartitioning.cpp
index 9b989b716..b653603cb 100644
--- a/nn/runtime/test/TestPartitioning.cpp
+++ b/nn/runtime/test/TestPartitioning.cpp
@@ -1014,6 +1014,7 @@ TEST_F(PartitioningTest, SetPartitioning) {
     PartitioningCompilation cPWithoutFallback(&model);
     ASSERT_EQ(cPWithoutFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback), Result::NO_ERROR);
     ASSERT_EQ(cPWithoutFallback.finish(devices), Result::OP_FAILED);
+    ASSERT_TRUE(cPWithoutFallback.getExecutionPlan().forTest_hasSubModelOutputsOfUnknownSize());
     ASSERT_EQ(cPWithoutFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::ERROR);
 }
 
diff --git a/nn/runtime/test/TestPartitioningRandom.cpp b/nn/runtime/test/TestPartitioningRandom.cpp
index c302b133f..8613a2b81 100644
--- a/nn/runtime/test/TestPartitioningRandom.cpp
+++ b/nn/runtime/test/TestPartitioningRandom.cpp
@@ -58,6 +58,8 @@
 //     data, randomly assign inputs and outputs to CPU memory or to shared
 //     memory.
 //
+//     Randomly leave dimensions unset for intermediate operands.
+//
 // (2) Randomly generate drivers based on the sample driver, each of which
 //     executes models on the CPU.  They differ according to which operations
 //     they support.
@@ -124,7 +126,7 @@ static const unsigned kMaxProblemSize = 8;
 static const unsigned kFirstSeed = 0;
 
 // Number of test cases.
-static const unsigned kNumTestCases = 200;
+static const unsigned kNumTestCases = 225;
 
 // Force all graph weights into a single pool (as we recommend to users)
 // or allow them to be distributed across multiple pools (more stress
@@ -548,11 +550,13 @@ TEST_P(RandomPartitioningTest, Test) {
 
     const unsigned problemSize = 1+randUInt(kMaxProblemSize);
     const WrapperOperandType problemType(WrapperType::TENSOR_FLOAT32, { problemSize, problemSize });
+    const WrapperOperandType unknownDimensionsType(WrapperType::TENSOR_FLOAT32, { 0, 0 });
 
     static const WrapperOperandType activationFunctionType(WrapperType::INT32, { });
 
     const unsigned numOperations = 2+randUInt(kMaxNumOperations-1);
     const bool allowDeadOperations = (randFrac() < 0.2);
+    const bool allowUnknownDimensions = (randFrac() < 0.25);
 
     // TODO: The current algorithm builds the graph in a forward
     // direction (i.e., later-generated operations consume outputs
@@ -598,6 +602,11 @@ TEST_P(RandomPartitioningTest, Test) {
     // operations).
     unsigned rootOperationCount = 0;
 
+    // Track if we added operands with unknown dimensions. In this case,
+    // partitioned compilation will fail if such an operand is read in a
+    // different partition than it is written.
+    bool hasUnknownDimensions = false;
+
     // Generate operations.
     for (unsigned i = 0; i < numOperations; i++) {
         const unsigned operationPatternIndex =
@@ -788,7 +797,18 @@ TEST_P(RandomPartitioningTest, Test) {
 
         std::vector<uint32_t> operationOutputs(operationPattern.mNumOutputs);
         std::generate(operationOutputs.begin(), operationOutputs.end(),
-                      [&model, &problemType]{ return model.addOperand(&problemType); });
+                      [&model, &problemType, &unknownDimensionsType, &hasUnknownDimensions,
+                       allowUnknownDimensions, this]{
+                          // 3% unknowns causes ~35% of partitionings to fail
+                          // (determined by commenting out the fallback code,
+                          // running tests and noting number of failures).
+                          if (allowUnknownDimensions && randFrac() < 0.03) {
+                              hasUnknownDimensions = true;
+                              return model.addOperand(&unknownDimensionsType);
+                          } else {
+                              return model.addOperand(&problemType);
+                          }
+                      });
 
         // OPERATION ///////////////////////////////////////////////////////////////////////////////
 
@@ -921,15 +941,32 @@ TEST_P(RandomPartitioningTest, Test) {
     }
 
     // Partitioned compilation.
-    TestCompilation c2(&model);
-    ASSERT_EQ(c2.setPartitioning(DeviceManager::kPartitioningWithoutFallback), Result::NO_ERROR);
-    ASSERT_EQ(c2.finish(devices), Result::NO_ERROR);
+    // For test cases without unknown intermediate operand sizes we require the
+    // partitioning to succeed without CPU fallback. With unknown sizes we
+    // retry with a fallback if the non-fallback partitioning fails and require
+    // the fallback to succeed.
+    TestCompilation cNoFallback(&model);
+    TestCompilation cWithFallback(&model);
+    TestCompilation *c2 = nullptr;
+    ASSERT_EQ(cNoFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
+              Result::NO_ERROR);
+    auto compilationResult = cNoFallback.finish(devices);
+    if (hasUnknownDimensions && compilationResult == Result::OP_FAILED &&
+        cNoFallback.getExecutionPlan().forTest_hasSubModelOutputsOfUnknownSize()) {
+        ASSERT_EQ(cWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
+                  Result::NO_ERROR);
+        ASSERT_EQ(cWithFallback.finish(devices), Result::NO_ERROR);
+        c2 = &cWithFallback;
+    } else {
+        ASSERT_EQ(compilationResult, Result::NO_ERROR);
+        c2 = &cNoFallback;
+    }
 
 #ifdef VERBOSE
     {
         std::cout << "signatures = " << signatures.size()
                   << ", devices = " << devices.size() << std::endl;
-        const ExecutionPlan& plan = c2.getExecutionPlan();
+        const ExecutionPlan& plan = c2->getExecutionPlan();
         switch (plan.forTest_getKind()) {
             case ExecutionPlan::Kind::SIMPLE:
                 std::cout << "plan: simple" << std::endl;
@@ -1035,7 +1072,7 @@ TEST_P(RandomPartitioningTest, Test) {
     // and telling the WrapperExecution about them).
     auto prepareForExecution =
             [&model, &ioDescriptors, &ioMemories,
-             &masterInputs, &masterOutput, problemSize](WrapperExecution *e) {
+             &masterInputs, &masterOutput, problemSize, &problemType](WrapperExecution *e) {
         uint32_t inputIndex = 0, outputIndex = 0;
         for (auto &desc : ioDescriptors) {
             if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
@@ -1051,7 +1088,8 @@ TEST_P(RandomPartitioningTest, Test) {
                               desc.mVector.begin() + problemSize * problemSize,
                               masterOutput);
                     e->setOutput(outputIndex++, desc.mVector.data(),
-                                 desc.mVector.size() * sizeof(float));
+                                 desc.mVector.size() * sizeof(float),
+                                 &problemType.operandType);
                 }
             } else {
                 const WrapperMemory* memory;
@@ -1070,7 +1108,8 @@ TEST_P(RandomPartitioningTest, Test) {
                     std::fill(region,
                               region + problemSize * problemSize,
                               masterOutput);
-                    e->setOutputFromMemory(outputIndex++, memory, offset, length);
+                    e->setOutputFromMemory(outputIndex++, memory, offset, length,
+                                           &problemType.operandType);
                 }
             }
         };
@@ -1119,7 +1158,7 @@ TEST_P(RandomPartitioningTest, Test) {
     }
 
     // Partitioned execution.
-    WrapperExecution e2(&c2);
+    WrapperExecution e2(c2);
     ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e2));
     ASSERT_EQ(e2.compute(), Result::NO_ERROR);
 
diff --git a/nn/runtime/test/TestUnknownDimensions.cpp b/nn/runtime/test/TestUnknownDimensions.cpp
index 28e66f6a7..32aea0a14 100644
--- a/nn/runtime/test/TestUnknownDimensions.cpp
+++ b/nn/runtime/test/TestUnknownDimensions.cpp
@@ -30,7 +30,10 @@ namespace {
 const uint32_t INTENDED_SIZE = 3;
 const uint32_t OTHER_SIZE    = 2;
 const uint32_t UNKNOWN_SIZE  = 0;
-typedef float IntendedMatrix[INTENDED_SIZE][INTENDED_SIZE];
+typedef uint8_t IntendedMatrix[INTENDED_SIZE][INTENDED_SIZE];
+
+// TODO: add a float version of this test for use against drivers that don't
+// support quantized add. b/72448000
 
 // We test three basic scenarios for each tensor dimension:
 //     INTENDED_AT_COMPILE_AND_EXECUTE: set the dimension at compile
@@ -72,13 +75,13 @@ auto constantDimensionValues = testing::Values(
         DimensionKind::UNKNOWN_AT_COMPILE_INTENDED_AT_EXECUTE);
 auto ioValues = testing::Combine(ioDimensionValues, ioDimensionValues);
 auto constantValues = testing::Combine(constantDimensionValues, constantDimensionValues);
-
+auto combinedValues = testing::Combine(ioValues, ioValues, constantValues, ioValues);
 
 class UnknownDimensionsTest : public ::testing::TestWithParam<TestParams> {
 protected:
-    const IntendedMatrix ones = { { 1.f, 1.f, 1.f }, { 1.f, 1.f, 1.f }, { 1.f, 1.f, 1.f } };
-    const IntendedMatrix twos = { { 2.f, 2.f, 2.f }, { 2.f, 2.f, 2.f }, { 2.f, 2.f, 2.f } };
-    const IntendedMatrix fives = { { 5.f, 5.f, 5.f }, { 5.f, 5.f, 5.f }, { 5.f, 5.f, 5.f } };
+    const IntendedMatrix ones = { { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 } };
+    const IntendedMatrix twos = { { 2, 2, 2 }, { 2, 2, 2 }, { 2, 2, 2 } };
+    const IntendedMatrix fives = { { 5, 5, 5 }, { 5, 5, 5 }, { 5, 5, 5 } };
 };
 
 TEST_P(UnknownDimensionsTest, UnknownDimensions) {
@@ -111,9 +114,10 @@ TEST_P(UnknownDimensionsTest, UnknownDimensions) {
     auto addOperand = [&model, &getDimForCompile](OperandParams params,
                                                   std::string* scope = nullptr) {
         OperandType matrixTypeWithPotentiallyUnknownDims(
-                Type::TENSOR_FLOAT32,
+                Type::TENSOR_QUANT8_ASYMM,
                 { getDimForCompile(std::get<0>(params), scope),
-                  getDimForCompile(std::get<1>(params), scope) });
+                  getDimForCompile(std::get<1>(params), scope) },
+                1.0f);
         return model.addOperand(&matrixTypeWithPotentiallyUnknownDims);
     };
     auto inputOpd0 = addOperand(paramsForInput0, &input0Scope);
@@ -160,13 +164,13 @@ TEST_P(UnknownDimensionsTest, UnknownDimensions) {
     Compilation compilation(&model);
     ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
 
-    IntendedMatrix actual = { { -1.f, -1.f, -1.f }, { -1.f, -1.f, -1.f }, { -1.f, -1.f, -1.f } };
+    IntendedMatrix actual = { { 10, 10, 10 }, { 10, 10, 10 }, { 10, 10, 10 } };
     Execution execution(&compilation);
 
-    OperandType matrixTypeIntended(Type::TENSOR_FLOAT32, {INTENDED_SIZE, INTENDED_SIZE});
-    OperandType matrixTypeFirstOther(Type::TENSOR_FLOAT32, {OTHER_SIZE, INTENDED_SIZE});
-    OperandType matrixTypeSecondOther(Type::TENSOR_FLOAT32, {INTENDED_SIZE, OTHER_SIZE});
-    OperandType matrixTypeBothOther(Type::TENSOR_FLOAT32, {OTHER_SIZE, OTHER_SIZE});
+    OperandType matrixTypeIntended(Type::TENSOR_QUANT8_ASYMM, {INTENDED_SIZE, INTENDED_SIZE}, 1.0f);
+    OperandType matrixTypeFirstOther(Type::TENSOR_QUANT8_ASYMM, {OTHER_SIZE, INTENDED_SIZE}, 1.0f);
+    OperandType matrixTypeSecondOther(Type::TENSOR_QUANT8_ASYMM, {INTENDED_SIZE, OTHER_SIZE}, 1.0f);
+    OperandType matrixTypeBothOther(Type::TENSOR_QUANT8_ASYMM, {OTHER_SIZE, OTHER_SIZE}, 1.0f);
     bool allAreIntendedSizeAtExecution = true;
 
     // Helper to return appropriate "type" parameter to setInput/setOutput based
@@ -201,7 +205,7 @@ TEST_P(UnknownDimensionsTest, UnknownDimensions) {
             OTHER_SIZE : INTENDED_SIZE;
         size_t secondDim = (second == DimensionKind::UNKNOWN_AT_COMPILE_OTHER_AT_EXECUTE) ?
             OTHER_SIZE : INTENDED_SIZE;
-        return firstDim * secondDim * sizeof(float);
+        return firstDim * secondDim * sizeof(fives[0][0]);
     };
     ASSERT_EQ(execution.setInput(0, ones, sizeAtSet(paramsForInput0), typeAtSet(paramsForInput0)),
               Result::NO_ERROR);
@@ -220,15 +224,14 @@ TEST_P(UnknownDimensionsTest, UnknownDimensions) {
         return;
     }
 
-    using fvec = std::vector<float>;
+    using qvec = std::vector<uint8_t>;
     constexpr size_t count = sizeof(fives) / sizeof(fives[0][0]);
-    compare(
-        MixedTyped{{{0, fvec{&fives[0][0], &fives[0][0] + count}}}, {}, {}},
-        MixedTyped{{{0, fvec{&actual[0][0], &actual[0][0] + count}}}, {}, {}});
+    Quant8Operands expected_opds{{0, qvec{&fives[0][0], &fives[0][0] + count}}};
+    Quant8Operands actual_opds{{0, qvec{&actual[0][0], &actual[0][0] + count}}};
+    compare(MixedTyped{ {}, {}, expected_opds }, MixedTyped{ {}, {}, actual_opds });
 }
 
 INSTANTIATE_TEST_CASE_P(UnknownCombinationsTest, UnknownDimensionsTest,
-                        testing::Combine(ioValues, ioValues,
-                                         constantValues, ioValues));
+                        combinedValues);
 
 }  // end namespace