diff options
-rw-r--r-- | nn/runtime/ExecutionPlan.cpp | 15 | ||||
-rw-r--r-- | nn/runtime/ExecutionPlan.h | 6 | ||||
-rw-r--r-- | nn/runtime/test/TestPartitioning.cpp | 1 | ||||
-rw-r--r-- | nn/runtime/test/TestPartitioningRandom.cpp | 59 | ||||
-rw-r--r-- | nn/runtime/test/TestUnknownDimensions.cpp | 41 |
5 files changed, 92 insertions, 30 deletions
diff --git a/nn/runtime/ExecutionPlan.cpp b/nn/runtime/ExecutionPlan.cpp index 384de7840..c8f755069 100644 --- a/nn/runtime/ExecutionPlan.cpp +++ b/nn/runtime/ExecutionPlan.cpp @@ -749,6 +749,10 @@ const std::vector<std::shared_ptr<ExecutionStep>>& ExecutionPlan::forTest_compou return compound()->mSteps; } +bool ExecutionPlan::forTest_hasSubModelOutputsOfUnknownSize() const { + return mBody->hasSubModelOutputsOfUnknownSize(); +} + void ExecutionPlan::SimpleBody::dump() const { VLOG(COMPILATION) << "SIMPLE for " << (mDevice == nullptr ? "CPU" : mDevice->getName()); } @@ -944,8 +948,8 @@ int ModelBuilder::findBestDeviceForEachOperation( int bestChoice = -1; float bestPerfVal = 0.0; // Do not check bestPerfVal if bestChoice < 0. for (size_t deviceIndex = 0; deviceIndex < nonCpuDeviceCount; deviceIndex++) { + const auto& device = devices[deviceIndex]; if (canDo[deviceIndex].check(operationIndex)) { - const auto& device = devices[deviceIndex]; const PerformanceInfo perf = getPerformanceInfo(device, operationIndex); const float perfVal = (preference == ANEURALNETWORKS_PREFER_LOW_POWER ? perf.powerUsage @@ -954,6 +958,15 @@ int ModelBuilder::findBestDeviceForEachOperation( bestChoice = deviceIndex; bestPerfVal = perfVal; } + } else { + // Somewhat noisy logging, but only place where the user of + // NNAPI can get feedback on why an operation was not run on a + // specific device. + // Logs O(operationCount * nonCpuDeviceCount) times, but + // typically nonCpuDeviceCount is very small. + VLOG(COMPILATION) << "Device " << device->getName() + << " can't do operation " + << toString(getOperation(operationIndex).type); } } // If it's the OEM op, we'd better have a device able to do it. diff --git a/nn/runtime/ExecutionPlan.h b/nn/runtime/ExecutionPlan.h index 843447275..a2d018c0e 100644 --- a/nn/runtime/ExecutionPlan.h +++ b/nn/runtime/ExecutionPlan.h @@ -220,6 +220,7 @@ public: Kind forTest_getKind() const; std::shared_ptr<const Device> forTest_simpleGetDevice() const; const std::vector<std::shared_ptr<ExecutionStep>>& forTest_compoundGetSteps() const; + bool forTest_hasSubModelOutputsOfUnknownSize() const; private: void findTempsAsSubModelOutputs(); @@ -228,6 +229,7 @@ private: virtual ~Body() {} virtual void dump() const = 0; virtual int finish(const ModelBuilder* fromModel, int32_t executionPreference) = 0; + virtual bool hasSubModelOutputsOfUnknownSize() const = 0; bool mSuccessfulFinish = false; }; @@ -237,6 +239,7 @@ private: void dump() const override; int finish(const ModelBuilder* fromModel, int32_t executionPreference) override; + virtual bool hasSubModelOutputsOfUnknownSize() const override { return false; } std::shared_ptr<Device> mDevice; // nullptr signifies CPU const ModelBuilder* mModel; @@ -246,6 +249,9 @@ private: struct CompoundBody : Body { void dump() const override; int finish(const ModelBuilder* fromModel, int32_t executionPreference) override; + virtual bool hasSubModelOutputsOfUnknownSize() const override { + return mHasSubModelOutputOfUnknownSize; + } // TODO: Some of the data is working state information that // shouldn't be needed after we've constructed but not diff --git a/nn/runtime/test/TestPartitioning.cpp b/nn/runtime/test/TestPartitioning.cpp index 9b989b716..b653603cb 100644 --- a/nn/runtime/test/TestPartitioning.cpp +++ b/nn/runtime/test/TestPartitioning.cpp @@ -1014,6 +1014,7 @@ TEST_F(PartitioningTest, SetPartitioning) { PartitioningCompilation cPWithoutFallback(&model); ASSERT_EQ(cPWithoutFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback), Result::NO_ERROR); ASSERT_EQ(cPWithoutFallback.finish(devices), Result::OP_FAILED); + ASSERT_TRUE(cPWithoutFallback.getExecutionPlan().forTest_hasSubModelOutputsOfUnknownSize()); ASSERT_EQ(cPWithoutFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::ERROR); } diff --git a/nn/runtime/test/TestPartitioningRandom.cpp b/nn/runtime/test/TestPartitioningRandom.cpp index c302b133f..8613a2b81 100644 --- a/nn/runtime/test/TestPartitioningRandom.cpp +++ b/nn/runtime/test/TestPartitioningRandom.cpp @@ -58,6 +58,8 @@ // data, randomly assign inputs and outputs to CPU memory or to shared // memory. // +// Randomly leaves dimensions unset for intermediate operands. +// // (2) Randomly generate drivers based on the sample driver, each of which // executes models on the CPU. They differ according to which operations // they support. @@ -124,7 +126,7 @@ static const unsigned kMaxProblemSize = 8; static const unsigned kFirstSeed = 0; // Number of test cases. -static const unsigned kNumTestCases = 200; +static const unsigned kNumTestCases = 225; // Force all graph weights into a single pool (as we recommend to users) // or allow them to be distributed across multiple pools (more stress @@ -548,11 +550,13 @@ TEST_P(RandomPartitioningTest, Test) { const unsigned problemSize = 1+randUInt(kMaxProblemSize); const WrapperOperandType problemType(WrapperType::TENSOR_FLOAT32, { problemSize, problemSize }); + const WrapperOperandType unknownDimensionsType(WrapperType::TENSOR_FLOAT32, { 0, 0 }); static const WrapperOperandType activationFunctionType(WrapperType::INT32, { }); const unsigned numOperations = 2+randUInt(kMaxNumOperations-1); const bool allowDeadOperations = (randFrac() < 0.2); + const bool allowUnknownDimensions = (randFrac() < 0.25); // TODO: The current algorithm builds the graph in a forward // direction (i.e., later-generated operations consume outputs @@ -598,6 +602,11 @@ TEST_P(RandomPartitioningTest, Test) { // operations). unsigned rootOperationCount = 0; + // Track if we added operands with unknown dimensions. In this case, + // partitioned compilation will fail if such an operand is read in a + // different partition than it is written. + bool hasUnknownDimensions = false; + // Generate operations. for (unsigned i = 0; i < numOperations; i++) { const unsigned operationPatternIndex = @@ -788,7 +797,18 @@ TEST_P(RandomPartitioningTest, Test) { std::vector<uint32_t> operationOutputs(operationPattern.mNumOutputs); std::generate(operationOutputs.begin(), operationOutputs.end(), - [&model, &problemType]{ return model.addOperand(&problemType); }); + [&model, &problemType, &unknownDimensionsType, &hasUnknownDimensions, + allowUnknownDimensions, this]{ + // 3% unknowns causes ~35% of partitionings to fail + // (determined by commenting out the fallback code, + // running tests and noting number of failures). + if (allowUnknownDimensions && randFrac() < 0.03) { + hasUnknownDimensions = true; + return model.addOperand(&unknownDimensionsType); + } else { + return model.addOperand(&problemType); + } + }); // OPERATION /////////////////////////////////////////////////////////////////////////////// @@ -921,15 +941,32 @@ TEST_P(RandomPartitioningTest, Test) { } // Partitioned compilation. - TestCompilation c2(&model); - ASSERT_EQ(c2.setPartitioning(DeviceManager::kPartitioningWithoutFallback), Result::NO_ERROR); - ASSERT_EQ(c2.finish(devices), Result::NO_ERROR); + // For test cases without unknown intermediate operand sizes we require the + // partitioning to succeed without CPU fallback. With unknown sizes we + // retry with a fallback if the non-fallback partitioning fails and require + // the fallback to succeed. + TestCompilation cNoFallback(&model); + TestCompilation cWithFallback(&model); + TestCompilation *c2 = nullptr; + ASSERT_EQ(cNoFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback), + Result::NO_ERROR); + auto compilationResult = cNoFallback.finish(devices); + if (hasUnknownDimensions && compilationResult == Result::OP_FAILED && + cNoFallback.getExecutionPlan().forTest_hasSubModelOutputsOfUnknownSize()) { + ASSERT_EQ(cWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback), + Result::NO_ERROR); + ASSERT_EQ(cWithFallback.finish(devices), Result::NO_ERROR); + c2 = &cWithFallback; + } else { + ASSERT_EQ(compilationResult, Result::NO_ERROR); + c2 = &cNoFallback; + } #ifdef VERBOSE { std::cout << "signatures = " << signatures.size() << ", devices = " << devices.size() << std::endl; - const ExecutionPlan& plan = c2.getExecutionPlan(); + const ExecutionPlan& plan = c2->getExecutionPlan(); switch (plan.forTest_getKind()) { case ExecutionPlan::Kind::SIMPLE: std::cout << "plan: simple" << std::endl; @@ -1035,7 +1072,7 @@ TEST_P(RandomPartitioningTest, Test) { // and telling the WrapperExecution about them). auto prepareForExecution = [&model, &ioDescriptors, &ioMemories, - &masterInputs, &masterOutput, problemSize](WrapperExecution *e) { + &masterInputs, &masterOutput, problemSize, &problemType](WrapperExecution *e) { uint32_t inputIndex = 0, outputIndex = 0; for (auto &desc : ioDescriptors) { if (desc.getLocation() == InputOutputDescriptor::VECTOR) { @@ -1051,7 +1088,8 @@ TEST_P(RandomPartitioningTest, Test) { desc.mVector.begin() + problemSize * problemSize, masterOutput); e->setOutput(outputIndex++, desc.mVector.data(), - desc.mVector.size() * sizeof(float)); + desc.mVector.size() * sizeof(float), + &problemType.operandType); } } else { const WrapperMemory* memory; @@ -1070,7 +1108,8 @@ TEST_P(RandomPartitioningTest, Test) { std::fill(region, region + problemSize * problemSize, masterOutput); - e->setOutputFromMemory(outputIndex++, memory, offset, length); + e->setOutputFromMemory(outputIndex++, memory, offset, length, + &problemType.operandType); } } }; @@ -1119,7 +1158,7 @@ TEST_P(RandomPartitioningTest, Test) { } // Partitioned execution. - WrapperExecution e2(&c2); + WrapperExecution e2(c2); ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e2)); ASSERT_EQ(e2.compute(), Result::NO_ERROR); diff --git a/nn/runtime/test/TestUnknownDimensions.cpp b/nn/runtime/test/TestUnknownDimensions.cpp index 28e66f6a7..32aea0a14 100644 --- a/nn/runtime/test/TestUnknownDimensions.cpp +++ b/nn/runtime/test/TestUnknownDimensions.cpp @@ -30,7 +30,10 @@ namespace { const uint32_t INTENDED_SIZE = 3; const uint32_t OTHER_SIZE = 2; const uint32_t UNKNOWN_SIZE = 0; -typedef float IntendedMatrix[INTENDED_SIZE][INTENDED_SIZE]; +typedef uint8_t IntendedMatrix[INTENDED_SIZE][INTENDED_SIZE]; + +// TODO: add a float version of this test for use against drivers that don't +// support quantized add. b/72448000 // We test three basic scenarios for each tensor dimension: // INTENDED_AT_COMPILE_AND_EXECUTE: set the dimension at compile @@ -72,13 +75,13 @@ auto constantDimensionValues = testing::Values( DimensionKind::UNKNOWN_AT_COMPILE_INTENDED_AT_EXECUTE); auto ioValues = testing::Combine(ioDimensionValues, ioDimensionValues); auto constantValues = testing::Combine(constantDimensionValues, constantDimensionValues); - +auto combinedValues = testing::Combine(ioValues, ioValues, constantValues, ioValues); class UnknownDimensionsTest : public ::testing::TestWithParam<TestParams> { protected: - const IntendedMatrix ones = { { 1.f, 1.f, 1.f }, { 1.f, 1.f, 1.f }, { 1.f, 1.f, 1.f } }; - const IntendedMatrix twos = { { 2.f, 2.f, 2.f }, { 2.f, 2.f, 2.f }, { 2.f, 2.f, 2.f } }; - const IntendedMatrix fives = { { 5.f, 5.f, 5.f }, { 5.f, 5.f, 5.f }, { 5.f, 5.f, 5.f } }; + const IntendedMatrix ones = { { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 } }; + const IntendedMatrix twos = { { 2, 2, 2 }, { 2, 2, 2 }, { 2, 2, 2 } }; + const IntendedMatrix fives = { { 5, 5, 5 }, { 5, 5, 5 }, { 5, 5, 5 } }; }; TEST_P(UnknownDimensionsTest, UnknownDimensions) { @@ -111,9 +114,10 @@ TEST_P(UnknownDimensionsTest, UnknownDimensions) { auto addOperand = [&model, &getDimForCompile](OperandParams params, std::string* scope = nullptr) { OperandType matrixTypeWithPotentiallyUnknownDims( - Type::TENSOR_FLOAT32, + Type::TENSOR_QUANT8_ASYMM, { getDimForCompile(std::get<0>(params), scope), - getDimForCompile(std::get<1>(params), scope) }); + getDimForCompile(std::get<1>(params), scope) }, + 1.0f); return model.addOperand(&matrixTypeWithPotentiallyUnknownDims); }; auto inputOpd0 = addOperand(paramsForInput0, &input0Scope); @@ -160,13 +164,13 @@ TEST_P(UnknownDimensionsTest, UnknownDimensions) { Compilation compilation(&model); ASSERT_EQ(compilation.finish(), Result::NO_ERROR); - IntendedMatrix actual = { { -1.f, -1.f, -1.f }, { -1.f, -1.f, -1.f }, { -1.f, -1.f, -1.f } }; + IntendedMatrix actual = { { 10, 10, 10 }, { 10, 10, 10 }, { 10, 10, 10 } }; Execution execution(&compilation); - OperandType matrixTypeIntended(Type::TENSOR_FLOAT32, {INTENDED_SIZE, INTENDED_SIZE}); - OperandType matrixTypeFirstOther(Type::TENSOR_FLOAT32, {OTHER_SIZE, INTENDED_SIZE}); - OperandType matrixTypeSecondOther(Type::TENSOR_FLOAT32, {INTENDED_SIZE, OTHER_SIZE}); - OperandType matrixTypeBothOther(Type::TENSOR_FLOAT32, {OTHER_SIZE, OTHER_SIZE}); + OperandType matrixTypeIntended(Type::TENSOR_QUANT8_ASYMM, {INTENDED_SIZE, INTENDED_SIZE}, 1.0f); + OperandType matrixTypeFirstOther(Type::TENSOR_QUANT8_ASYMM, {OTHER_SIZE, INTENDED_SIZE}, 1.0f); + OperandType matrixTypeSecondOther(Type::TENSOR_QUANT8_ASYMM, {INTENDED_SIZE, OTHER_SIZE}, 1.0f); + OperandType matrixTypeBothOther(Type::TENSOR_QUANT8_ASYMM, {OTHER_SIZE, OTHER_SIZE}, 1.0f); bool allAreIntendedSizeAtExecution = true; // Helper to return appropriate "type" parameter to setInput/setOutput based @@ -201,7 +205,7 @@ TEST_P(UnknownDimensionsTest, UnknownDimensions) { OTHER_SIZE : INTENDED_SIZE; size_t secondDim = (second == DimensionKind::UNKNOWN_AT_COMPILE_OTHER_AT_EXECUTE) ? OTHER_SIZE : INTENDED_SIZE; - return firstDim * secondDim * sizeof(float); + return firstDim * secondDim * sizeof(fives[0][0]); }; ASSERT_EQ(execution.setInput(0, ones, sizeAtSet(paramsForInput0), typeAtSet(paramsForInput0)), Result::NO_ERROR); @@ -220,15 +224,14 @@ TEST_P(UnknownDimensionsTest, UnknownDimensions) { return; } - using fvec = std::vector<float>; + using qvec = std::vector<uint8_t>; constexpr size_t count = sizeof(fives) / sizeof(fives[0][0]); - compare( - MixedTyped{{{0, fvec{&fives[0][0], &fives[0][0] + count}}}, {}, {}}, - MixedTyped{{{0, fvec{&actual[0][0], &actual[0][0] + count}}}, {}, {}}); + Quant8Operands expected_opds{{0, qvec{&fives[0][0], &fives[0][0] + count}}}; + Quant8Operands actual_opds{{0, qvec{&actual[0][0], &actual[0][0] + count}}}; + compare(MixedTyped{ {}, {}, expected_opds }, MixedTyped{ {}, {}, actual_opds }); } INSTANTIATE_TEST_CASE_P(UnknownCombinationsTest, UnknownDimensionsTest, - testing::Combine(ioValues, ioValues, - constantValues, ioValues)); + combinedValues); } // end namespace |