diff options
author | Xusong Wang <xusongw@google.com> | 2019-01-14 18:53:00 -0800 |
---|---|---|
committer | Xusong Wang <xusongw@google.com> | 2019-01-29 18:27:27 -0800 |
commit | d01b6d6523d5c5a65ee6599635e4f7787832505f (patch) | |
tree | b1da5a197abf1eeff46493584c670da56c50d68b /nn/runtime/Manager.cpp | |
parent | 3d6dd125b74514c2e2d14c18a57a00263bef747b (diff) | |
download | ml-d01b6d6523d5c5a65ee6599635e4f7787832505f.tar.gz |
Implement NNAPI compilation caching runtime.
Implement compilation caching in runtime and add compilation caching
tests.
The token from the user is transformed during the partitioning process by a
SHA-256 hasher based on
- device name
- device version string
- execution preference
- which operations (indexes) are included in the partition
Add tests for compilation caching and the partitioner.
Bug: 119616526
Test: NeuralNetworksTest_static
Change-Id: I41e9dce10d95f744382894299c0d68bb8d10c08c
Diffstat (limited to 'nn/runtime/Manager.cpp')
-rw-r--r-- | nn/runtime/Manager.cpp | 86 |
1 file changed, 64 insertions, 22 deletions
diff --git a/nn/runtime/Manager.cpp b/nn/runtime/Manager.cpp index e3439a5a7..3bd827b9e 100644 --- a/nn/runtime/Manager.cpp +++ b/nn/runtime/Manager.cpp @@ -32,6 +32,7 @@ using ::android::hardware::neuralnetworks::V1_2::implementation::ExecutionCallback; using ::android::hardware::neuralnetworks::V1_2::implementation::PreparedModelCallback; +using HidlToken = hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>; namespace android { namespace nn { @@ -93,11 +94,21 @@ class DriverDevice : public Device { PerformanceInfo getRelaxedFloat32toFloat16Performance() const override { return mRelaxedFloat32toFloat16Performance; } + bool isCachingSupported() const override { return mIsCachingSupported; } int prepareModel(const Model& hidlModel, ExecutionPreference executionPreference, std::shared_ptr<VersionedIPreparedModel>* preparedModel) override; + int prepareModelFromCache(const hidl_handle& cache1, const hidl_handle& cache2, + const HidlToken& token, + std::shared_ptr<VersionedIPreparedModel>* preparedModel); private: + int prepareModelHelper( + const std::function<Return<ErrorStatus>(const sp<IPreparedModelCallback>& callback)>& + prepare, + const std::string& prepareName, + std::shared_ptr<VersionedIPreparedModel>* preparedModel); + std::string mName; std::string mVersionString; VersionedIDevice mInterface; @@ -105,6 +116,7 @@ class DriverDevice : public Device { PerformanceInfo mQuantized8Performance; PerformanceInfo mRelaxedFloat32toFloat16Performance; hidl_vec<Extension> mSupportedExtensions; + bool mIsCachingSupported = false; #ifdef NN_DEBUGGABLE // For debugging: behavior of IDevice::getSupportedOperations for SampleDriver. 
@@ -127,37 +139,40 @@ bool DriverDevice::initialize() { : 0; #endif // NN_DEBUGGABLE - bool success = true; ErrorStatus status = ErrorStatus::GENERAL_FAILURE; Capabilities capabilities; std::tie(status, capabilities) = mInterface.getCapabilities(); if (status != ErrorStatus::NONE) { LOG(ERROR) << "IDevice::getCapabilities returned the error " << toString(status); - success = false; - } else { - VLOG(MANAGER) << "Capab " << capabilities.float32Performance.execTime; - VLOG(MANAGER) << "Capab " << capabilities.quantized8Performance.execTime; - VLOG(MANAGER) << "Capab " << capabilities.relaxedFloat32toFloat16Performance.execTime; - mFloat32Performance = capabilities.float32Performance; - mQuantized8Performance = capabilities.quantized8Performance; - mRelaxedFloat32toFloat16Performance = capabilities.relaxedFloat32toFloat16Performance; + return false; } + VLOG(MANAGER) << "Capab " << capabilities.float32Performance.execTime; + VLOG(MANAGER) << "Capab " << capabilities.quantized8Performance.execTime; + VLOG(MANAGER) << "Capab " << capabilities.relaxedFloat32toFloat16Performance.execTime; + mFloat32Performance = capabilities.float32Performance; + mQuantized8Performance = capabilities.quantized8Performance; + mRelaxedFloat32toFloat16Performance = capabilities.relaxedFloat32toFloat16Performance; std::tie(status, mVersionString) = mInterface.getVersionString(); // TODO(miaowang): add a validation test case for in case of error. 
if (status != ErrorStatus::NONE) { LOG(ERROR) << "IDevice::getVersionString returned the error " << toString(status); - success = false; + return false; } std::tie(status, mSupportedExtensions) = mInterface.getSupportedExtensions(); if (status != ErrorStatus::NONE) { LOG(ERROR) << "IDevice::getSupportedExtensions returned the error " << toString(status); - success = false; + return false; } - return success; + std::tie(status, mIsCachingSupported) = mInterface.isCachingSupported(); + if (status != ErrorStatus::NONE) { + LOG(WARNING) << "IDevice::isCachingSupported returned the error " << toString(status); + mIsCachingSupported = false; + } + return true; } hidl_vec<Extension> DriverDevice::getSupportedExtensions() const { @@ -229,23 +244,21 @@ void DriverDevice::getSupportedOperations(const Model& hidlModel, } // Compilation logic copied from StepExecutor::startComputeOnDevice(). -int DriverDevice::prepareModel(const Model& hidlModel, ExecutionPreference executionPreference, - std::shared_ptr<VersionedIPreparedModel>* preparedModel) { +int DriverDevice::prepareModelHelper( + const std::function<Return<ErrorStatus>(const sp<IPreparedModelCallback>& callback)>& + prepare, + const std::string& prepareName, std::shared_ptr<VersionedIPreparedModel>* preparedModel) { *preparedModel = nullptr; sp<PreparedModelCallback> preparedModelCallback = new PreparedModelCallback(); - // Note that some work within VersionedIDevice will be subtracted from the - // IPC layer - NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "prepareModel"); - Return<ErrorStatus> prepareLaunchStatus = - mInterface.prepareModel(hidlModel, executionPreference, preparedModelCallback); + Return<ErrorStatus> prepareLaunchStatus = prepare(preparedModelCallback); if (!prepareLaunchStatus.isOk()) { - LOG(ERROR) << "ExecutionStep::finishSubModel compilation failed due to transport error: " + LOG(ERROR) << prepareName << " compilation failed due to transport error: " << 
prepareLaunchStatus.description(); return ANEURALNETWORKS_OP_FAILED; } if (prepareLaunchStatus != ErrorStatus::NONE) { - LOG(ERROR) << "ExecutionStep::finishSubModel compilation failed with error: " + LOG(ERROR) << prepareName << " compilation failed with error: " << toString(static_cast<ErrorStatus>(prepareLaunchStatus)); return ANEURALNETWORKS_OP_FAILED; } @@ -256,7 +269,7 @@ int DriverDevice::prepareModel(const Model& hidlModel, ExecutionPreference execu *preparedModel = std::make_shared<VersionedIPreparedModel>(returnedPreparedModel); } if (prepareReturnStatus != ErrorStatus::NONE || *preparedModel == nullptr) { - LOG(ERROR) << "ExecutionPlan compilation on " << getName() << " failed:" + LOG(ERROR) << prepareName << " on " << getName() << " failed:" << " prepareReturnStatus=" << toString(prepareReturnStatus) << ", preparedModel=" << preparedModel->get(); return ANEURALNETWORKS_OP_FAILED; @@ -264,6 +277,29 @@ int DriverDevice::prepareModel(const Model& hidlModel, ExecutionPreference execu return ANEURALNETWORKS_NO_ERROR; } +int DriverDevice::prepareModel(const Model& hidlModel, ExecutionPreference executionPreference, + std::shared_ptr<VersionedIPreparedModel>* preparedModel) { + // Note that some work within VersionedIDevice will be subtracted from the IPC layer + NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "prepareModel"); + return prepareModelHelper( + [this, &hidlModel, &executionPreference](const sp<IPreparedModelCallback>& callback) { + return mInterface.prepareModel(hidlModel, executionPreference, callback); + }, + "prepareModel", preparedModel); +} + +int DriverDevice::prepareModelFromCache(const hidl_handle& cache1, const hidl_handle& cache2, + const HidlToken& token, + std::shared_ptr<VersionedIPreparedModel>* preparedModel) { + // Note that some work within VersionedIDevice will be subtracted from the IPC layer + NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "prepareModelFromCache"); + return prepareModelHelper( + [this, 
&cache1, &cache2, &token](const sp<IPreparedModelCallback>& callback) { + return mInterface.prepareModelFromCache(cache1, cache2, token, callback); + }, + "prepareModelFromCache", preparedModel); +} + // A special abstracted device for the CPU. Only one instance of this class will exist. // Use get() to retrieve it. class CpuDevice : public Device { @@ -287,9 +323,15 @@ class CpuDevice : public Device { PerformanceInfo getFloat32Performance() const override { return kPerformance; } PerformanceInfo getQuantized8Performance() const override { return kPerformance; } PerformanceInfo getRelaxedFloat32toFloat16Performance() const override { return kPerformance; } + bool isCachingSupported() const override { return false; } int prepareModel(const Model& hidlModel, ExecutionPreference executionPreference, std::shared_ptr<VersionedIPreparedModel>* preparedModel) override; + int prepareModelFromCache(const hidl_handle&, const hidl_handle&, const HidlToken&, + std::shared_ptr<VersionedIPreparedModel>*) override { + CHECK(false) << "Should never call prepareModelFromCache on CpuDevice"; + return ANEURALNETWORKS_OP_FAILED; + } private: CpuDevice() = default; |