diff options
author | Xusong Wang <xusongw@google.com> | 2019-01-14 18:53:00 -0800 |
---|---|---|
committer | Xusong Wang <xusongw@google.com> | 2019-01-29 18:27:27 -0800 |
commit | d01b6d6523d5c5a65ee6599635e4f7787832505f (patch) | |
tree | b1da5a197abf1eeff46493584c670da56c50d68b /nn/runtime/Manager.cpp | |
parent | 3d6dd125b74514c2e2d14c18a57a00263bef747b (diff) | |
download | ml-d01b6d6523d5c5a65ee6599635e4f7787832505f.tar.gz |
Implement NNAPI compilation caching runtime.
Implement compilation caching in runtime and add compilation caching
tests.
The token from the user is transformed during the partitioning process by a
SHA-256 hasher based on
- device name
- device version string
- execution preference
- which operations (indexes) are included in the partition
Add tests for compilation caching and the partitioner.
Bug: 119616526
Test: NeuralNetworksTest_static
Change-Id: I41e9dce10d95f744382894299c0d68bb8d10c08c
Diffstat (limited to 'nn/runtime/Manager.cpp')
-rw-r--r-- | nn/runtime/Manager.cpp | 86 |
1 file changed, 64 insertions, 22 deletions
diff --git a/nn/runtime/Manager.cpp b/nn/runtime/Manager.cpp index e3439a5a7..3bd827b9e 100644 --- a/nn/runtime/Manager.cpp +++ b/nn/runtime/Manager.cpp @@ -32,6 +32,7 @@ using ::android::hardware::neuralnetworks::V1_2::implementation::ExecutionCallback; using ::android::hardware::neuralnetworks::V1_2::implementation::PreparedModelCallback; +using HidlToken = hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>; namespace android { namespace nn { @@ -93,11 +94,21 @@ class DriverDevice : public Device { PerformanceInfo getRelaxedFloat32toFloat16Performance() const override { return mRelaxedFloat32toFloat16Performance; } + bool isCachingSupported() const override { return mIsCachingSupported; } int prepareModel(const Model& hidlModel, ExecutionPreference executionPreference, std::shared_ptr<VersionedIPreparedModel>* preparedModel) override; + int prepareModelFromCache(const hidl_handle& cache1, const hidl_handle& cache2, + const HidlToken& token, + std::shared_ptr<VersionedIPreparedModel>* preparedModel); private: + int prepareModelHelper( + const std::function<Return<ErrorStatus>(const sp<IPreparedModelCallback>& callback)>& + prepare, + const std::string& prepareName, + std::shared_ptr<VersionedIPreparedModel>* preparedModel); + std::string mName; std::string mVersionString; VersionedIDevice mInterface; @@ -105,6 +116,7 @@ class DriverDevice : public Device { PerformanceInfo mQuantized8Performance; PerformanceInfo mRelaxedFloat32toFloat16Performance; hidl_vec<Extension> mSupportedExtensions; + bool mIsCachingSupported = false; #ifdef NN_DEBUGGABLE // For debugging: behavior of IDevice::getSupportedOperations for SampleDriver. 
@@ -127,37 +139,40 @@ bool DriverDevice::initialize() { : 0; #endif // NN_DEBUGGABLE - bool success = true; ErrorStatus status = ErrorStatus::GENERAL_FAILURE; Capabilities capabilities; std::tie(status, capabilities) = mInterface.getCapabilities(); if (status != ErrorStatus::NONE) { LOG(ERROR) << "IDevice::getCapabilities returned the error " << toString(status); - success = false; - } else { - VLOG(MANAGER) << "Capab " << capabilities.float32Performance.execTime; - VLOG(MANAGER) << "Capab " << capabilities.quantized8Performance.execTime; - VLOG(MANAGER) << "Capab " << capabilities.relaxedFloat32toFloat16Performance.execTime; - mFloat32Performance = capabilities.float32Performance; - mQuantized8Performance = capabilities.quantized8Performance; - mRelaxedFloat32toFloat16Performance = capabilities.relaxedFloat32toFloat16Performance; + return false; } + VLOG(MANAGER) << "Capab " << capabilities.float32Performance.execTime; + VLOG(MANAGER) << "Capab " << capabilities.quantized8Performance.execTime; + VLOG(MANAGER) << "Capab " << capabilities.relaxedFloat32toFloat16Performance.execTime; + mFloat32Performance = capabilities.float32Performance; + mQuantized8Performance = capabilities.quantized8Performance; + mRelaxedFloat32toFloat16Performance = capabilities.relaxedFloat32toFloat16Performance; std::tie(status, mVersionString) = mInterface.getVersionString(); // TODO(miaowang): add a validation test case for in case of error. 
if (status != ErrorStatus::NONE) { LOG(ERROR) << "IDevice::getVersionString returned the error " << toString(status); - success = false; + return false; } std::tie(status, mSupportedExtensions) = mInterface.getSupportedExtensions(); if (status != ErrorStatus::NONE) { LOG(ERROR) << "IDevice::getSupportedExtensions returned the error " << toString(status); - success = false; + return false; } - return success; + std::tie(status, mIsCachingSupported) = mInterface.isCachingSupported(); + if (status != ErrorStatus::NONE) { + LOG(WARNING) << "IDevice::isCachingSupported returned the error " << toString(status); + mIsCachingSupported = false; + } + return true; } hidl_vec<Extension> DriverDevice::getSupportedExtensions() const { @@ -229,23 +244,21 @@ void DriverDevice::getSupportedOperations(const Model& hidlModel, } // Compilation logic copied from StepExecutor::startComputeOnDevice(). -int DriverDevice::prepareModel(const Model& hidlModel, ExecutionPreference executionPreference, - std::shared_ptr<VersionedIPreparedModel>* preparedModel) { +int DriverDevice::prepareModelHelper( + const std::function<Return<ErrorStatus>(const sp<IPreparedModelCallback>& callback)>& + prepare, + const std::string& prepareName, std::shared_ptr<VersionedIPreparedModel>* preparedModel) { *preparedModel = nullptr; sp<PreparedModelCallback> preparedModelCallback = new PreparedModelCallback(); - // Note that some work within VersionedIDevice will be subtracted from the - // IPC layer - NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "prepareModel"); - Return<ErrorStatus> prepareLaunchStatus = - mInterface.prepareModel(hidlModel, executionPreference, preparedModelCallback); + Return<ErrorStatus> prepareLaunchStatus = prepare(preparedModelCallback); if (!prepareLaunchStatus.isOk()) { - LOG(ERROR) << "ExecutionStep::finishSubModel compilation failed due to transport error: " + LOG(ERROR) << prepareName << " compilation failed due to transport error: " << 
prepareLaunchStatus.description(); return ANEURALNETWORKS_OP_FAILED; } if (prepareLaunchStatus != ErrorStatus::NONE) { - LOG(ERROR) << "ExecutionStep::finishSubModel compilation failed with error: " + LOG(ERROR) << prepareName << " compilation failed with error: " << toString(static_cast<ErrorStatus>(prepareLaunchStatus)); return ANEURALNETWORKS_OP_FAILED; } @@ -256,7 +269,7 @@ int DriverDevice::prepareModel(const Model& hidlModel, ExecutionPreference execu *preparedModel = std::make_shared<VersionedIPreparedModel>(returnedPreparedModel); } if (prepareReturnStatus != ErrorStatus::NONE || *preparedModel == nullptr) { - LOG(ERROR) << "ExecutionPlan compilation on " << getName() << " failed:" + LOG(ERROR) << prepareName << " on " << getName() << " failed:" << " prepareReturnStatus=" << toString(prepareReturnStatus) << ", preparedModel=" << preparedModel->get(); return ANEURALNETWORKS_OP_FAILED; @@ -264,6 +277,29 @@ int DriverDevice::prepareModel(const Model& hidlModel, ExecutionPreference execu return ANEURALNETWORKS_NO_ERROR; } +int DriverDevice::prepareModel(const Model& hidlModel, ExecutionPreference executionPreference, + std::shared_ptr<VersionedIPreparedModel>* preparedModel) { + // Note that some work within VersionedIDevice will be subtracted from the IPC layer + NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "prepareModel"); + return prepareModelHelper( + [this, &hidlModel, &executionPreference](const sp<IPreparedModelCallback>& callback) { + return mInterface.prepareModel(hidlModel, executionPreference, callback); + }, + "prepareModel", preparedModel); +} + +int DriverDevice::prepareModelFromCache(const hidl_handle& cache1, const hidl_handle& cache2, + const HidlToken& token, + std::shared_ptr<VersionedIPreparedModel>* preparedModel) { + // Note that some work within VersionedIDevice will be subtracted from the IPC layer + NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "prepareModelFromCache"); + return prepareModelHelper( + [this, 
&cache1, &cache2, &token](const sp<IPreparedModelCallback>& callback) { + return mInterface.prepareModelFromCache(cache1, cache2, token, callback); + }, + "prepareModelFromCache", preparedModel); +} + // A special abstracted device for the CPU. Only one instance of this class will exist. // Use get() to retrieve it. class CpuDevice : public Device { @@ -287,9 +323,15 @@ class CpuDevice : public Device { PerformanceInfo getFloat32Performance() const override { return kPerformance; } PerformanceInfo getQuantized8Performance() const override { return kPerformance; } PerformanceInfo getRelaxedFloat32toFloat16Performance() const override { return kPerformance; } + bool isCachingSupported() const override { return false; } int prepareModel(const Model& hidlModel, ExecutionPreference executionPreference, std::shared_ptr<VersionedIPreparedModel>* preparedModel) override; + int prepareModelFromCache(const hidl_handle&, const hidl_handle&, const HidlToken&, + std::shared_ptr<VersionedIPreparedModel>*) override { + CHECK(false) << "Should never call prepareModelFromCache on CpuDevice"; + return ANEURALNETWORKS_OP_FAILED; + } private: CpuDevice() = default; |