Merge "Improve performance of Burst executions" am: 5faee8ea5a am: 87dd1a37c2

am: 6f2aab7acc Change-Id: Ib78f32f1324896091dfdbf23f5f1b0bbd936c04a
author: Michael Butler <butlermichael@google.com> 2019-10-28 15:25:58 -0700
committer: android-build-merger <android-build-merger@google.com> 2019-10-28 15:25:58 -0700
commit: bd86742ff4244cd721652c4b52ee5a43426cac82 (patch)
tree: a8e32e59a98b3cd2e4e290bdefdd0511ff2bd234
parent: 8c28763a2a18faad99d0e7eed992d50dd9e09b02 (diff)
parent: 6f2aab7acc36c2b5b273b50d3c86c1ed5f0db3ed (diff)
download: ml-bd86742ff4244cd721652c4b52ee5a43426cac82.tar.gz
13 files changed, 307 insertions, 172 deletions
diff --git a/nn/common/ExecutionBurstController.cpp b/nn/common/ExecutionBurstController.cpp
index f3a771b11..4456ed10d 100644
--- a/nn/common/ExecutionBurstController.cpp
+++ b/nn/common/ExecutionBurstController.cpp
@@ -19,9 +19,16 @@
 #include "ExecutionBurstController.h"
 
 #include <android-base/logging.h>
+
+#include <algorithm>
 #include <cstring>
 #include <limits>
+#include <memory>
 #include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
 #include "Tracing.h"
 #include "Utils.h"
 
@@ -30,9 +37,8 @@ namespace {
 
 using namespace hal;
 
-using hardware::MQDescriptorSync;
-using FmqRequestDescriptor = MQDescriptorSync<FmqRequestDatum>;
-using FmqResultDescriptor = MQDescriptorSync<FmqResultDatum>;
+using FmqRequestDescriptor = hardware::MQDescriptorSync<FmqRequestDatum>;
+using FmqResultDescriptor = hardware::MQDescriptorSync<FmqResultDatum>;
 
 constexpr Timing kNoTiming = {std::numeric_limits<uint64_t>::max(),
                               std::numeric_limits<uint64_t>::max()};
@@ -221,22 +227,23 @@ std::optional<std::tuple<ErrorStatus, std::vector<OutputShape>, Timing>> deseria
 }
 
 std::pair<std::unique_ptr<ResultChannelReceiver>, const FmqResultDescriptor*>
-ResultChannelReceiver::create(size_t channelLength, bool blocking) {
+ResultChannelReceiver::create(size_t channelLength, std::chrono::microseconds pollingTimeWindow) {
     std::unique_ptr<FmqResultChannel> fmqResultChannel =
-            std::make_unique<FmqResultChannel>(channelLength, /*confEventFlag=*/blocking);
+            std::make_unique<FmqResultChannel>(channelLength, /*confEventFlag=*/true);
     if (!fmqResultChannel->isValid()) {
         LOG(ERROR) << "Unable to create ResultChannelReceiver";
         return {nullptr, nullptr};
     }
+
     const FmqResultDescriptor* descriptor = fmqResultChannel->getDesc();
     return std::make_pair(
-            std::make_unique<ResultChannelReceiver>(std::move(fmqResultChannel), blocking),
+            std::make_unique<ResultChannelReceiver>(std::move(fmqResultChannel), pollingTimeWindow),
             descriptor);
 }
 
 ResultChannelReceiver::ResultChannelReceiver(std::unique_ptr<FmqResultChannel> fmqResultChannel,
-                                             bool blocking)
-    : mFmqResultChannel(std::move(fmqResultChannel)), mBlocking(blocking) {}
+                                             std::chrono::microseconds pollingTimeWindow)
+    : mFmqResultChannel(std::move(fmqResultChannel)), kPollingTimeWindow(pollingTimeWindow) {}
 
 std::optional<std::tuple<ErrorStatus, std::vector<OutputShape>, Timing>>
 ResultChannelReceiver::getBlocking() {
@@ -254,16 +261,14 @@ void ResultChannelReceiver::invalidate() {
     // force unblock
     // ExecutionBurstController waits on a result packet after sending a
     // request. If the driver containing ExecutionBurstServer crashes, the
-    // controller will still be waiting on the futex (assuming mBlocking is
-    // true). This force unblock wakes up any thread waiting on the futex.
-    if (mBlocking) {
-        // TODO: look for a different/better way to signal/notify the futex to
-        // wake up any thread waiting on it
-        FmqResultDatum datum;
-        datum.packetInformation({/*.packetSize=*/0, /*.errorStatus=*/ErrorStatus::GENERAL_FAILURE,
-                                 /*.numberOfOperands=*/0});
-        mFmqResultChannel->writeBlocking(&datum, 1);
-    }
+    // controller may be waiting on the futex. This force unblock wakes up any
+    // thread waiting on the futex.
+    // TODO: look for a different/better way to signal/notify the futex to
+    // wake up any thread waiting on it
+    FmqResultDatum datum;
+    datum.packetInformation({/*.packetSize=*/0, /*.errorStatus=*/ErrorStatus::GENERAL_FAILURE,
+                             /*.numberOfOperands=*/0});
+    mFmqResultChannel->writeBlocking(&datum, 1);
 }
 
 std::optional<std::vector<FmqResultDatum>> ResultChannelReceiver::getPacketBlocking() {
@@ -273,17 +278,42 @@ std::optional<std::vector<FmqResultDatum>> ResultChannelReceiver::getPacketBlock
         return std::nullopt;
     }
 
-    // wait for result packet and read first element of result packet
-    FmqResultDatum datum;
-    bool success = true;
-    if (mBlocking) {
-        success = mFmqResultChannel->readBlocking(&datum, 1);
-    } else {
-        while ((success = mValid.load(std::memory_order_relaxed)) &&
-               !mFmqResultChannel->read(&datum, 1)) {
+    // First spend time polling if results are available in FMQ instead of
+    // waiting on the futex. Polling is more responsive (yielding lower
+    // latencies), but can take up more power, so only poll for a limited period
+    // of time.
+
+    auto& getCurrentTime = std::chrono::high_resolution_clock::now;
+    const auto timeToStopPolling = getCurrentTime() + kPollingTimeWindow;
+
+    while (getCurrentTime() < timeToStopPolling) {
+        // if class is being torn down, immediately return
+        if (!mValid.load(std::memory_order_relaxed)) {
+            return std::nullopt;
+        }
+
+        // Check if data is available. If it is, immediately retrieve it and
+        // return.
+        const size_t available = mFmqResultChannel->availableToRead();
+        if (available > 0) {
+            std::vector<FmqResultDatum> packet(available);
+            const bool success = mFmqResultChannel->read(packet.data(), available);
+            if (!success) {
+                LOG(ERROR) << "Error receiving packet";
+                return std::nullopt;
+            }
+            return std::make_optional(std::move(packet));
         }
     }
 
+    // If we get to this point, we either stopped polling because it was taking
+    // too long or polling was not allowed. Instead, perform a blocking call
+    // which uses a futex to save power.
+
+    // wait for result packet and read first element of result packet
+    FmqResultDatum datum;
+    bool success = mFmqResultChannel->readBlocking(&datum, 1);
+
     // retrieve remaining elements
     // NOTE: all of the data is already available at this point, so there's no
     // need to do a blocking wait to wait for more data. This is known because
@@ -310,22 +340,21 @@ std::optional<std::vector<FmqResultDatum>> ResultChannelReceiver::getPacketBlock
 }
 
 std::pair<std::unique_ptr<RequestChannelSender>, const FmqRequestDescriptor*>
-RequestChannelSender::create(size_t channelLength, bool blocking) {
+RequestChannelSender::create(size_t channelLength) {
     std::unique_ptr<FmqRequestChannel> fmqRequestChannel =
-            std::make_unique<FmqRequestChannel>(channelLength, /*confEventFlag=*/blocking);
+            std::make_unique<FmqRequestChannel>(channelLength, /*confEventFlag=*/true);
     if (!fmqRequestChannel->isValid()) {
         LOG(ERROR) << "Unable to create RequestChannelSender";
         return {nullptr, nullptr};
     }
+
     const FmqRequestDescriptor* descriptor = fmqRequestChannel->getDesc();
-    return std::make_pair(
-            std::make_unique<RequestChannelSender>(std::move(fmqRequestChannel), blocking),
-            descriptor);
+    return std::make_pair(std::make_unique<RequestChannelSender>(std::move(fmqRequestChannel)),
+                          descriptor);
 }
 
-RequestChannelSender::RequestChannelSender(std::unique_ptr<FmqRequestChannel> fmqRequestChannel,
-                                           bool blocking)
-    : mFmqRequestChannel(std::move(fmqRequestChannel)), mBlocking(blocking) {}
+RequestChannelSender::RequestChannelSender(std::unique_ptr<FmqRequestChannel> fmqRequestChannel)
+    : mFmqRequestChannel(std::move(fmqRequestChannel)) {}
 
 bool RequestChannelSender::send(const Request& request, MeasureTiming measure,
                                 const std::vector<int32_t>& slots) {
@@ -344,11 +373,9 @@ bool RequestChannelSender::sendPacket(const std::vector<FmqRequestDatum>& packet
         return false;
     }
 
-    if (mBlocking) {
-        return mFmqRequestChannel->writeBlocking(packet.data(), packet.size());
-    } else {
-        return mFmqRequestChannel->write(packet.data(), packet.size());
-    }
+    // Always send the packet with "blocking" because this signals the futex and
+    // unblocks the consumer if it is waiting on the futex.
+    return mFmqRequestChannel->writeBlocking(packet.data(), packet.size());
 }
 
 void RequestChannelSender::invalidate() {
@@ -438,7 +465,7 @@ int32_t ExecutionBurstController::ExecutionBurstCallback::allocateSlotLocked() {
 }
 
 std::unique_ptr<ExecutionBurstController> ExecutionBurstController::create(
-        const sp<IPreparedModel>& preparedModel, bool blocking) {
+        const sp<IPreparedModel>& preparedModel, std::chrono::microseconds pollingTimeWindow) {
     // check inputs
     if (preparedModel == nullptr) {
         LOG(ERROR) << "ExecutionBurstController::create passed a nullptr";
@@ -450,9 +477,9 @@ std::unique_ptr<ExecutionBurstController> ExecutionBurstController::create(
 
     // create FMQ objects
     auto [requestChannelSenderTemp, requestChannelDescriptor] =
-            RequestChannelSender::create(kExecutionBurstChannelLength, blocking);
+            RequestChannelSender::create(kExecutionBurstChannelLength);
     auto [resultChannelReceiverTemp, resultChannelDescriptor] =
-            ResultChannelReceiver::create(kExecutionBurstChannelLength, blocking);
+            ResultChannelReceiver::create(kExecutionBurstChannelLength, pollingTimeWindow);
     std::shared_ptr<RequestChannelSender> requestChannelSender =
             std::move(requestChannelSenderTemp);
     std::shared_ptr<ResultChannelReceiver> resultChannelReceiver =
@@ -543,15 +570,13 @@ static std::tuple<int, std::vector<OutputShape>, Timing, bool> getExecutionResul
     return {n, std::move(checkedOutputShapes), checkedTiming, fallback};
 }
 
-std::tuple<ErrorStatus, std::vector<OutputShape>, Timing> ExecutionBurstController::compute(
-        const Request& request, MeasureTiming measure, const std::vector<intptr_t>& memoryIds) {
-    auto [status, outputShapes, timing, fallback] = tryCompute(request, measure, memoryIds);
-    (void)fallback;  // ignore fallback field
-    return {convertResultCodeToErrorStatus(status), std::move(outputShapes), timing};
-}
-
-std::tuple<int, std::vector<OutputShape>, Timing, bool> ExecutionBurstController::tryCompute(
+std::tuple<int, std::vector<OutputShape>, Timing, bool> ExecutionBurstController::compute(
         const Request& request, MeasureTiming measure, const std::vector<intptr_t>& memoryIds) {
+    // This is the first point when we know an execution is occurring, so begin
+    // to collect systraces. Note that the first point we can begin collecting
+    // systraces in ExecutionBurstServer is when the RequestChannelReceiver
+    // realizes there is data in the FMQ, so ExecutionBurstServer collects
+    // systraces at different points in the code.
     NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "ExecutionBurstController::compute");
 
     std::lock_guard<std::mutex> guard(mMutex);
diff --git a/nn/common/ExecutionBurstServer.cpp b/nn/common/ExecutionBurstServer.cpp
index 74bc34058..ec935dad6 100644
--- a/nn/common/ExecutionBurstServer.cpp
+++ b/nn/common/ExecutionBurstServer.cpp
@@ -20,9 +20,14 @@
 
 #include <android-base/logging.h>
 
+#include <algorithm>
 #include <cstring>
 #include <limits>
 #include <map>
+#include <memory>
+#include <tuple>
+#include <utility>
+#include <vector>
 
 #include "Tracing.h"
 
@@ -31,6 +36,8 @@ namespace {
 
 using namespace hal;
 
+using hardware::MQDescriptorSync;
+
 constexpr Timing kNoTiming = {std::numeric_limits<uint64_t>::max(),
                               std::numeric_limits<uint64_t>::max()};
 
@@ -298,20 +305,27 @@ std::optional<std::tuple<Request, std::vector<int32_t>, MeasureTiming>> deserial
 // RequestChannelReceiver methods
 
 std::unique_ptr<RequestChannelReceiver> RequestChannelReceiver::create(
-        const FmqRequestDescriptor& requestChannel) {
+        const FmqRequestDescriptor& requestChannel, std::chrono::microseconds pollingTimeWindow) {
     std::unique_ptr<FmqRequestChannel> fmqRequestChannel =
             std::make_unique<FmqRequestChannel>(requestChannel);
+
     if (!fmqRequestChannel->isValid()) {
         LOG(ERROR) << "Unable to create RequestChannelReceiver";
         return nullptr;
     }
-    const bool blocking = fmqRequestChannel->getEventFlagWord() != nullptr;
-    return std::make_unique<RequestChannelReceiver>(std::move(fmqRequestChannel), blocking);
+    if (fmqRequestChannel->getEventFlagWord() == nullptr) {
+        LOG(ERROR)
+                << "RequestChannelReceiver::create was passed an MQDescriptor without an EventFlag";
+        return nullptr;
+    }
+
+    return std::make_unique<RequestChannelReceiver>(std::move(fmqRequestChannel),
+                                                    pollingTimeWindow);
 }
 
 RequestChannelReceiver::RequestChannelReceiver(std::unique_ptr<FmqRequestChannel> fmqRequestChannel,
-                                               bool blocking)
-    : mFmqRequestChannel(std::move(fmqRequestChannel)), mBlocking(blocking) {}
+                                               std::chrono::microseconds pollingTimeWindow)
+    : mFmqRequestChannel(std::move(fmqRequestChannel)), kPollingTimeWindow(pollingTimeWindow) {}
 
 std::optional<std::tuple<Request, std::vector<int32_t>, MeasureTiming>>
 RequestChannelReceiver::getBlocking() {
@@ -328,17 +342,15 @@ void RequestChannelReceiver::invalidate() {
 
     // force unblock
     // ExecutionBurstServer is by default waiting on a request packet. If the
-    // client process destroys its burst object, the server will still be
-    // waiting on the futex (assuming mBlocking is true). This force unblock
-    // wakes up any thread waiting on the futex.
-    if (mBlocking) {
-        // TODO: look for a different/better way to signal/notify the futex to
-        // wake up any thread waiting on it
-        FmqRequestDatum datum;
-        datum.packetInformation({/*.packetSize=*/0, /*.numberOfInputOperands=*/0,
-                                 /*.numberOfOutputOperands=*/0, /*.numberOfPools=*/0});
-        mFmqRequestChannel->writeBlocking(&datum, 1);
-    }
+    // client process destroys its burst object, the server may still be waiting
+    // on the futex. This force unblock wakes up any thread waiting on the
+    // futex.
+    // TODO: look for a different/better way to signal/notify the futex to wake
+    // up any thread waiting on it
+    FmqRequestDatum datum;
+    datum.packetInformation({/*.packetSize=*/0, /*.numberOfInputOperands=*/0,
+                             /*.numberOfOutputOperands=*/0, /*.numberOfPools=*/0});
+    mFmqRequestChannel->writeBlocking(&datum, 1);
 }
 
 std::optional<std::vector<FmqRequestDatum>> RequestChannelReceiver::getPacketBlocking() {
@@ -348,17 +360,53 @@ std::optional<std::vector<FmqRequestDatum>> RequestChannelReceiver::getPacketBlo
         return std::nullopt;
     }
 
-    // wait for request packet and read first element of request packet
-    FmqRequestDatum datum;
-    bool success = false;
-    if (mBlocking) {
-        success = mFmqRequestChannel->readBlocking(&datum, 1);
-    } else {
-        while ((success = !mTeardown.load(std::memory_order_relaxed)) &&
-               !mFmqRequestChannel->read(&datum, 1)) {
+    // First spend time polling if requests are available in FMQ instead of
+    // waiting on the futex. Polling is more responsive (yielding lower
+    // latencies), but can take up more power, so only poll for a limited period
+    // of time.
+
+    auto& getCurrentTime = std::chrono::high_resolution_clock::now;
+    const auto timeToStopPolling = getCurrentTime() + kPollingTimeWindow;
+
+    while (getCurrentTime() < timeToStopPolling) {
+        // if class is being torn down, immediately return
+        if (mTeardown.load(std::memory_order_relaxed)) {
+            return std::nullopt;
+        }
+
+        // Check if data is available. If it is, immediately retrieve it and
+        // return.
+        const size_t available = mFmqRequestChannel->availableToRead();
+        if (available > 0) {
+            // This is the first point when we know an execution is occurring,
+            // so begin to collect systraces. Note that a similar systrace does
+            // not exist at the corresponding point in
+            // ResultChannelReceiver::getPacketBlocking because the execution is
+            // already in flight.
+            NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION,
+                         "ExecutionBurstServer getting packet");
+            std::vector<FmqRequestDatum> packet(available);
+            const bool success = mFmqRequestChannel->read(packet.data(), available);
+            if (!success) {
+                LOG(ERROR) << "Error receiving packet";
+                return std::nullopt;
+            }
+            return std::make_optional(std::move(packet));
         }
     }
 
+    // If we get to this point, we either stopped polling because it was taking
+    // too long or polling was not allowed. Instead, perform a blocking call
+    // which uses a futex to save power.
+
+    // wait for request packet and read first element of request packet
+    FmqRequestDatum datum;
+    bool success = mFmqRequestChannel->readBlocking(&datum, 1);
+
+    // This is the first point when we know an execution is occurring, so begin
+    // to collect systraces. Note that a similar systrace does not exist at the
+    // corresponding point in ResultChannelReceiver::getPacketBlocking because
+    // the execution is already in flight.
     NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "ExecutionBurstServer getting packet");
 
     // retrieve remaining elements
@@ -393,17 +441,21 @@ std::unique_ptr<ResultChannelSender> ResultChannelSender::create(
         const FmqResultDescriptor& resultChannel) {
     std::unique_ptr<FmqResultChannel> fmqResultChannel =
             std::make_unique<FmqResultChannel>(resultChannel);
+
     if (!fmqResultChannel->isValid()) {
         LOG(ERROR) << "Unable to create RequestChannelSender";
         return nullptr;
     }
-    const bool blocking = fmqResultChannel->getEventFlagWord() != nullptr;
-    return std::make_unique<ResultChannelSender>(std::move(fmqResultChannel), blocking);
+    if (fmqResultChannel->getEventFlagWord() == nullptr) {
+        LOG(ERROR) << "ResultChannelSender::create was passed an MQDescriptor without an EventFlag";
+        return nullptr;
+    }
+
+    return std::make_unique<ResultChannelSender>(std::move(fmqResultChannel));
 }
 
-ResultChannelSender::ResultChannelSender(std::unique_ptr<FmqResultChannel> fmqResultChannel,
-                                         bool blocking)
-    : mFmqResultChannel(std::move(fmqResultChannel)), mBlocking(blocking) {}
+ResultChannelSender::ResultChannelSender(std::unique_ptr<FmqResultChannel> fmqResultChannel)
+    : mFmqResultChannel(std::move(fmqResultChannel)) {}
 
 bool ResultChannelSender::send(ErrorStatus errorStatus,
                                const std::vector<OutputShape>& outputShapes, Timing timing) {
@@ -417,18 +469,15 @@ bool ResultChannelSender::sendPacket(const std::vector<FmqResultDatum>& packet)
                 << "ResultChannelSender::sendPacket -- packet size exceeds size available in FMQ";
         const std::vector<FmqResultDatum> errorPacket =
                 serialize(ErrorStatus::GENERAL_FAILURE, {}, kNoTiming);
-        if (mBlocking) {
-            return mFmqResultChannel->writeBlocking(errorPacket.data(), errorPacket.size());
-        } else {
-            return mFmqResultChannel->write(errorPacket.data(), errorPacket.size());
-        }
-    }
 
-    if (mBlocking) {
-        return mFmqResultChannel->writeBlocking(packet.data(), packet.size());
-    } else {
-        return mFmqResultChannel->write(packet.data(), packet.size());
+        // Always send the packet with "blocking" because this signals the futex
+        // and unblocks the consumer if it is waiting on the futex.
+        return mFmqResultChannel->writeBlocking(errorPacket.data(), errorPacket.size());
     }
+
+    // Always send the packet with "blocking" because this signals the futex and
+    // unblocks the consumer if it is waiting on the futex.
+    return mFmqResultChannel->writeBlocking(packet.data(), packet.size());
 }
 
 // ExecutionBurstServer methods
@@ -436,7 +485,8 @@ bool ResultChannelSender::sendPacket(const std::vector<FmqResultDatum>& packet)
 sp<ExecutionBurstServer> ExecutionBurstServer::create(
         const sp<IBurstCallback>& callback, const MQDescriptorSync<FmqRequestDatum>& requestChannel,
         const MQDescriptorSync<FmqResultDatum>& resultChannel,
-        std::shared_ptr<IBurstExecutorWithCache> executorWithCache) {
+        std::shared_ptr<IBurstExecutorWithCache> executorWithCache,
+        std::chrono::microseconds pollingTimeWindow) {
     // check inputs
     if (callback == nullptr || executorWithCache == nullptr) {
         LOG(ERROR) << "ExecutionBurstServer::create passed a nullptr";
@@ -445,7 +495,7 @@ sp<ExecutionBurstServer> ExecutionBurstServer::create(
 
     // create FMQ objects
     std::unique_ptr<RequestChannelReceiver> requestChannelReceiver =
-            RequestChannelReceiver::create(requestChannel);
+            RequestChannelReceiver::create(requestChannel, pollingTimeWindow);
     std::unique_ptr<ResultChannelSender> resultChannelSender =
             ResultChannelSender::create(resultChannel);
 
@@ -462,7 +512,8 @@ sp<ExecutionBurstServer> ExecutionBurstServer::create(
 
 sp<ExecutionBurstServer> ExecutionBurstServer::create(
         const sp<IBurstCallback>& callback, const MQDescriptorSync<FmqRequestDatum>& requestChannel,
-        const MQDescriptorSync<FmqResultDatum>& resultChannel, IPreparedModel* preparedModel) {
+        const MQDescriptorSync<FmqResultDatum>& resultChannel, IPreparedModel* preparedModel,
+        std::chrono::microseconds pollingTimeWindow) {
     // check relevant input
     if (preparedModel == nullptr) {
         LOG(ERROR) << "ExecutionBurstServer::create passed a nullptr";
@@ -475,7 +526,7 @@ sp<ExecutionBurstServer> ExecutionBurstServer::create(
 
     // make and return context
     return ExecutionBurstServer::create(callback, requestChannel, resultChannel,
-                                        preparedModelAdapter);
+                                        preparedModelAdapter, pollingTimeWindow);
 }
 
 ExecutionBurstServer::ExecutionBurstServer(
diff --git a/nn/common/include/ExecutionBurstController.h b/nn/common/include/ExecutionBurstController.h
index 6328096b0..652b0d911 100644
--- a/nn/common/include/ExecutionBurstController.h
+++ b/nn/common/include/ExecutionBurstController.h
@@ -17,18 +17,21 @@
 #ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
 #define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
 
-#include "HalInterfaces.h"
-
 #include <android-base/macros.h>
 #include <fmq/MessageQueue.h>
 #include <hidl/MQDescriptor.h>
 
 #include <atomic>
+#include <chrono>
 #include <map>
 #include <memory>
 #include <mutex>
 #include <stack>
 #include <tuple>
+#include <utility>
+#include <vector>
+
+#include "HalInterfaces.h"
 
 namespace android::nn {
 
@@ -70,10 +73,10 @@ std::optional<std::tuple<hal::ErrorStatus, std::vector<hal::OutputShape>, hal::T
  *
  * Because the receiver can wait on a packet that may never come (e.g., because
  * the sending side of the packet has been closed), this object can be
- * invalidating, unblocking the receiver.
+ * invalidated, unblocking the receiver.
  */
 class ResultChannelReceiver {
-    using FmqResultDescriptor = ::android::hardware::MQDescriptorSync<hal::FmqResultDatum>;
+    using FmqResultDescriptor = hardware::MQDescriptorSync<hal::FmqResultDatum>;
     using FmqResultChannel =
             hardware::MessageQueue<hal::FmqResultDatum, hardware::kSynchronizedReadWrite>;
 
@@ -84,13 +87,15 @@ class ResultChannelReceiver {
      * Prefer this call over the constructor.
      *
      * @param channelLength Number of elements in the FMQ.
-     * @param blocking 'true' if FMQ should use futex, 'false' if it should
-     *     spin-wait.
+     * @param pollingTimeWindow How much time (in microseconds) the
+     *     ResultChannelReceiver is allowed to poll the FMQ before waiting on
+     *     the blocking futex. Polling may result in lower latencies at the
+     *     potential cost of more power usage.
      * @return A pair of ResultChannelReceiver and the FMQ descriptor on
      *     successful creation, both nullptr otherwise.
      */
     static std::pair<std::unique_ptr<ResultChannelReceiver>, const FmqResultDescriptor*> create(
-            size_t channelLength, bool blocking);
+            size_t channelLength, std::chrono::microseconds pollingTimeWindow);
 
     /**
      * Get the result from the channel.
@@ -114,12 +119,13 @@ class ResultChannelReceiver {
     // prefer calling ResultChannelReceiver::getBlocking
     std::optional<std::vector<hal::FmqResultDatum>> getPacketBlocking();
 
-    ResultChannelReceiver(std::unique_ptr<FmqResultChannel> fmqResultChannel, bool blocking);
+    ResultChannelReceiver(std::unique_ptr<FmqResultChannel> fmqResultChannel,
+                          std::chrono::microseconds pollingTimeWindow);
 
    private:
     const std::unique_ptr<FmqResultChannel> mFmqResultChannel;
     std::atomic<bool> mValid{true};
-    const bool mBlocking;
+    const std::chrono::microseconds kPollingTimeWindow;
 };
 
 /**
@@ -128,7 +134,7 @@ class ResultChannelReceiver {
  * available.
  */
 class RequestChannelSender {
-    using FmqRequestDescriptor = ::android::hardware::MQDescriptorSync<hal::FmqRequestDatum>;
+    using FmqRequestDescriptor = hardware::MQDescriptorSync<hal::FmqRequestDatum>;
     using FmqRequestChannel =
             hardware::MessageQueue<hal::FmqRequestDatum, hardware::kSynchronizedReadWrite>;
 
@@ -139,13 +145,11 @@ class RequestChannelSender {
      * Prefer this call over the constructor.
      *
      * @param channelLength Number of elements in the FMQ.
-     * @param blocking 'true' if FMQ should use futex, 'false' if it should
-     *     spin-wait.
      * @return A pair of ResultChannelReceiver and the FMQ descriptor on
      *     successful creation, both nullptr otherwise.
      */
     static std::pair<std::unique_ptr<RequestChannelSender>, const FmqRequestDescriptor*> create(
-            size_t channelLength, bool blocking);
+            size_t channelLength);
 
     /**
      * Send the request to the channel.
@@ -169,12 +173,11 @@ class RequestChannelSender {
     // prefer calling RequestChannelSender::send
     bool sendPacket(const std::vector<hal::FmqRequestDatum>& packet);
 
-    RequestChannelSender(std::unique_ptr<FmqRequestChannel> fmqRequestChannel, bool blocking);
+    RequestChannelSender(std::unique_ptr<FmqRequestChannel> fmqRequestChannel);
 
    private:
     const std::unique_ptr<FmqRequestChannel> mFmqRequestChannel;
     std::atomic<bool> mValid{true};
-    const bool mBlocking;
 };
 
 /**
@@ -260,15 +263,15 @@ class ExecutionBurstController {
      * Prefer this over ExecutionBurstController's constructor.
      *
      * @param preparedModel Model prepared for execution to execute on.
-     * @param blocking 'true' if the FMQ should use a futex to perform blocking
-     *     until data is available in a less responsive, but more energy
-     *     efficient manner. 'false' if the FMQ should use spin-looping to
-     *     wait until data is available in a more responsive, but less energy
-     *     efficient manner.
+     * @param pollingTimeWindow How much time (in microseconds) the
+     *     ExecutionBurstController is allowed to poll the FMQ before waiting on
+     *     the blocking futex. Polling may result in lower latencies at the
+     *     potential cost of more power usage.
      * @return ExecutionBurstController Execution burst controller object.
      */
     static std::unique_ptr<ExecutionBurstController> create(
-            const sp<hal::IPreparedModel>& preparedModel, bool blocking);
+            const sp<hal::IPreparedModel>& preparedModel,
+            std::chrono::microseconds pollingTimeWindow);
 
     // prefer calling ExecutionBurstController::create
     ExecutionBurstController(const std::shared_ptr<RequestChannelSender>& requestChannelSender,
@@ -288,34 +291,13 @@ class ExecutionBurstController {
      * @param memoryIds Identifiers corresponding to each memory object in the
      *     request's pools.
      * @return A tuple of:
-     *     - status of the execution
-     *     - dynamic output shapes from the execution
-     *     - any execution time measurements of the execution
-     */
-    std::tuple<hal::ErrorStatus, std::vector<hal::OutputShape>, hal::Timing> compute(
-            const hal::Request& request, hal::MeasureTiming measure,
-            const std::vector<intptr_t>& memoryIds);
-
-    // TODO: combine "compute" and "tryCompute" back into a single function.
-    // "tryCompute" was created later to return the "fallback" boolean. This
-    // could not be done directly in "compute" because the VTS test cases (which
-    // test burst using "compute") had already been locked down and could not be
-    // changed.
-    /**
-     * Execute a request on a model.
-     *
-     * @param request Arguments to be executed on a model.
-     * @param measure Whether to collect timing measurements, either YES or NO
-     * @param memoryIds Identifiers corresponding to each memory object in the
-     *     request's pools.
-     * @return A tuple of:
      *     - result code of the execution
      *     - dynamic output shapes from the execution
      *     - any execution time measurements of the execution
      *     - whether or not a failed burst execution should be re-run using a
      *       different path (e.g., IPreparedModel::executeSynchronously)
      */
-    std::tuple<int, std::vector<hal::OutputShape>, hal::Timing, bool> tryCompute(
+    std::tuple<int, std::vector<hal::OutputShape>, hal::Timing, bool> compute(
             const hal::Request& request, hal::MeasureTiming measure,
             const std::vector<intptr_t>& memoryIds);
 
diff --git a/nn/common/include/ExecutionBurstServer.h b/nn/common/include/ExecutionBurstServer.h
index 977d0d375..9da0dc742 100644
--- a/nn/common/include/ExecutionBurstServer.h
+++ b/nn/common/include/ExecutionBurstServer.h
@@ -17,23 +17,24 @@
 #ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_SERVER_H
 #define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_SERVER_H
 
-#include "HalInterfaces.h"
-
 #include <android-base/macros.h>
 #include <fmq/MessageQueue.h>
 #include <hidl/MQDescriptor.h>
 
 #include <atomic>
+#include <chrono>
 #include <memory>
 #include <optional>
 #include <thread>
+#include <tuple>
 #include <vector>
 
+#include "HalInterfaces.h"
+
 namespace android::nn {
 
-using hardware::MQDescriptorSync;
-using FmqRequestDescriptor = MQDescriptorSync<hal::FmqRequestDatum>;
-using FmqResultDescriptor = MQDescriptorSync<hal::FmqResultDatum>;
+using FmqRequestDescriptor = hardware::MQDescriptorSync<hal::FmqRequestDatum>;
+using FmqResultDescriptor = hardware::MQDescriptorSync<hal::FmqResultDatum>;
 
 /**
  * Function to serialize results.
@@ -69,7 +70,7 @@ std::optional<std::tuple<hal::Request, std::vector<int32_t>, hal::MeasureTiming>
  *
  * Because the receiver can wait on a packet that may never come (e.g., because
  * the sending side of the packet has been closed), this object can be
- * invalidating, unblocking the receiver.
+ * invalidated, unblocking the receiver.
  */
 class RequestChannelReceiver {
     using FmqRequestChannel =
@@ -82,10 +83,15 @@ class RequestChannelReceiver {
      * Prefer this call over the constructor.
      *
      * @param requestChannel Descriptor for the request channel.
+     * @param pollingTimeWindow How much time (in microseconds) the
+     *     RequestChannelReceiver is allowed to poll the FMQ before waiting on
+     *     the blocking futex. Polling may result in lower latencies at the
+     *     potential cost of more power usage.
      * @return RequestChannelReceiver on successful creation, nullptr otherwise.
      */
     static std::unique_ptr<RequestChannelReceiver> create(
-            const FmqRequestDescriptor& requestChannel);
+            const FmqRequestDescriptor& requestChannel,
+            std::chrono::microseconds pollingTimeWindow);
 
     /**
      * Get the request from the channel.
@@ -105,14 +111,15 @@ class RequestChannelReceiver {
      */
     void invalidate();
 
-    RequestChannelReceiver(std::unique_ptr<FmqRequestChannel> fmqRequestChannel, bool blocking);
+    RequestChannelReceiver(std::unique_ptr<FmqRequestChannel> fmqRequestChannel,
+                           std::chrono::microseconds pollingTimeWindow);
 
    private:
     std::optional<std::vector<hal::FmqRequestDatum>> getPacketBlocking();
 
     const std::unique_ptr<FmqRequestChannel> mFmqRequestChannel;
     std::atomic<bool> mTeardown{false};
-    const bool mBlocking;
+    const std::chrono::microseconds kPollingTimeWindow;
 };
 
 /**
@@ -149,11 +156,10 @@ class ResultChannelSender {
     // prefer calling ResultChannelSender::send
     bool sendPacket(const std::vector<hal::FmqResultDatum>& packet);
 
-    ResultChannelSender(std::unique_ptr<FmqResultChannel> fmqResultChannel, bool blocking);
+    ResultChannelSender(std::unique_ptr<FmqResultChannel> fmqResultChannel);
 
    private:
     const std::unique_ptr<FmqResultChannel> mFmqResultChannel;
-    const bool mBlocking;
 };
 
 /**
@@ -247,12 +253,17 @@ class ExecutionBurstServer : public hal::IBurstContext {
      *     the result of the execution.
      * @param executorWithCache Object which maintains a local cache of the
      *     memory pools and executes using the cached memory pools.
+     * @param pollingTimeWindow How much time (in microseconds) the
+     *     ExecutionBurstServer is allowed to poll the FMQ before waiting on
+     *     the blocking futex. Polling may result in lower latencies at the
+     *     potential cost of more power usage.
      * @result IBurstContext Handle to the burst context.
      */
     static sp<ExecutionBurstServer> create(
             const sp<hal::IBurstCallback>& callback, const FmqRequestDescriptor& requestChannel,
             const FmqResultDescriptor& resultChannel,
-            std::shared_ptr<IBurstExecutorWithCache> executorWithCache);
+            std::shared_ptr<IBurstExecutorWithCache> executorWithCache,
+            std::chrono::microseconds pollingTimeWindow = std::chrono::microseconds{0});
 
     /**
      * Create automated context to manage FMQ-based executions.
@@ -271,12 +282,16 @@ class ExecutionBurstServer : public hal::IBurstContext {
      * @param preparedModel PreparedModel that the burst object was created from.
      *     IPreparedModel::executeSynchronously will be used to perform the
      *     execution.
+     * @param pollingTimeWindow How much time (in microseconds) the
+     *     ExecutionBurstServer is allowed to poll the FMQ before waiting on
+     *     the blocking futex. Polling may result in lower latencies at the
+     *     potential cost of more power usage.
      * @result IBurstContext Handle to the burst context.
      */
-    static sp<ExecutionBurstServer> create(const sp<hal::IBurstCallback>& callback,
-                                           const FmqRequestDescriptor& requestChannel,
-                                           const FmqResultDescriptor& resultChannel,
-                                           hal::IPreparedModel* preparedModel);
+    static sp<ExecutionBurstServer> create(
+            const sp<hal::IBurstCallback>& callback, const FmqRequestDescriptor& requestChannel,
+            const FmqResultDescriptor& resultChannel, hal::IPreparedModel* preparedModel,
+            std::chrono::microseconds pollingTimeWindow = std::chrono::microseconds{0});
 
     ExecutionBurstServer(const sp<hal::IBurstCallback>& callback,
                          std::unique_ptr<RequestChannelReceiver> requestChannel,
diff --git a/nn/driver/sample/SampleDriver.cpp b/nn/driver/sample/SampleDriver.cpp
index 0448c2d79..50cb7729a 100644
--- a/nn/driver/sample/SampleDriver.cpp
+++ b/nn/driver/sample/SampleDriver.cpp
@@ -19,6 +19,7 @@
 #include "SampleDriver.h"
 
 #include <android-base/logging.h>
+#include <android-base/properties.h>
 #include <hidl/LegacySupport.h>
 
 #include <algorithm>
@@ -185,9 +186,9 @@ Return<ErrorStatus> prepareModelBase(const T_Model& model, const SampleDriver* d
     }
 
     // asynchronously prepare the model from a new, detached thread
-    std::thread([model, driver, callback] {
+    std::thread([model, driver, preference, callback] {
         sp<SamplePreparedModel> preparedModel =
-                new SamplePreparedModel(convertToV1_3(model), driver);
+                new SamplePreparedModel(convertToV1_3(model), driver, preference);
         if (!preparedModel->initialize()) {
             notify(callback, ErrorStatus::INVALID_ARGUMENT, nullptr);
             return;
@@ -472,6 +473,22 @@ class BurstExecutorWithCache : public ExecutionBurstServer::IBurstExecutorWithCa
     std::map<int32_t, std::optional<RunTimePoolInfo>> mMemoryCache;  // cached requestPoolInfos
 };
 
+// This is the amount of time the ExecutionBurstServer should spend polling the
+// FMQ to see if it has data available before it should fall back to waiting on
+// the futex.
+static std::chrono::microseconds getPollingTimeWindow() {
+    constexpr int32_t defaultPollingTimeWindow = 50;
+#ifdef NN_DEBUGGABLE
+    constexpr int32_t minPollingTimeWindow = 0;
+    const int32_t selectedPollingTimeWindow =
+            base::GetIntProperty("debug.nn.sample-driver-burst-polling-window",
+                                 defaultPollingTimeWindow, minPollingTimeWindow);
+    return std::chrono::microseconds{selectedPollingTimeWindow};
+#else
+    return std::chrono::microseconds{defaultPollingTimeWindow};
+#endif  // NN_DEBUGGABLE
+}
+
 Return<void> SamplePreparedModel::configureExecutionBurst(
         const sp<V1_2::IBurstCallback>& callback,
         const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
@@ -480,17 +497,22 @@ Return<void> SamplePreparedModel::configureExecutionBurst(
     NNTRACE_FULL(NNTRACE_LAYER_DRIVER, NNTRACE_PHASE_EXECUTION,
                  "SampleDriver::configureExecutionBurst");
 
+    const bool preferPowerOverLatency = (kPreference == hal::ExecutionPreference::LOW_POWER);
+    const auto pollingTimeWindow =
+            (preferPowerOverLatency ? std::chrono::microseconds{0} : getPollingTimeWindow());
+
     // Alternatively, the burst could be configured via:
     // const sp<V1_2::IBurstContext> burst =
     //         ExecutionBurstServer::create(callback, requestChannel,
-    //                                      resultChannel, this);
+    //                                      resultChannel, this,
+    //                                      pollingTimeWindow);
     //
     // However, this alternative representation does not include a memory map
     // caching optimization, and adds overhead.
     const std::shared_ptr<BurstExecutorWithCache> executorWithCache =
             std::make_shared<BurstExecutorWithCache>(mModel, mDriver, mPoolInfos);
     const sp<V1_2::IBurstContext> burst = ExecutionBurstServer::create(
-            callback, requestChannel, resultChannel, executorWithCache);
+            callback, requestChannel, resultChannel, executorWithCache, pollingTimeWindow);
 
     if (burst == nullptr) {
         cb(ErrorStatus::GENERAL_FAILURE, {});
diff --git a/nn/driver/sample/SampleDriver.h b/nn/driver/sample/SampleDriver.h
index a85dcd5ea..8788ed3a8 100644
--- a/nn/driver/sample/SampleDriver.h
+++ b/nn/driver/sample/SampleDriver.h
@@ -91,8 +91,9 @@ class SampleDriver : public hal::IDevice {
 
 class SamplePreparedModel : public hal::IPreparedModel {
    public:
-    SamplePreparedModel(const hal::Model& model, const SampleDriver* driver)
-        : mModel(model), mDriver(driver) {}
+    SamplePreparedModel(const hal::Model& model, const SampleDriver* driver,
+                        hal::ExecutionPreference preference)
+        : mModel(model), mDriver(driver), kPreference(preference) {}
     ~SamplePreparedModel() override {}
     bool initialize();
     hal::Return<hal::ErrorStatus> execute(
@@ -113,6 +114,7 @@ class SamplePreparedModel : public hal::IPreparedModel {
     hal::Model mModel;
     const SampleDriver* mDriver;
     std::vector<RunTimePoolInfo> mPoolInfos;
+    const hal::ExecutionPreference kPreference;
 };
 
 }  // namespace sample_driver
diff --git a/nn/runtime/CompilationBuilder.cpp b/nn/runtime/CompilationBuilder.cpp
index 912f0087b..be0260ca2 100644
--- a/nn/runtime/CompilationBuilder.cpp
+++ b/nn/runtime/CompilationBuilder.cpp
@@ -18,6 +18,11 @@
 
 #include "CompilationBuilder.h"
 
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
 #include "BurstBuilder.h"
 #include "ExecutionBuilder.h"
 #include "ExecutionBurstController.h"
@@ -156,7 +161,8 @@ int CompilationBuilder::createBurst(BurstBuilder** burst) {
         *burst = nullptr;
         return ANEURALNETWORKS_BAD_STATE;
     }
-    std::vector<std::shared_ptr<ExecutionBurstController>> burstControllers = mPlan.makeBursts();
+    std::vector<std::shared_ptr<ExecutionBurstController>> burstControllers =
+            mPlan.makeBursts(mPreference);
     *burst = new (std::nothrow) BurstBuilder(this, std::move(burstControllers));
     return (*burst ? ANEURALNETWORKS_NO_ERROR : ANEURALNETWORKS_OUT_OF_MEMORY);
 }
diff --git a/nn/runtime/ExecutionPlan.cpp b/nn/runtime/ExecutionPlan.cpp
index 5f656e1c2..901305216 100644
--- a/nn/runtime/ExecutionPlan.cpp
+++ b/nn/runtime/ExecutionPlan.cpp
@@ -557,7 +557,8 @@ ExecutionPlan::Controller::Controller(
 // indicate the regular execution path should be used. This can occur either
 // because PreparedModel was nullptr (cpu was best choice), or because the
 // IPreparedModel was of insufficient version or failed to configure the burst.
-std::vector<std::shared_ptr<ExecutionBurstController>> ExecutionPlan::makeBursts() const {
+std::vector<std::shared_ptr<ExecutionBurstController>> ExecutionPlan::makeBursts(
+        int preference) const {
     switch (mState) {
         // burst object for each partition in the compound case
         case COMPOUND: {
@@ -565,7 +566,10 @@ std::vector<std::shared_ptr<ExecutionBurstController>> ExecutionPlan::makeBursts
             bursts.reserve(compound()->mSteps.size());
             for (const auto& step : compound()->mSteps) {
                 if (const auto preparedModel = step->getPreparedSubModel()) {
-                    bursts.push_back(preparedModel->configureExecutionBurst(/*blocking=*/true));
+                    const bool preferPowerOverLatency =
+                            (preference == ANEURALNETWORKS_PREFER_LOW_POWER);
+                    bursts.push_back(
+                            preparedModel->configureExecutionBurst(preferPowerOverLatency));
                 } else {
                     bursts.push_back(nullptr);
                 }
@@ -577,7 +581,9 @@ std::vector<std::shared_ptr<ExecutionBurstController>> ExecutionPlan::makeBursts
             std::vector<std::shared_ptr<ExecutionBurstController>> burst;
             auto simpleBody = simple();
             if (const auto preparedModel = simpleBody->mPreparedModel) {
-                burst.push_back(preparedModel->configureExecutionBurst(/*blocking=*/true));
+                const bool preferPowerOverLatency =
+                        (preference == ANEURALNETWORKS_PREFER_LOW_POWER);
+                burst.push_back(preparedModel->configureExecutionBurst(preferPowerOverLatency));
             } else {
                 burst.push_back(nullptr);
             }
diff --git a/nn/runtime/ExecutionPlan.h b/nn/runtime/ExecutionPlan.h
index cd3c01848..8fa7083c1 100644
--- a/nn/runtime/ExecutionPlan.h
+++ b/nn/runtime/ExecutionPlan.h
@@ -209,7 +209,7 @@ class ExecutionPlan {
         size_t mNextStepIndex;
     };
 
-    std::vector<std::shared_ptr<ExecutionBurstController>> makeBursts() const;
+    std::vector<std::shared_ptr<ExecutionBurstController>> makeBursts(int preference) const;
 
     std::shared_ptr<Controller> makeController(ExecutionBuilder* executionBuilder,
                                                const BurstBuilder* burstBuilder) const;
diff --git a/nn/runtime/Manager.cpp b/nn/runtime/Manager.cpp
index 6a3882dc1..34378b3fd 100644
--- a/nn/runtime/Manager.cpp
+++ b/nn/runtime/Manager.cpp
@@ -524,10 +524,10 @@ std::tuple<int, std::vector<OutputShape>, Timing> DriverPreparedModel::execute(
             memoryIds.push_back(memory->getKey());
         }
 
-        VLOG(EXECUTION) << "Before ExecutionBurstController->tryCompute() "
+        VLOG(EXECUTION) << "Before ExecutionBurstController->compute() "
                         << SHOW_IF_DEBUG(toString(request));
         std::tie(n, outputShapes, timing, burstFallback) =
-                burstController->tryCompute(request, measure, memoryIds);
+                burstController->compute(request, measure, memoryIds);
     }
 
     // compute from IPreparedModel if either:
diff --git a/nn/runtime/VersionedInterfaces.cpp b/nn/runtime/VersionedInterfaces.cpp
index 325b75c0e..ba6e2af7c 100644
--- a/nn/runtime/VersionedInterfaces.cpp
+++ b/nn/runtime/VersionedInterfaces.cpp
@@ -19,9 +19,11 @@
 #include "VersionedInterfaces.h"
 
 #include <android-base/logging.h>
+#include <android-base/properties.h>
 #include <android-base/scopeguard.h>
 #include <android-base/thread_annotations.h>
 
+#include <chrono>
 #include <functional>
 #include <memory>
 #include <string>
@@ -276,12 +278,30 @@ std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::execu
     return executeAsynchronously(request, measure);
 }
 
+// This is the amount of time the ExecutionBurstController should spend polling
+// the FMQ to see if it has data available before it should fall back to
+// waiting on the futex.
+static std::chrono::microseconds getPollingTimeWindow() {
+    constexpr int32_t defaultPollingTimeWindow = 50;
+#ifdef NN_DEBUGGABLE
+    constexpr int32_t minPollingTimeWindow = 0;
+    const int32_t selectedPollingTimeWindow =
+            base::GetIntProperty("debug.nn.burst-controller-polling-window",
+                                 defaultPollingTimeWindow, minPollingTimeWindow);
+    return std::chrono::microseconds{selectedPollingTimeWindow};
+#else
+    return std::chrono::microseconds{defaultPollingTimeWindow};
+#endif  // NN_DEBUGGABLE
+}
+
 std::shared_ptr<ExecutionBurstController> VersionedIPreparedModel::configureExecutionBurst(
-        bool blocking) const {
+        bool preferPowerOverLatency) const {
     if (mPreparedModelV1_2 == nullptr) {
         return nullptr;
     }
-    return ExecutionBurstController::create(mPreparedModelV1_2, blocking);
+    const auto pollingTimeWindow =
+            (preferPowerOverLatency ? std::chrono::microseconds{0} : getPollingTimeWindow());
+    return ExecutionBurstController::create(mPreparedModelV1_2, pollingTimeWindow);
 }
 
 std::shared_ptr<VersionedIDevice> VersionedIDevice::create(std::string serviceName,
diff --git a/nn/runtime/VersionedInterfaces.h b/nn/runtime/VersionedInterfaces.h
index 8665745b6..87e776507 100644
--- a/nn/runtime/VersionedInterfaces.h
+++ b/nn/runtime/VersionedInterfaces.h
@@ -687,12 +687,16 @@ class VersionedIPreparedModel {
     /**
      * Creates a burst controller on a prepared model.
      *
-     * @param blocking 'true' if the FMQ should block until data is available.
+     * @param preferPowerOverLatency 'true' if the Burst object should run in a
+     *                               more power efficient mode, 'false' if more
+     *                               power can be used to possibly reduce
+     *                               burst compute latency.
      * @return ExecutionBurstController Execution burst controller object.
      *                                  nullptr is returned if the burst cannot
      *                                  be configured for any reason.
      */
-    std::shared_ptr<ExecutionBurstController> configureExecutionBurst(bool blocking) const;
+    std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
+            bool preferPowerOverLatency) const;
 
    private:
     std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> executeAsynchronously(
diff --git a/nn/runtime/test/TestIntrospectionControl.cpp b/nn/runtime/test/TestIntrospectionControl.cpp
index 84617d9b0..9d0cbe6c3 100644
--- a/nn/runtime/test/TestIntrospectionControl.cpp
+++ b/nn/runtime/test/TestIntrospectionControl.cpp
@@ -16,6 +16,7 @@
 
 #include <gtest/gtest.h>
 
+#include <chrono>
 #include <iterator>
 #include <map>
 #include <queue>
@@ -309,7 +310,8 @@ std::set<Success> expectedPassSet = {Success::PASS_NEITHER, Success::PASS_DEVICE
 class TestPreparedModel12 : public SamplePreparedModel {
    public:
     TestPreparedModel12(const HidlModel& model, const SampleDriver* driver, Success success)
-        : SamplePreparedModel(model, driver), mSuccess(success) {}
+        : SamplePreparedModel(model, driver, ExecutionPreference::FAST_SINGLE_ANSWER),
+          mSuccess(success) {}
 
     Return<ErrorStatus> execute(const Request&,
                                 const sp<V1_0::IExecutionCallback>& callback) override {
@@ -384,8 +386,8 @@ class TestPreparedModel12 : public SamplePreparedModel {
             const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
             const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
             configureExecutionBurst_cb cb) override {
-        const sp<V1_2::IBurstContext> burst =
-                ExecutionBurstServer::create(callback, requestChannel, resultChannel, this);
+        const sp<V1_2::IBurstContext> burst = ExecutionBurstServer::create(
+                callback, requestChannel, resultChannel, this, std::chrono::microseconds{0});
 
         cb(burst == nullptr ? ErrorStatus::GENERAL_FAILURE : ErrorStatus::NONE, burst);
         return Void();
author	Michael Butler <butlermichael@google.com>	2019-10-28 15:25:58 -0700
committer	android-build-merger <android-build-merger@google.com>	2019-10-28 15:25:58 -0700
commit	bd86742ff4244cd721652c4b52ee5a43426cac82 (patch)
tree	a8e32e59a98b3cd2e4e290bdefdd0511ff2bd234
parent	8c28763a2a18faad99d0e7eed992d50dd9e09b02 (diff)
parent	6f2aab7acc36c2b5b273b50d3c86c1ed5f0db3ed (diff)
download	ml-bd86742ff4244cd721652c4b52ee5a43426cac82.tar.gz