author     Michael Butler <butlermichael@google.com>              2019-10-28 15:25:58 -0700
committer  android-build-merger <android-build-merger@google.com> 2019-10-28 15:25:58 -0700
commit     bd86742ff4244cd721652c4b52ee5a43426cac82 (patch)
tree       a8e32e59a98b3cd2e4e290bdefdd0511ff2bd234
parent     8c28763a2a18faad99d0e7eed992d50dd9e09b02 (diff)
parent     6f2aab7acc36c2b5b273b50d3c86c1ed5f0db3ed (diff)
download   ml-bd86742ff4244cd721652c4b52ee5a43426cac82.tar.gz

Merge "Improve performance of Burst executions" am: 5faee8ea5a am: 87dd1a37c2
am: 6f2aab7acc

Change-Id: Ib78f32f1324896091dfdbf23f5f1b0bbd936c04a
-rw-r--r--  nn/common/ExecutionBurstController.cpp        125
-rw-r--r--  nn/common/ExecutionBurstServer.cpp            137
-rw-r--r--  nn/common/include/ExecutionBurstController.h   68
-rw-r--r--  nn/common/include/ExecutionBurstServer.h       47
-rw-r--r--  nn/driver/sample/SampleDriver.cpp              30
-rw-r--r--  nn/driver/sample/SampleDriver.h                 6
-rw-r--r--  nn/runtime/CompilationBuilder.cpp               8
-rw-r--r--  nn/runtime/ExecutionPlan.cpp                   12
-rw-r--r--  nn/runtime/ExecutionPlan.h                      2
-rw-r--r--  nn/runtime/Manager.cpp                          4
-rw-r--r--  nn/runtime/VersionedInterfaces.cpp             24
-rw-r--r--  nn/runtime/VersionedInterfaces.h                8
-rw-r--r--  nn/runtime/test/TestIntrospectionControl.cpp    8
13 files changed, 307 insertions(+), 172 deletions(-)
diff --git a/nn/common/ExecutionBurstController.cpp b/nn/common/ExecutionBurstController.cpp
index f3a771b11..4456ed10d 100644
--- a/nn/common/ExecutionBurstController.cpp
+++ b/nn/common/ExecutionBurstController.cpp
@@ -19,9 +19,16 @@
#include "ExecutionBurstController.h"
#include <android-base/logging.h>
+
+#include <algorithm>
#include <cstring>
#include <limits>
+#include <memory>
#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
#include "Tracing.h"
#include "Utils.h"
@@ -30,9 +37,8 @@ namespace {
using namespace hal;
-using hardware::MQDescriptorSync;
-using FmqRequestDescriptor = MQDescriptorSync<FmqRequestDatum>;
-using FmqResultDescriptor = MQDescriptorSync<FmqResultDatum>;
+using FmqRequestDescriptor = hardware::MQDescriptorSync<FmqRequestDatum>;
+using FmqResultDescriptor = hardware::MQDescriptorSync<FmqResultDatum>;
constexpr Timing kNoTiming = {std::numeric_limits<uint64_t>::max(),
std::numeric_limits<uint64_t>::max()};
@@ -221,22 +227,23 @@ std::optional<std::tuple<ErrorStatus, std::vector<OutputShape>, Timing>> deseria
}
std::pair<std::unique_ptr<ResultChannelReceiver>, const FmqResultDescriptor*>
-ResultChannelReceiver::create(size_t channelLength, bool blocking) {
+ResultChannelReceiver::create(size_t channelLength, std::chrono::microseconds pollingTimeWindow) {
std::unique_ptr<FmqResultChannel> fmqResultChannel =
- std::make_unique<FmqResultChannel>(channelLength, /*confEventFlag=*/blocking);
+ std::make_unique<FmqResultChannel>(channelLength, /*confEventFlag=*/true);
if (!fmqResultChannel->isValid()) {
LOG(ERROR) << "Unable to create ResultChannelReceiver";
return {nullptr, nullptr};
}
+
const FmqResultDescriptor* descriptor = fmqResultChannel->getDesc();
return std::make_pair(
- std::make_unique<ResultChannelReceiver>(std::move(fmqResultChannel), blocking),
+ std::make_unique<ResultChannelReceiver>(std::move(fmqResultChannel), pollingTimeWindow),
descriptor);
}
ResultChannelReceiver::ResultChannelReceiver(std::unique_ptr<FmqResultChannel> fmqResultChannel,
- bool blocking)
- : mFmqResultChannel(std::move(fmqResultChannel)), mBlocking(blocking) {}
+ std::chrono::microseconds pollingTimeWindow)
+ : mFmqResultChannel(std::move(fmqResultChannel)), kPollingTimeWindow(pollingTimeWindow) {}
std::optional<std::tuple<ErrorStatus, std::vector<OutputShape>, Timing>>
ResultChannelReceiver::getBlocking() {
@@ -254,16 +261,14 @@ void ResultChannelReceiver::invalidate() {
// force unblock
// ExecutionBurstController waits on a result packet after sending a
// request. If the driver containing ExecutionBurstServer crashes, the
- // controller will still be waiting on the futex (assuming mBlocking is
- // true). This force unblock wakes up any thread waiting on the futex.
- if (mBlocking) {
- // TODO: look for a different/better way to signal/notify the futex to
- // wake up any thread waiting on it
- FmqResultDatum datum;
- datum.packetInformation({/*.packetSize=*/0, /*.errorStatus=*/ErrorStatus::GENERAL_FAILURE,
- /*.numberOfOperands=*/0});
- mFmqResultChannel->writeBlocking(&datum, 1);
- }
+ // controller may be waiting on the futex. This force unblock wakes up any
+ // thread waiting on the futex.
+ // TODO: look for a different/better way to signal/notify the futex to
+ // wake up any thread waiting on it
+ FmqResultDatum datum;
+ datum.packetInformation({/*.packetSize=*/0, /*.errorStatus=*/ErrorStatus::GENERAL_FAILURE,
+ /*.numberOfOperands=*/0});
+ mFmqResultChannel->writeBlocking(&datum, 1);
}
std::optional<std::vector<FmqResultDatum>> ResultChannelReceiver::getPacketBlocking() {
@@ -273,17 +278,42 @@ std::optional<std::vector<FmqResultDatum>> ResultChannelReceiver::getPacketBlock
return std::nullopt;
}
- // wait for result packet and read first element of result packet
- FmqResultDatum datum;
- bool success = true;
- if (mBlocking) {
- success = mFmqResultChannel->readBlocking(&datum, 1);
- } else {
- while ((success = mValid.load(std::memory_order_relaxed)) &&
- !mFmqResultChannel->read(&datum, 1)) {
+ // First spend time polling if results are available in FMQ instead of
+ // waiting on the futex. Polling is more responsive (yielding lower
+ // latencies), but can take up more power, so only poll for a limited period
+ // of time.
+
+ auto& getCurrentTime = std::chrono::high_resolution_clock::now;
+ const auto timeToStopPolling = getCurrentTime() + kPollingTimeWindow;
+
+ while (getCurrentTime() < timeToStopPolling) {
+ // if class is being torn down, immediately return
+ if (!mValid.load(std::memory_order_relaxed)) {
+ return std::nullopt;
+ }
+
+ // Check if data is available. If it is, immediately retrieve it and
+ // return.
+ const size_t available = mFmqResultChannel->availableToRead();
+ if (available > 0) {
+ std::vector<FmqResultDatum> packet(available);
+ const bool success = mFmqResultChannel->read(packet.data(), available);
+ if (!success) {
+ LOG(ERROR) << "Error receiving packet";
+ return std::nullopt;
+ }
+ return std::make_optional(std::move(packet));
}
}
+ // If we get to this point, we either stopped polling because it was taking
+ // too long or polling was not allowed. Instead, perform a blocking call
+ // which uses a futex to save power.
+
+ // wait for result packet and read first element of result packet
+ FmqResultDatum datum;
+ bool success = mFmqResultChannel->readBlocking(&datum, 1);
+
// retrieve remaining elements
// NOTE: all of the data is already available at this point, so there's no
// need to do a blocking wait to wait for more data. This is known because
@@ -310,22 +340,21 @@ std::optional<std::vector<FmqResultDatum>> ResultChannelReceiver::getPacketBlock
}
std::pair<std::unique_ptr<RequestChannelSender>, const FmqRequestDescriptor*>
-RequestChannelSender::create(size_t channelLength, bool blocking) {
+RequestChannelSender::create(size_t channelLength) {
std::unique_ptr<FmqRequestChannel> fmqRequestChannel =
- std::make_unique<FmqRequestChannel>(channelLength, /*confEventFlag=*/blocking);
+ std::make_unique<FmqRequestChannel>(channelLength, /*confEventFlag=*/true);
if (!fmqRequestChannel->isValid()) {
LOG(ERROR) << "Unable to create RequestChannelSender";
return {nullptr, nullptr};
}
+
const FmqRequestDescriptor* descriptor = fmqRequestChannel->getDesc();
- return std::make_pair(
- std::make_unique<RequestChannelSender>(std::move(fmqRequestChannel), blocking),
- descriptor);
+ return std::make_pair(std::make_unique<RequestChannelSender>(std::move(fmqRequestChannel)),
+ descriptor);
}
-RequestChannelSender::RequestChannelSender(std::unique_ptr<FmqRequestChannel> fmqRequestChannel,
- bool blocking)
- : mFmqRequestChannel(std::move(fmqRequestChannel)), mBlocking(blocking) {}
+RequestChannelSender::RequestChannelSender(std::unique_ptr<FmqRequestChannel> fmqRequestChannel)
+ : mFmqRequestChannel(std::move(fmqRequestChannel)) {}
bool RequestChannelSender::send(const Request& request, MeasureTiming measure,
const std::vector<int32_t>& slots) {
@@ -344,11 +373,9 @@ bool RequestChannelSender::sendPacket(const std::vector<FmqRequestDatum>& packet
return false;
}
- if (mBlocking) {
- return mFmqRequestChannel->writeBlocking(packet.data(), packet.size());
- } else {
- return mFmqRequestChannel->write(packet.data(), packet.size());
- }
+ // Always send the packet with "blocking" because this signals the futex and
+ // unblocks the consumer if it is waiting on the futex.
+ return mFmqRequestChannel->writeBlocking(packet.data(), packet.size());
}
void RequestChannelSender::invalidate() {
@@ -438,7 +465,7 @@ int32_t ExecutionBurstController::ExecutionBurstCallback::allocateSlotLocked() {
}
std::unique_ptr<ExecutionBurstController> ExecutionBurstController::create(
- const sp<IPreparedModel>& preparedModel, bool blocking) {
+ const sp<IPreparedModel>& preparedModel, std::chrono::microseconds pollingTimeWindow) {
// check inputs
if (preparedModel == nullptr) {
LOG(ERROR) << "ExecutionBurstController::create passed a nullptr";
@@ -450,9 +477,9 @@ std::unique_ptr<ExecutionBurstController> ExecutionBurstController::create(
// create FMQ objects
auto [requestChannelSenderTemp, requestChannelDescriptor] =
- RequestChannelSender::create(kExecutionBurstChannelLength, blocking);
+ RequestChannelSender::create(kExecutionBurstChannelLength);
auto [resultChannelReceiverTemp, resultChannelDescriptor] =
- ResultChannelReceiver::create(kExecutionBurstChannelLength, blocking);
+ ResultChannelReceiver::create(kExecutionBurstChannelLength, pollingTimeWindow);
std::shared_ptr<RequestChannelSender> requestChannelSender =
std::move(requestChannelSenderTemp);
std::shared_ptr<ResultChannelReceiver> resultChannelReceiver =
@@ -543,15 +570,13 @@ static std::tuple<int, std::vector<OutputShape>, Timing, bool> getExecutionResul
return {n, std::move(checkedOutputShapes), checkedTiming, fallback};
}
-std::tuple<ErrorStatus, std::vector<OutputShape>, Timing> ExecutionBurstController::compute(
- const Request& request, MeasureTiming measure, const std::vector<intptr_t>& memoryIds) {
- auto [status, outputShapes, timing, fallback] = tryCompute(request, measure, memoryIds);
- (void)fallback; // ignore fallback field
- return {convertResultCodeToErrorStatus(status), std::move(outputShapes), timing};
-}
-
-std::tuple<int, std::vector<OutputShape>, Timing, bool> ExecutionBurstController::tryCompute(
+std::tuple<int, std::vector<OutputShape>, Timing, bool> ExecutionBurstController::compute(
const Request& request, MeasureTiming measure, const std::vector<intptr_t>& memoryIds) {
+ // This is the first point when we know an execution is occurring, so begin
+ // to collect systraces. Note that the first point we can begin collecting
+ // systraces in ExecutionBurstServer is when the RequestChannelReceiver
+ // realizes there is data in the FMQ, so ExecutionBurstServer collects
+ // systraces at different points in the code.
NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "ExecutionBurstController::compute");
std::lock_guard<std::mutex> guard(mMutex);
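
The receive path added above first polls the FMQ for a bounded time window and only falls back to the futex-backed readBlocking() once that window expires. The following is a minimal standalone sketch of that poll-then-block pattern, not part of the commit; the Queue template parameter is a hypothetical stand-in for the FMQ, assumed to expose availableToRead(), read(), and readBlocking() like the calls used above.

#include <chrono>
#include <cstddef>
#include <optional>
#include <vector>

// Poll for up to pollingTimeWindow, then fall back to a blocking (futex) read.
template <typename Queue, typename Datum>
std::optional<std::vector<Datum>> receiveWithPolling(Queue& queue,
                                                     std::chrono::microseconds pollingTimeWindow) {
    const auto deadline = std::chrono::high_resolution_clock::now() + pollingTimeWindow;

    // Polling is more responsive (lower latency) but uses more power, so it is
    // bounded by the deadline; a zero-length window skips polling entirely.
    while (std::chrono::high_resolution_clock::now() < deadline) {
        const size_t available = queue.availableToRead();
        if (available > 0) {
            std::vector<Datum> packet(available);
            if (!queue.read(packet.data(), available)) {
                return std::nullopt;  // read error
            }
            return packet;
        }
    }

    // Window expired (or was zero): wait on the futex to save power.
    Datum first;
    if (!queue.readBlocking(&first, 1)) {
        return std::nullopt;
    }
    // The caller drains the remaining elements non-blocking, as the real code does.
    return std::vector<Datum>{first};
}
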
diff --git a/nn/common/ExecutionBurstServer.cpp b/nn/common/ExecutionBurstServer.cpp
index 74bc34058..ec935dad6 100644
--- a/nn/common/ExecutionBurstServer.cpp
+++ b/nn/common/ExecutionBurstServer.cpp
@@ -20,9 +20,14 @@
#include <android-base/logging.h>
+#include <algorithm>
#include <cstring>
#include <limits>
#include <map>
+#include <memory>
+#include <tuple>
+#include <utility>
+#include <vector>
#include "Tracing.h"
@@ -31,6 +36,8 @@ namespace {
using namespace hal;
+using hardware::MQDescriptorSync;
+
constexpr Timing kNoTiming = {std::numeric_limits<uint64_t>::max(),
std::numeric_limits<uint64_t>::max()};
@@ -298,20 +305,27 @@ std::optional<std::tuple<Request, std::vector<int32_t>, MeasureTiming>> deserial
// RequestChannelReceiver methods
std::unique_ptr<RequestChannelReceiver> RequestChannelReceiver::create(
- const FmqRequestDescriptor& requestChannel) {
+ const FmqRequestDescriptor& requestChannel, std::chrono::microseconds pollingTimeWindow) {
std::unique_ptr<FmqRequestChannel> fmqRequestChannel =
std::make_unique<FmqRequestChannel>(requestChannel);
+
if (!fmqRequestChannel->isValid()) {
LOG(ERROR) << "Unable to create RequestChannelReceiver";
return nullptr;
}
- const bool blocking = fmqRequestChannel->getEventFlagWord() != nullptr;
- return std::make_unique<RequestChannelReceiver>(std::move(fmqRequestChannel), blocking);
+ if (fmqRequestChannel->getEventFlagWord() == nullptr) {
+ LOG(ERROR)
+ << "RequestChannelReceiver::create was passed an MQDescriptor without an EventFlag";
+ return nullptr;
+ }
+
+ return std::make_unique<RequestChannelReceiver>(std::move(fmqRequestChannel),
+ pollingTimeWindow);
}
RequestChannelReceiver::RequestChannelReceiver(std::unique_ptr<FmqRequestChannel> fmqRequestChannel,
- bool blocking)
- : mFmqRequestChannel(std::move(fmqRequestChannel)), mBlocking(blocking) {}
+ std::chrono::microseconds pollingTimeWindow)
+ : mFmqRequestChannel(std::move(fmqRequestChannel)), kPollingTimeWindow(pollingTimeWindow) {}
std::optional<std::tuple<Request, std::vector<int32_t>, MeasureTiming>>
RequestChannelReceiver::getBlocking() {
@@ -328,17 +342,15 @@ void RequestChannelReceiver::invalidate() {
// force unblock
// ExecutionBurstServer is by default waiting on a request packet. If the
- // client process destroys its burst object, the server will still be
- // waiting on the futex (assuming mBlocking is true). This force unblock
- // wakes up any thread waiting on the futex.
- if (mBlocking) {
- // TODO: look for a different/better way to signal/notify the futex to
- // wake up any thread waiting on it
- FmqRequestDatum datum;
- datum.packetInformation({/*.packetSize=*/0, /*.numberOfInputOperands=*/0,
- /*.numberOfOutputOperands=*/0, /*.numberOfPools=*/0});
- mFmqRequestChannel->writeBlocking(&datum, 1);
- }
+ // client process destroys its burst object, the server may still be waiting
+ // on the futex. This force unblock wakes up any thread waiting on the
+ // futex.
+ // TODO: look for a different/better way to signal/notify the futex to wake
+ // up any thread waiting on it
+ FmqRequestDatum datum;
+ datum.packetInformation({/*.packetSize=*/0, /*.numberOfInputOperands=*/0,
+ /*.numberOfOutputOperands=*/0, /*.numberOfPools=*/0});
+ mFmqRequestChannel->writeBlocking(&datum, 1);
}
std::optional<std::vector<FmqRequestDatum>> RequestChannelReceiver::getPacketBlocking() {
@@ -348,17 +360,53 @@ std::optional<std::vector<FmqRequestDatum>> RequestChannelReceiver::getPacketBlo
return std::nullopt;
}
- // wait for request packet and read first element of request packet
- FmqRequestDatum datum;
- bool success = false;
- if (mBlocking) {
- success = mFmqRequestChannel->readBlocking(&datum, 1);
- } else {
- while ((success = !mTeardown.load(std::memory_order_relaxed)) &&
- !mFmqRequestChannel->read(&datum, 1)) {
+ // First spend time polling if results are available in FMQ instead of
+ // waiting on the futex. Polling is more responsive (yielding lower
+ // latencies), but can take up more power, so only poll for a limited period
+ // of time.
+
+ auto& getCurrentTime = std::chrono::high_resolution_clock::now;
+ const auto timeToStopPolling = getCurrentTime() + kPollingTimeWindow;
+
+ while (getCurrentTime() < timeToStopPolling) {
+ // if class is being torn down, immediately return
+ if (mTeardown.load(std::memory_order_relaxed)) {
+ return std::nullopt;
+ }
+
+ // Check if data is available. If it is, immediately retrieve it and
+ // return.
+ const size_t available = mFmqRequestChannel->availableToRead();
+ if (available > 0) {
+ // This is the first point when we know an execution is occurring,
+ // so begin to collect systraces. Note that a similar systrace does
+ // not exist at the corresponding point in
+ // ResultChannelReceiver::getPacketBlocking because the execution is
+ // already in flight.
+ NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION,
+ "ExecutionBurstServer getting packet");
+ std::vector<FmqRequestDatum> packet(available);
+ const bool success = mFmqRequestChannel->read(packet.data(), available);
+ if (!success) {
+ LOG(ERROR) << "Error receiving packet";
+ return std::nullopt;
+ }
+ return std::make_optional(std::move(packet));
}
}
+ // If we get to this point, we either stopped polling because it was taking
+ // too long or polling was not allowed. Instead, perform a blocking call
+ // which uses a futex to save power.
+
+ // wait for request packet and read first element of request packet
+ FmqRequestDatum datum;
+ bool success = mFmqRequestChannel->readBlocking(&datum, 1);
+
+ // This is the first point when we know an execution is occurring, so begin
+ // to collect systraces. Note that a similar systrace does not exist at the
+ // corresponding point in ResultChannelReceiver::getPacketBlocking because
+ // the execution is already in flight.
NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "ExecutionBurstServer getting packet");
// retrieve remaining elements
@@ -393,17 +441,21 @@ std::unique_ptr<ResultChannelSender> ResultChannelSender::create(
const FmqResultDescriptor& resultChannel) {
std::unique_ptr<FmqResultChannel> fmqResultChannel =
std::make_unique<FmqResultChannel>(resultChannel);
+
if (!fmqResultChannel->isValid()) {
LOG(ERROR) << "Unable to create RequestChannelSender";
return nullptr;
}
- const bool blocking = fmqResultChannel->getEventFlagWord() != nullptr;
- return std::make_unique<ResultChannelSender>(std::move(fmqResultChannel), blocking);
+ if (fmqResultChannel->getEventFlagWord() == nullptr) {
+ LOG(ERROR) << "ResultChannelSender::create was passed an MQDescriptor without an EventFlag";
+ return nullptr;
+ }
+
+ return std::make_unique<ResultChannelSender>(std::move(fmqResultChannel));
}
-ResultChannelSender::ResultChannelSender(std::unique_ptr<FmqResultChannel> fmqResultChannel,
- bool blocking)
- : mFmqResultChannel(std::move(fmqResultChannel)), mBlocking(blocking) {}
+ResultChannelSender::ResultChannelSender(std::unique_ptr<FmqResultChannel> fmqResultChannel)
+ : mFmqResultChannel(std::move(fmqResultChannel)) {}
bool ResultChannelSender::send(ErrorStatus errorStatus,
const std::vector<OutputShape>& outputShapes, Timing timing) {
@@ -417,18 +469,15 @@ bool ResultChannelSender::sendPacket(const std::vector<FmqResultDatum>& packet)
<< "ResultChannelSender::sendPacket -- packet size exceeds size available in FMQ";
const std::vector<FmqResultDatum> errorPacket =
serialize(ErrorStatus::GENERAL_FAILURE, {}, kNoTiming);
- if (mBlocking) {
- return mFmqResultChannel->writeBlocking(errorPacket.data(), errorPacket.size());
- } else {
- return mFmqResultChannel->write(errorPacket.data(), errorPacket.size());
- }
- }
- if (mBlocking) {
- return mFmqResultChannel->writeBlocking(packet.data(), packet.size());
- } else {
- return mFmqResultChannel->write(packet.data(), packet.size());
+ // Always send the packet with "blocking" because this signals the futex
+ // and unblocks the consumer if it is waiting on the futex.
+ return mFmqResultChannel->writeBlocking(errorPacket.data(), errorPacket.size());
}
+
+ // Always send the packet with "blocking" because this signals the futex and
+ // unblocks the consumer if it is waiting on the futex.
+ return mFmqResultChannel->writeBlocking(packet.data(), packet.size());
}
// ExecutionBurstServer methods
@@ -436,7 +485,8 @@ bool ResultChannelSender::sendPacket(const std::vector<FmqResultDatum>& packet)
sp<ExecutionBurstServer> ExecutionBurstServer::create(
const sp<IBurstCallback>& callback, const MQDescriptorSync<FmqRequestDatum>& requestChannel,
const MQDescriptorSync<FmqResultDatum>& resultChannel,
- std::shared_ptr<IBurstExecutorWithCache> executorWithCache) {
+ std::shared_ptr<IBurstExecutorWithCache> executorWithCache,
+ std::chrono::microseconds pollingTimeWindow) {
// check inputs
if (callback == nullptr || executorWithCache == nullptr) {
LOG(ERROR) << "ExecutionBurstServer::create passed a nullptr";
@@ -445,7 +495,7 @@ sp<ExecutionBurstServer> ExecutionBurstServer::create(
// create FMQ objects
std::unique_ptr<RequestChannelReceiver> requestChannelReceiver =
- RequestChannelReceiver::create(requestChannel);
+ RequestChannelReceiver::create(requestChannel, pollingTimeWindow);
std::unique_ptr<ResultChannelSender> resultChannelSender =
ResultChannelSender::create(resultChannel);
@@ -462,7 +512,8 @@ sp<ExecutionBurstServer> ExecutionBurstServer::create(
sp<ExecutionBurstServer> ExecutionBurstServer::create(
const sp<IBurstCallback>& callback, const MQDescriptorSync<FmqRequestDatum>& requestChannel,
- const MQDescriptorSync<FmqResultDatum>& resultChannel, IPreparedModel* preparedModel) {
+ const MQDescriptorSync<FmqResultDatum>& resultChannel, IPreparedModel* preparedModel,
+ std::chrono::microseconds pollingTimeWindow) {
// check relevant input
if (preparedModel == nullptr) {
LOG(ERROR) << "ExecutionBurstServer::create passed a nullptr";
@@ -475,7 +526,7 @@ sp<ExecutionBurstServer> ExecutionBurstServer::create(
// make and return context
return ExecutionBurstServer::create(callback, requestChannel, resultChannel,
- preparedModelAdapter);
+ preparedModelAdapter, pollingTimeWindow);
}
ExecutionBurstServer::ExecutionBurstServer(
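
Both invalidate() paths above wake a reader that may be parked on the futex by writing a sentinel "empty packet" into the channel. Below is a minimal sketch of that idea, not part of the commit; Queue and Datum are hypothetical stand-ins, and the sentinel is assumed to be a packet-information datum with packetSize == 0, built by the caller as in the code above.

#include <atomic>

// Tear down a receiver: mark it invalid, then write a dummy datum so any
// thread blocked in readBlocking() wakes up, sees the flag, and returns.
template <typename Queue, typename Datum>
void invalidateAndUnblock(Queue& queue, std::atomic<bool>& valid, const Datum& sentinel) {
    // Clear the flag first so a woken reader immediately bails out.
    valid.store(false, std::memory_order_relaxed);
    // writeBlocking() signals the futex, unblocking any waiting reader.
    queue.writeBlocking(&sentinel, 1);
}
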
diff --git a/nn/common/include/ExecutionBurstController.h b/nn/common/include/ExecutionBurstController.h
index 6328096b0..652b0d911 100644
--- a/nn/common/include/ExecutionBurstController.h
+++ b/nn/common/include/ExecutionBurstController.h
@@ -17,18 +17,21 @@
#ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
#define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
-#include "HalInterfaces.h"
-
#include <android-base/macros.h>
#include <fmq/MessageQueue.h>
#include <hidl/MQDescriptor.h>
#include <atomic>
+#include <chrono>
#include <map>
#include <memory>
#include <mutex>
#include <stack>
#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "HalInterfaces.h"
namespace android::nn {
@@ -70,10 +73,10 @@ std::optional<std::tuple<hal::ErrorStatus, std::vector<hal::OutputShape>, hal::T
*
* Because the receiver can wait on a packet that may never come (e.g., because
* the sending side of the packet has been closed), this object can be
- * invalidating, unblocking the receiver.
+ * invalidated, unblocking the receiver.
*/
class ResultChannelReceiver {
- using FmqResultDescriptor = ::android::hardware::MQDescriptorSync<hal::FmqResultDatum>;
+ using FmqResultDescriptor = hardware::MQDescriptorSync<hal::FmqResultDatum>;
using FmqResultChannel =
hardware::MessageQueue<hal::FmqResultDatum, hardware::kSynchronizedReadWrite>;
@@ -84,13 +87,15 @@ class ResultChannelReceiver {
* Prefer this call over the constructor.
*
* @param channelLength Number of elements in the FMQ.
- * @param blocking 'true' if FMQ should use futex, 'false' if it should
- * spin-wait.
+ * @param pollingTimeWindow How much time (in microseconds) the
+ * ResultChannelReceiver is allowed to poll the FMQ before waiting on
+ * the blocking futex. Polling may result in lower latencies at the
+ * potential cost of more power usage.
* @return A pair of ResultChannelReceiver and the FMQ descriptor on
* successful creation, both nullptr otherwise.
*/
static std::pair<std::unique_ptr<ResultChannelReceiver>, const FmqResultDescriptor*> create(
- size_t channelLength, bool blocking);
+ size_t channelLength, std::chrono::microseconds pollingTimeWindow);
/**
* Get the result from the channel.
@@ -114,12 +119,13 @@ class ResultChannelReceiver {
// prefer calling ResultChannelReceiver::getBlocking
std::optional<std::vector<hal::FmqResultDatum>> getPacketBlocking();
- ResultChannelReceiver(std::unique_ptr<FmqResultChannel> fmqResultChannel, bool blocking);
+ ResultChannelReceiver(std::unique_ptr<FmqResultChannel> fmqResultChannel,
+ std::chrono::microseconds pollingTimeWindow);
private:
const std::unique_ptr<FmqResultChannel> mFmqResultChannel;
std::atomic<bool> mValid{true};
- const bool mBlocking;
+ const std::chrono::microseconds kPollingTimeWindow;
};
/**
@@ -128,7 +134,7 @@ class ResultChannelReceiver {
* available.
*/
class RequestChannelSender {
- using FmqRequestDescriptor = ::android::hardware::MQDescriptorSync<hal::FmqRequestDatum>;
+ using FmqRequestDescriptor = hardware::MQDescriptorSync<hal::FmqRequestDatum>;
using FmqRequestChannel =
hardware::MessageQueue<hal::FmqRequestDatum, hardware::kSynchronizedReadWrite>;
@@ -139,13 +145,11 @@ class RequestChannelSender {
* Prefer this call over the constructor.
*
* @param channelLength Number of elements in the FMQ.
- * @param blocking 'true' if FMQ should use futex, 'false' if it should
- * spin-wait.
* @return A pair of ResultChannelReceiver and the FMQ descriptor on
* successful creation, both nullptr otherwise.
*/
static std::pair<std::unique_ptr<RequestChannelSender>, const FmqRequestDescriptor*> create(
- size_t channelLength, bool blocking);
+ size_t channelLength);
/**
* Send the request to the channel.
@@ -169,12 +173,11 @@ class RequestChannelSender {
// prefer calling RequestChannelSender::send
bool sendPacket(const std::vector<hal::FmqRequestDatum>& packet);
- RequestChannelSender(std::unique_ptr<FmqRequestChannel> fmqRequestChannel, bool blocking);
+ RequestChannelSender(std::unique_ptr<FmqRequestChannel> fmqRequestChannel);
private:
const std::unique_ptr<FmqRequestChannel> mFmqRequestChannel;
std::atomic<bool> mValid{true};
- const bool mBlocking;
};
/**
@@ -260,15 +263,15 @@ class ExecutionBurstController {
* Prefer this over ExecutionBurstController's constructor.
*
* @param preparedModel Model prepared for execution to execute on.
- * @param blocking 'true' if the FMQ should use a futex to perform blocking
- * until data is available in a less responsive, but more energy
- * efficient manner. 'false' if the FMQ should use spin-looping to
- * wait until data is available in a more responsive, but less energy
- * efficient manner.
+ * @param pollingTimeWindow How much time (in microseconds) the
+ * ExecutionBurstController is allowed to poll the FMQ before waiting on
+ * the blocking futex. Polling may result in lower latencies at the
+ * potential cost of more power usage.
* @return ExecutionBurstController Execution burst controller object.
*/
static std::unique_ptr<ExecutionBurstController> create(
- const sp<hal::IPreparedModel>& preparedModel, bool blocking);
+ const sp<hal::IPreparedModel>& preparedModel,
+ std::chrono::microseconds pollingTimeWindow);
// prefer calling ExecutionBurstController::create
ExecutionBurstController(const std::shared_ptr<RequestChannelSender>& requestChannelSender,
@@ -288,34 +291,13 @@ class ExecutionBurstController {
* @param memoryIds Identifiers corresponding to each memory object in the
* request's pools.
* @return A tuple of:
- * - status of the execution
- * - dynamic output shapes from the execution
- * - any execution time measurements of the execution
- */
- std::tuple<hal::ErrorStatus, std::vector<hal::OutputShape>, hal::Timing> compute(
- const hal::Request& request, hal::MeasureTiming measure,
- const std::vector<intptr_t>& memoryIds);
-
- // TODO: combine "compute" and "tryCompute" back into a single function.
- // "tryCompute" was created later to return the "fallback" boolean. This
- // could not be done directly in "compute" because the VTS test cases (which
- // test burst using "compute") had already been locked down and could not be
- // changed.
- /**
- * Execute a request on a model.
- *
- * @param request Arguments to be executed on a model.
- * @param measure Whether to collect timing measurements, either YES or NO
- * @param memoryIds Identifiers corresponding to each memory object in the
- * request's pools.
- * @return A tuple of:
* - result code of the execution
* - dynamic output shapes from the execution
* - any execution time measurements of the execution
* - whether or not a failed burst execution should be re-run using a
* different path (e.g., IPreparedModel::executeSynchronously)
*/
- std::tuple<int, std::vector<hal::OutputShape>, hal::Timing, bool> tryCompute(
+ std::tuple<int, std::vector<hal::OutputShape>, hal::Timing, bool> compute(
const hal::Request& request, hal::MeasureTiming measure,
const std::vector<intptr_t>& memoryIds);
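
With the updated ExecutionBurstController::create() signature, the caller chooses how long the controller may poll the result FMQ before blocking. A hedged usage sketch follows, not from the commit; the surrounding NN runtime headers and a valid prepared-model handle are assumed.

#include <chrono>
#include <memory>

// "preparedModel" is assumed to be a valid sp<hal::IPreparedModel> obtained
// elsewhere; names otherwise follow ExecutionBurstController.h.
std::unique_ptr<android::nn::ExecutionBurstController> makeBurst(
        const android::sp<android::nn::hal::IPreparedModel>& preparedModel) {
    // Poll for up to 50 us before waiting on the futex; pass microseconds{0}
    // to skip polling entirely and minimize power.
    return android::nn::ExecutionBurstController::create(preparedModel,
                                                         std::chrono::microseconds{50});
}
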
diff --git a/nn/common/include/ExecutionBurstServer.h b/nn/common/include/ExecutionBurstServer.h
index 977d0d375..9da0dc742 100644
--- a/nn/common/include/ExecutionBurstServer.h
+++ b/nn/common/include/ExecutionBurstServer.h
@@ -17,23 +17,24 @@
#ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_SERVER_H
#define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_SERVER_H
-#include "HalInterfaces.h"
-
#include <android-base/macros.h>
#include <fmq/MessageQueue.h>
#include <hidl/MQDescriptor.h>
#include <atomic>
+#include <chrono>
#include <memory>
#include <optional>
#include <thread>
+#include <tuple>
#include <vector>
+#include "HalInterfaces.h"
+
namespace android::nn {
-using hardware::MQDescriptorSync;
-using FmqRequestDescriptor = MQDescriptorSync<hal::FmqRequestDatum>;
-using FmqResultDescriptor = MQDescriptorSync<hal::FmqResultDatum>;
+using FmqRequestDescriptor = hardware::MQDescriptorSync<hal::FmqRequestDatum>;
+using FmqResultDescriptor = hardware::MQDescriptorSync<hal::FmqResultDatum>;
/**
* Function to serialize results.
@@ -69,7 +70,7 @@ std::optional<std::tuple<hal::Request, std::vector<int32_t>, hal::MeasureTiming>
*
* Because the receiver can wait on a packet that may never come (e.g., because
* the sending side of the packet has been closed), this object can be
- * invalidating, unblocking the receiver.
+ * invalidated, unblocking the receiver.
*/
class RequestChannelReceiver {
using FmqRequestChannel =
@@ -82,10 +83,15 @@ class RequestChannelReceiver {
* Prefer this call over the constructor.
*
* @param requestChannel Descriptor for the request channel.
+ * @param pollingTimeWindow How much time (in microseconds) the
+ * RequestChannelReceiver is allowed to poll the FMQ before waiting on
+ * the blocking futex. Polling may result in lower latencies at the
+ * potential cost of more power usage.
* @return RequestChannelReceiver on successful creation, nullptr otherwise.
*/
static std::unique_ptr<RequestChannelReceiver> create(
- const FmqRequestDescriptor& requestChannel);
+ const FmqRequestDescriptor& requestChannel,
+ std::chrono::microseconds pollingTimeWindow);
/**
* Get the request from the channel.
@@ -105,14 +111,15 @@ class RequestChannelReceiver {
*/
void invalidate();
- RequestChannelReceiver(std::unique_ptr<FmqRequestChannel> fmqRequestChannel, bool blocking);
+ RequestChannelReceiver(std::unique_ptr<FmqRequestChannel> fmqRequestChannel,
+ std::chrono::microseconds pollingTimeWindow);
private:
std::optional<std::vector<hal::FmqRequestDatum>> getPacketBlocking();
const std::unique_ptr<FmqRequestChannel> mFmqRequestChannel;
std::atomic<bool> mTeardown{false};
- const bool mBlocking;
+ const std::chrono::microseconds kPollingTimeWindow;
};
/**
@@ -149,11 +156,10 @@ class ResultChannelSender {
// prefer calling ResultChannelSender::send
bool sendPacket(const std::vector<hal::FmqResultDatum>& packet);
- ResultChannelSender(std::unique_ptr<FmqResultChannel> fmqResultChannel, bool blocking);
+ ResultChannelSender(std::unique_ptr<FmqResultChannel> fmqResultChannel);
private:
const std::unique_ptr<FmqResultChannel> mFmqResultChannel;
- const bool mBlocking;
};
/**
@@ -247,12 +253,17 @@ class ExecutionBurstServer : public hal::IBurstContext {
* the result of the execution.
* @param executorWithCache Object which maintains a local cache of the
* memory pools and executes using the cached memory pools.
+ * @param pollingTimeWindow How much time (in microseconds) the
+ * ExecutionBurstServer is allowed to poll the FMQ before waiting on
+ * the blocking futex. Polling may result in lower latencies at the
+ * potential cost of more power usage.
* @result IBurstContext Handle to the burst context.
*/
static sp<ExecutionBurstServer> create(
const sp<hal::IBurstCallback>& callback, const FmqRequestDescriptor& requestChannel,
const FmqResultDescriptor& resultChannel,
- std::shared_ptr<IBurstExecutorWithCache> executorWithCache);
+ std::shared_ptr<IBurstExecutorWithCache> executorWithCache,
+ std::chrono::microseconds pollingTimeWindow = std::chrono::microseconds{0});
/**
* Create automated context to manage FMQ-based executions.
@@ -271,12 +282,16 @@ class ExecutionBurstServer : public hal::IBurstContext {
* @param preparedModel PreparedModel that the burst object was created from.
* IPreparedModel::executeSynchronously will be used to perform the
* execution.
+ * @param pollingTimeWindow How much time (in microseconds) the
+ * ExecutionBurstServer is allowed to poll the FMQ before waiting on
+ * the blocking futex. Polling may result in lower latencies at the
+ * potential cost of more power usage.
* @result IBurstContext Handle to the burst context.
*/
- static sp<ExecutionBurstServer> create(const sp<hal::IBurstCallback>& callback,
- const FmqRequestDescriptor& requestChannel,
- const FmqResultDescriptor& resultChannel,
- hal::IPreparedModel* preparedModel);
+ static sp<ExecutionBurstServer> create(
+ const sp<hal::IBurstCallback>& callback, const FmqRequestDescriptor& requestChannel,
+ const FmqResultDescriptor& resultChannel, hal::IPreparedModel* preparedModel,
+ std::chrono::microseconds pollingTimeWindow = std::chrono::microseconds{0});
ExecutionBurstServer(const sp<hal::IBurstCallback>& callback,
std::unique_ptr<RequestChannelReceiver> requestChannel,
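
Both ExecutionBurstServer::create() overloads above now take a pollingTimeWindow that defaults to microseconds{0}, meaning the server never polls and always waits on the futex. A hedged sketch of a driver wiring this up through the simpler IPreparedModel* overload is shown below; the class name MyPreparedModel is a placeholder, and the real SampleDriver change further down uses the executor-with-cache overload instead.

// Assumes the usual NNAPI HIDL types (Return, Void, ErrorStatus, V1_2::*) are
// in scope, as they are inside a driver's prepared-model implementation.
Return<void> MyPreparedModel::configureExecutionBurst(
        const sp<V1_2::IBurstCallback>& callback,
        const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
        const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
        configureExecutionBurst_cb cb) {
    // Omitting pollingTimeWindow keeps the default of microseconds{0}: the
    // server parks on the futex until a request arrives (lowest power).
    const sp<V1_2::IBurstContext> burst =
            ExecutionBurstServer::create(callback, requestChannel, resultChannel, this);
    cb(burst == nullptr ? ErrorStatus::GENERAL_FAILURE : ErrorStatus::NONE, burst);
    return Void();
}
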
diff --git a/nn/driver/sample/SampleDriver.cpp b/nn/driver/sample/SampleDriver.cpp
index 0448c2d79..50cb7729a 100644
--- a/nn/driver/sample/SampleDriver.cpp
+++ b/nn/driver/sample/SampleDriver.cpp
@@ -19,6 +19,7 @@
#include "SampleDriver.h"
#include <android-base/logging.h>
+#include <android-base/properties.h>
#include <hidl/LegacySupport.h>
#include <algorithm>
@@ -185,9 +186,9 @@ Return<ErrorStatus> prepareModelBase(const T_Model& model, const SampleDriver* d
}
// asynchronously prepare the model from a new, detached thread
- std::thread([model, driver, callback] {
+ std::thread([model, driver, preference, callback] {
sp<SamplePreparedModel> preparedModel =
- new SamplePreparedModel(convertToV1_3(model), driver);
+ new SamplePreparedModel(convertToV1_3(model), driver, preference);
if (!preparedModel->initialize()) {
notify(callback, ErrorStatus::INVALID_ARGUMENT, nullptr);
return;
@@ -472,6 +473,22 @@ class BurstExecutorWithCache : public ExecutionBurstServer::IBurstExecutorWithCa
std::map<int32_t, std::optional<RunTimePoolInfo>> mMemoryCache; // cached requestPoolInfos
};
+// This is the amount of time the ExecutionBurstServer should spend polling the
+// FMQ to see if it has data available before it should fall back to waiting on
+// the futex.
+static std::chrono::microseconds getPollingTimeWindow() {
+ constexpr int32_t defaultPollingTimeWindow = 50;
+#ifdef NN_DEBUGGABLE
+ constexpr int32_t minPollingTimeWindow = 0;
+ const int32_t selectedPollingTimeWindow =
+ base::GetIntProperty("debug.nn.sample-driver-burst-polling-window",
+ defaultPollingTimeWindow, minPollingTimeWindow);
+ return std::chrono::microseconds{selectedPollingTimeWindow};
+#else
+ return std::chrono::microseconds{defaultPollingTimeWindow};
+#endif // NN_DEBUGGABLE
+}
+
Return<void> SamplePreparedModel::configureExecutionBurst(
const sp<V1_2::IBurstCallback>& callback,
const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
@@ -480,17 +497,22 @@ Return<void> SamplePreparedModel::configureExecutionBurst(
NNTRACE_FULL(NNTRACE_LAYER_DRIVER, NNTRACE_PHASE_EXECUTION,
"SampleDriver::configureExecutionBurst");
+ const bool preferPowerOverLatency = (kPreference == hal::ExecutionPreference::LOW_POWER);
+ const auto pollingTimeWindow =
+ (preferPowerOverLatency ? std::chrono::microseconds{0} : getPollingTimeWindow());
+
// Alternatively, the burst could be configured via:
// const sp<V1_2::IBurstContext> burst =
// ExecutionBurstServer::create(callback, requestChannel,
- // resultChannel, this);
+ // resultChannel, this,
+ // pollingTimeWindow);
//
// However, this alternative representation does not include a memory map
// caching optimization, and adds overhead.
const std::shared_ptr<BurstExecutorWithCache> executorWithCache =
std::make_shared<BurstExecutorWithCache>(mModel, mDriver, mPoolInfos);
const sp<V1_2::IBurstContext> burst = ExecutionBurstServer::create(
- callback, requestChannel, resultChannel, executorWithCache);
+ callback, requestChannel, resultChannel, executorWithCache, pollingTimeWindow);
if (burst == nullptr) {
cb(ErrorStatus::GENERAL_FAILURE, {});
diff --git a/nn/driver/sample/SampleDriver.h b/nn/driver/sample/SampleDriver.h
index a85dcd5ea..8788ed3a8 100644
--- a/nn/driver/sample/SampleDriver.h
+++ b/nn/driver/sample/SampleDriver.h
@@ -91,8 +91,9 @@ class SampleDriver : public hal::IDevice {
class SamplePreparedModel : public hal::IPreparedModel {
public:
- SamplePreparedModel(const hal::Model& model, const SampleDriver* driver)
- : mModel(model), mDriver(driver) {}
+ SamplePreparedModel(const hal::Model& model, const SampleDriver* driver,
+ hal::ExecutionPreference preference)
+ : mModel(model), mDriver(driver), kPreference(preference) {}
~SamplePreparedModel() override {}
bool initialize();
hal::Return<hal::ErrorStatus> execute(
@@ -113,6 +114,7 @@ class SamplePreparedModel : public hal::IPreparedModel {
hal::Model mModel;
const SampleDriver* mDriver;
std::vector<RunTimePoolInfo> mPoolInfos;
+ const hal::ExecutionPreference kPreference;
};
} // namespace sample_driver
diff --git a/nn/runtime/CompilationBuilder.cpp b/nn/runtime/CompilationBuilder.cpp
index 912f0087b..be0260ca2 100644
--- a/nn/runtime/CompilationBuilder.cpp
+++ b/nn/runtime/CompilationBuilder.cpp
@@ -18,6 +18,11 @@
#include "CompilationBuilder.h"
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
#include "BurstBuilder.h"
#include "ExecutionBuilder.h"
#include "ExecutionBurstController.h"
@@ -156,7 +161,8 @@ int CompilationBuilder::createBurst(BurstBuilder** burst) {
*burst = nullptr;
return ANEURALNETWORKS_BAD_STATE;
}
- std::vector<std::shared_ptr<ExecutionBurstController>> burstControllers = mPlan.makeBursts();
+ std::vector<std::shared_ptr<ExecutionBurstController>> burstControllers =
+ mPlan.makeBursts(mPreference);
*burst = new (std::nothrow) BurstBuilder(this, std::move(burstControllers));
return (*burst ? ANEURALNETWORKS_NO_ERROR : ANEURALNETWORKS_OUT_OF_MEMORY);
}
diff --git a/nn/runtime/ExecutionPlan.cpp b/nn/runtime/ExecutionPlan.cpp
index 5f656e1c2..901305216 100644
--- a/nn/runtime/ExecutionPlan.cpp
+++ b/nn/runtime/ExecutionPlan.cpp
@@ -557,7 +557,8 @@ ExecutionPlan::Controller::Controller(
// indicate the regular execution path should be used. This can occur either
// because PreparedModel was nullptr (cpu was best choice), or because the
// IPreparedModel was of insufficient version or failed to configure the burst.
-std::vector<std::shared_ptr<ExecutionBurstController>> ExecutionPlan::makeBursts() const {
+std::vector<std::shared_ptr<ExecutionBurstController>> ExecutionPlan::makeBursts(
+ int preference) const {
switch (mState) {
// burst object for each partition in the compound case
case COMPOUND: {
@@ -565,7 +566,10 @@ std::vector<std::shared_ptr<ExecutionBurstController>> ExecutionPlan::makeBursts
bursts.reserve(compound()->mSteps.size());
for (const auto& step : compound()->mSteps) {
if (const auto preparedModel = step->getPreparedSubModel()) {
- bursts.push_back(preparedModel->configureExecutionBurst(/*blocking=*/true));
+ const bool preferPowerOverLatency =
+ (preference == ANEURALNETWORKS_PREFER_LOW_POWER);
+ bursts.push_back(
+ preparedModel->configureExecutionBurst(preferPowerOverLatency));
} else {
bursts.push_back(nullptr);
}
@@ -577,7 +581,9 @@ std::vector<std::shared_ptr<ExecutionBurstController>> ExecutionPlan::makeBursts
std::vector<std::shared_ptr<ExecutionBurstController>> burst;
auto simpleBody = simple();
if (const auto preparedModel = simpleBody->mPreparedModel) {
- burst.push_back(preparedModel->configureExecutionBurst(/*blocking=*/true));
+ const bool preferPowerOverLatency =
+ (preference == ANEURALNETWORKS_PREFER_LOW_POWER);
+ burst.push_back(preparedModel->configureExecutionBurst(preferPowerOverLatency));
} else {
burst.push_back(nullptr);
}
diff --git a/nn/runtime/ExecutionPlan.h b/nn/runtime/ExecutionPlan.h
index cd3c01848..8fa7083c1 100644
--- a/nn/runtime/ExecutionPlan.h
+++ b/nn/runtime/ExecutionPlan.h
@@ -209,7 +209,7 @@ class ExecutionPlan {
size_t mNextStepIndex;
};
- std::vector<std::shared_ptr<ExecutionBurstController>> makeBursts() const;
+ std::vector<std::shared_ptr<ExecutionBurstController>> makeBursts(int preference) const;
std::shared_ptr<Controller> makeController(ExecutionBuilder* executionBuilder,
const BurstBuilder* burstBuilder) const;
diff --git a/nn/runtime/Manager.cpp b/nn/runtime/Manager.cpp
index 6a3882dc1..34378b3fd 100644
--- a/nn/runtime/Manager.cpp
+++ b/nn/runtime/Manager.cpp
@@ -524,10 +524,10 @@ std::tuple<int, std::vector<OutputShape>, Timing> DriverPreparedModel::execute(
memoryIds.push_back(memory->getKey());
}
- VLOG(EXECUTION) << "Before ExecutionBurstController->tryCompute() "
+ VLOG(EXECUTION) << "Before ExecutionBurstController->compute() "
<< SHOW_IF_DEBUG(toString(request));
std::tie(n, outputShapes, timing, burstFallback) =
- burstController->tryCompute(request, measure, memoryIds);
+ burstController->compute(request, measure, memoryIds);
}
// compute from IPreparedModel if either:
diff --git a/nn/runtime/VersionedInterfaces.cpp b/nn/runtime/VersionedInterfaces.cpp
index 325b75c0e..ba6e2af7c 100644
--- a/nn/runtime/VersionedInterfaces.cpp
+++ b/nn/runtime/VersionedInterfaces.cpp
@@ -19,9 +19,11 @@
#include "VersionedInterfaces.h"
#include <android-base/logging.h>
+#include <android-base/properties.h>
#include <android-base/scopeguard.h>
#include <android-base/thread_annotations.h>
+#include <chrono>
#include <functional>
#include <memory>
#include <string>
@@ -276,12 +278,30 @@ std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::execu
return executeAsynchronously(request, measure);
}
+// This is the amount of time the ExecutionBurstController should spend polling
+// the FMQ to see if it has data available before it should fall back to
+// waiting on the futex.
+static std::chrono::microseconds getPollingTimeWindow() {
+ constexpr int32_t defaultPollingTimeWindow = 50;
+#ifdef NN_DEBUGGABLE
+ constexpr int32_t minPollingTimeWindow = 0;
+ const int32_t selectedPollingTimeWindow =
+ base::GetIntProperty("debug.nn.burst-conrtoller-polling-window",
+ defaultPollingTimeWindow, minPollingTimeWindow);
+ return std::chrono::microseconds{selectedPollingTimeWindow};
+#else
+ return std::chrono::microseconds{defaultPollingTimeWindow};
+#endif // NN_DEBUGGABLE
+}
+
std::shared_ptr<ExecutionBurstController> VersionedIPreparedModel::configureExecutionBurst(
- bool blocking) const {
+ bool preferPowerOverLatency) const {
if (mPreparedModelV1_2 == nullptr) {
return nullptr;
}
- return ExecutionBurstController::create(mPreparedModelV1_2, blocking);
+ const auto pollingTimeWindow =
+ (preferPowerOverLatency ? std::chrono::microseconds{0} : getPollingTimeWindow());
+ return ExecutionBurstController::create(mPreparedModelV1_2, pollingTimeWindow);
}
std::shared_ptr<VersionedIDevice> VersionedIDevice::create(std::string serviceName,
diff --git a/nn/runtime/VersionedInterfaces.h b/nn/runtime/VersionedInterfaces.h
index 8665745b6..87e776507 100644
--- a/nn/runtime/VersionedInterfaces.h
+++ b/nn/runtime/VersionedInterfaces.h
@@ -687,12 +687,16 @@ class VersionedIPreparedModel {
/**
* Creates a burst controller on a prepared model.
*
- * @param blocking 'true' if the FMQ should block until data is available.
+ * @param preferPowerOverLatency 'true' if the Burst object should run in a
+ * more power efficient mode, 'false' if more
+ * power can be used to possibly reduce
+ * burst compute latency.
* @return ExecutionBurstController Execution burst controller object.
* nullptr is returned if the burst cannot
* be configured for any reason.
*/
- std::shared_ptr<ExecutionBurstController> configureExecutionBurst(bool blocking) const;
+ std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
+ bool preferPowerOverLatency) const;
private:
std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> executeAsynchronously(
diff --git a/nn/runtime/test/TestIntrospectionControl.cpp b/nn/runtime/test/TestIntrospectionControl.cpp
index 84617d9b0..9d0cbe6c3 100644
--- a/nn/runtime/test/TestIntrospectionControl.cpp
+++ b/nn/runtime/test/TestIntrospectionControl.cpp
@@ -16,6 +16,7 @@
#include <gtest/gtest.h>
+#include <chrono>
#include <iterator>
#include <map>
#include <queue>
@@ -309,7 +310,8 @@ std::set<Success> expectedPassSet = {Success::PASS_NEITHER, Success::PASS_DEVICE
class TestPreparedModel12 : public SamplePreparedModel {
public:
TestPreparedModel12(const HidlModel& model, const SampleDriver* driver, Success success)
- : SamplePreparedModel(model, driver), mSuccess(success) {}
+ : SamplePreparedModel(model, driver, ExecutionPreference::FAST_SINGLE_ANSWER),
+ mSuccess(success) {}
Return<ErrorStatus> execute(const Request&,
const sp<V1_0::IExecutionCallback>& callback) override {
@@ -384,8 +386,8 @@ class TestPreparedModel12 : public SamplePreparedModel {
const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
configureExecutionBurst_cb cb) override {
- const sp<V1_2::IBurstContext> burst =
- ExecutionBurstServer::create(callback, requestChannel, resultChannel, this);
+ const sp<V1_2::IBurstContext> burst = ExecutionBurstServer::create(
+ callback, requestChannel, resultChannel, this, std::chrono::microseconds{0});
cb(burst == nullptr ? ErrorStatus::GENERAL_FAILURE : ErrorStatus::NONE, burst);
return Void();