diff options
author | Michael Butler <butlermichael@google.com> | 2019-10-28 15:25:58 -0700 |
---|---|---|
committer | android-build-merger <android-build-merger@google.com> | 2019-10-28 15:25:58 -0700 |
commit | bd86742ff4244cd721652c4b52ee5a43426cac82 (patch) | |
tree | a8e32e59a98b3cd2e4e290bdefdd0511ff2bd234 | |
parent | 8c28763a2a18faad99d0e7eed992d50dd9e09b02 (diff) | |
parent | 6f2aab7acc36c2b5b273b50d3c86c1ed5f0db3ed (diff) | |
download | ml-bd86742ff4244cd721652c4b52ee5a43426cac82.tar.gz |
Merge "Improve performance of Burst executions" am: 5faee8ea5a am: 87dd1a37c2
am: 6f2aab7acc
Change-Id: Ib78f32f1324896091dfdbf23f5f1b0bbd936c04a
-rw-r--r-- | nn/common/ExecutionBurstController.cpp | 125 | ||||
-rw-r--r-- | nn/common/ExecutionBurstServer.cpp | 137 | ||||
-rw-r--r-- | nn/common/include/ExecutionBurstController.h | 68 | ||||
-rw-r--r-- | nn/common/include/ExecutionBurstServer.h | 47 | ||||
-rw-r--r-- | nn/driver/sample/SampleDriver.cpp | 30 | ||||
-rw-r--r-- | nn/driver/sample/SampleDriver.h | 6 | ||||
-rw-r--r-- | nn/runtime/CompilationBuilder.cpp | 8 | ||||
-rw-r--r-- | nn/runtime/ExecutionPlan.cpp | 12 | ||||
-rw-r--r-- | nn/runtime/ExecutionPlan.h | 2 | ||||
-rw-r--r-- | nn/runtime/Manager.cpp | 4 | ||||
-rw-r--r-- | nn/runtime/VersionedInterfaces.cpp | 24 | ||||
-rw-r--r-- | nn/runtime/VersionedInterfaces.h | 8 | ||||
-rw-r--r-- | nn/runtime/test/TestIntrospectionControl.cpp | 8 |
13 files changed, 307 insertions, 172 deletions
diff --git a/nn/common/ExecutionBurstController.cpp b/nn/common/ExecutionBurstController.cpp index f3a771b11..4456ed10d 100644 --- a/nn/common/ExecutionBurstController.cpp +++ b/nn/common/ExecutionBurstController.cpp @@ -19,9 +19,16 @@ #include "ExecutionBurstController.h" #include <android-base/logging.h> + +#include <algorithm> #include <cstring> #include <limits> +#include <memory> #include <string> +#include <tuple> +#include <utility> +#include <vector> + #include "Tracing.h" #include "Utils.h" @@ -30,9 +37,8 @@ namespace { using namespace hal; -using hardware::MQDescriptorSync; -using FmqRequestDescriptor = MQDescriptorSync<FmqRequestDatum>; -using FmqResultDescriptor = MQDescriptorSync<FmqResultDatum>; +using FmqRequestDescriptor = hardware::MQDescriptorSync<FmqRequestDatum>; +using FmqResultDescriptor = hardware::MQDescriptorSync<FmqResultDatum>; constexpr Timing kNoTiming = {std::numeric_limits<uint64_t>::max(), std::numeric_limits<uint64_t>::max()}; @@ -221,22 +227,23 @@ std::optional<std::tuple<ErrorStatus, std::vector<OutputShape>, Timing>> deseria } std::pair<std::unique_ptr<ResultChannelReceiver>, const FmqResultDescriptor*> -ResultChannelReceiver::create(size_t channelLength, bool blocking) { +ResultChannelReceiver::create(size_t channelLength, std::chrono::microseconds pollingTimeWindow) { std::unique_ptr<FmqResultChannel> fmqResultChannel = - std::make_unique<FmqResultChannel>(channelLength, /*confEventFlag=*/blocking); + std::make_unique<FmqResultChannel>(channelLength, /*confEventFlag=*/true); if (!fmqResultChannel->isValid()) { LOG(ERROR) << "Unable to create ResultChannelReceiver"; return {nullptr, nullptr}; } + const FmqResultDescriptor* descriptor = fmqResultChannel->getDesc(); return std::make_pair( - std::make_unique<ResultChannelReceiver>(std::move(fmqResultChannel), blocking), + std::make_unique<ResultChannelReceiver>(std::move(fmqResultChannel), pollingTimeWindow), descriptor); } ResultChannelReceiver::ResultChannelReceiver(std::unique_ptr<FmqResultChannel> fmqResultChannel, - bool blocking) - : mFmqResultChannel(std::move(fmqResultChannel)), mBlocking(blocking) {} + std::chrono::microseconds pollingTimeWindow) + : mFmqResultChannel(std::move(fmqResultChannel)), kPollingTimeWindow(pollingTimeWindow) {} std::optional<std::tuple<ErrorStatus, std::vector<OutputShape>, Timing>> ResultChannelReceiver::getBlocking() { @@ -254,16 +261,14 @@ void ResultChannelReceiver::invalidate() { // force unblock // ExecutionBurstController waits on a result packet after sending a // request. If the driver containing ExecutionBurstServer crashes, the - // controller will still be waiting on the futex (assuming mBlocking is - // true). This force unblock wakes up any thread waiting on the futex. - if (mBlocking) { - // TODO: look for a different/better way to signal/notify the futex to - // wake up any thread waiting on it - FmqResultDatum datum; - datum.packetInformation({/*.packetSize=*/0, /*.errorStatus=*/ErrorStatus::GENERAL_FAILURE, - /*.numberOfOperands=*/0}); - mFmqResultChannel->writeBlocking(&datum, 1); - } + // controller may be waiting on the futex. This force unblock wakes up any + // thread waiting on the futex. + // TODO: look for a different/better way to signal/notify the futex to + // wake up any thread waiting on it + FmqResultDatum datum; + datum.packetInformation({/*.packetSize=*/0, /*.errorStatus=*/ErrorStatus::GENERAL_FAILURE, + /*.numberOfOperands=*/0}); + mFmqResultChannel->writeBlocking(&datum, 1); } std::optional<std::vector<FmqResultDatum>> ResultChannelReceiver::getPacketBlocking() { @@ -273,17 +278,42 @@ std::optional<std::vector<FmqResultDatum>> ResultChannelReceiver::getPacketBlock return std::nullopt; } - // wait for result packet and read first element of result packet - FmqResultDatum datum; - bool success = true; - if (mBlocking) { - success = mFmqResultChannel->readBlocking(&datum, 1); - } else { - while ((success = mValid.load(std::memory_order_relaxed)) && - !mFmqResultChannel->read(&datum, 1)) { + // First spend time polling if results are available in FMQ instead of + // waiting on the futex. Polling is more responsive (yielding lower + // latencies), but can take up more power, so only poll for a limited period + // of time. + + auto& getCurrentTime = std::chrono::high_resolution_clock::now; + const auto timeToStopPolling = getCurrentTime() + kPollingTimeWindow; + + while (getCurrentTime() < timeToStopPolling) { + // if class is being torn down, immediately return + if (!mValid.load(std::memory_order_relaxed)) { + return std::nullopt; + } + + // Check if data is available. If it is, immediately retrieve it and + // return. + const size_t available = mFmqResultChannel->availableToRead(); + if (available > 0) { + std::vector<FmqResultDatum> packet(available); + const bool success = mFmqResultChannel->read(packet.data(), available); + if (!success) { + LOG(ERROR) << "Error receiving packet"; + return std::nullopt; + } + return std::make_optional(std::move(packet)); } } + // If we get to this point, we either stopped polling because it was taking + // too long or polling was not allowed. Instead, perform a blocking call + // which uses a futex to save power. + + // wait for result packet and read first element of result packet + FmqResultDatum datum; + bool success = mFmqResultChannel->readBlocking(&datum, 1); + // retrieve remaining elements // NOTE: all of the data is already available at this point, so there's no // need to do a blocking wait to wait for more data. This is known because @@ -310,22 +340,21 @@ std::optional<std::vector<FmqResultDatum>> ResultChannelReceiver::getPacketBlock } std::pair<std::unique_ptr<RequestChannelSender>, const FmqRequestDescriptor*> -RequestChannelSender::create(size_t channelLength, bool blocking) { +RequestChannelSender::create(size_t channelLength) { std::unique_ptr<FmqRequestChannel> fmqRequestChannel = - std::make_unique<FmqRequestChannel>(channelLength, /*confEventFlag=*/blocking); + std::make_unique<FmqRequestChannel>(channelLength, /*confEventFlag=*/true); if (!fmqRequestChannel->isValid()) { LOG(ERROR) << "Unable to create RequestChannelSender"; return {nullptr, nullptr}; } + const FmqRequestDescriptor* descriptor = fmqRequestChannel->getDesc(); - return std::make_pair( - std::make_unique<RequestChannelSender>(std::move(fmqRequestChannel), blocking), - descriptor); + return std::make_pair(std::make_unique<RequestChannelSender>(std::move(fmqRequestChannel)), + descriptor); } -RequestChannelSender::RequestChannelSender(std::unique_ptr<FmqRequestChannel> fmqRequestChannel, - bool blocking) - : mFmqRequestChannel(std::move(fmqRequestChannel)), mBlocking(blocking) {} +RequestChannelSender::RequestChannelSender(std::unique_ptr<FmqRequestChannel> fmqRequestChannel) + : mFmqRequestChannel(std::move(fmqRequestChannel)) {} bool RequestChannelSender::send(const Request& request, MeasureTiming measure, const std::vector<int32_t>& slots) { @@ -344,11 +373,9 @@ bool RequestChannelSender::sendPacket(const std::vector<FmqRequestDatum>& packet return false; } - if (mBlocking) { - return mFmqRequestChannel->writeBlocking(packet.data(), packet.size()); - } else { - return mFmqRequestChannel->write(packet.data(), packet.size()); - } + // Always send the packet with "blocking" because this signals the futex and + // unblocks the consumer if it is waiting on the futex. + return mFmqRequestChannel->writeBlocking(packet.data(), packet.size()); } void RequestChannelSender::invalidate() { @@ -438,7 +465,7 @@ int32_t ExecutionBurstController::ExecutionBurstCallback::allocateSlotLocked() { } std::unique_ptr<ExecutionBurstController> ExecutionBurstController::create( - const sp<IPreparedModel>& preparedModel, bool blocking) { + const sp<IPreparedModel>& preparedModel, std::chrono::microseconds pollingTimeWindow) { // check inputs if (preparedModel == nullptr) { LOG(ERROR) << "ExecutionBurstController::create passed a nullptr"; @@ -450,9 +477,9 @@ std::unique_ptr<ExecutionBurstController> ExecutionBurstController::create( // create FMQ objects auto [requestChannelSenderTemp, requestChannelDescriptor] = - RequestChannelSender::create(kExecutionBurstChannelLength, blocking); + RequestChannelSender::create(kExecutionBurstChannelLength); auto [resultChannelReceiverTemp, resultChannelDescriptor] = - ResultChannelReceiver::create(kExecutionBurstChannelLength, blocking); + ResultChannelReceiver::create(kExecutionBurstChannelLength, pollingTimeWindow); std::shared_ptr<RequestChannelSender> requestChannelSender = std::move(requestChannelSenderTemp); std::shared_ptr<ResultChannelReceiver> resultChannelReceiver = @@ -543,15 +570,13 @@ static std::tuple<int, std::vector<OutputShape>, Timing, bool> getExecutionResul return {n, std::move(checkedOutputShapes), checkedTiming, fallback}; } -std::tuple<ErrorStatus, std::vector<OutputShape>, Timing> ExecutionBurstController::compute( - const Request& request, MeasureTiming measure, const std::vector<intptr_t>& memoryIds) { - auto [status, outputShapes, timing, fallback] = tryCompute(request, measure, memoryIds); - (void)fallback; // ignore fallback field - return {convertResultCodeToErrorStatus(status), std::move(outputShapes), timing}; -} - -std::tuple<int, std::vector<OutputShape>, Timing, bool> ExecutionBurstController::tryCompute( +std::tuple<int, std::vector<OutputShape>, Timing, bool> ExecutionBurstController::compute( const Request& request, MeasureTiming measure, const std::vector<intptr_t>& memoryIds) { + // This is the first point when we know an execution is occurring, so begin + // to collect systraces. Note that the first point we can begin collecting + // systraces in ExecutionBurstServer is when the RequestChannelReceiver + // realizes there is data in the FMQ, so ExecutionBurstServer collects + // systraces at different points in the code. NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "ExecutionBurstController::compute"); std::lock_guard<std::mutex> guard(mMutex); diff --git a/nn/common/ExecutionBurstServer.cpp b/nn/common/ExecutionBurstServer.cpp index 74bc34058..ec935dad6 100644 --- a/nn/common/ExecutionBurstServer.cpp +++ b/nn/common/ExecutionBurstServer.cpp @@ -20,9 +20,14 @@ #include <android-base/logging.h> +#include <algorithm> #include <cstring> #include <limits> #include <map> +#include <memory> +#include <tuple> +#include <utility> +#include <vector> #include "Tracing.h" @@ -31,6 +36,8 @@ namespace { using namespace hal; +using hardware::MQDescriptorSync; + constexpr Timing kNoTiming = {std::numeric_limits<uint64_t>::max(), std::numeric_limits<uint64_t>::max()}; @@ -298,20 +305,27 @@ std::optional<std::tuple<Request, std::vector<int32_t>, MeasureTiming>> deserial // RequestChannelReceiver methods std::unique_ptr<RequestChannelReceiver> RequestChannelReceiver::create( - const FmqRequestDescriptor& requestChannel) { + const FmqRequestDescriptor& requestChannel, std::chrono::microseconds pollingTimeWindow) { std::unique_ptr<FmqRequestChannel> fmqRequestChannel = std::make_unique<FmqRequestChannel>(requestChannel); + if (!fmqRequestChannel->isValid()) { LOG(ERROR) << "Unable to create RequestChannelReceiver"; return nullptr; } - const bool blocking = fmqRequestChannel->getEventFlagWord() != nullptr; - return std::make_unique<RequestChannelReceiver>(std::move(fmqRequestChannel), blocking); + if (fmqRequestChannel->getEventFlagWord() == nullptr) { + LOG(ERROR) + << "RequestChannelReceiver::create was passed an MQDescriptor without an EventFlag"; + return nullptr; + } + + return std::make_unique<RequestChannelReceiver>(std::move(fmqRequestChannel), + pollingTimeWindow); } RequestChannelReceiver::RequestChannelReceiver(std::unique_ptr<FmqRequestChannel> fmqRequestChannel, - bool blocking) - : mFmqRequestChannel(std::move(fmqRequestChannel)), mBlocking(blocking) {} + std::chrono::microseconds pollingTimeWindow) + : mFmqRequestChannel(std::move(fmqRequestChannel)), kPollingTimeWindow(pollingTimeWindow) {} std::optional<std::tuple<Request, std::vector<int32_t>, MeasureTiming>> RequestChannelReceiver::getBlocking() { @@ -328,17 +342,15 @@ void RequestChannelReceiver::invalidate() { // force unblock // ExecutionBurstServer is by default waiting on a request packet. If the - // client process destroys its burst object, the server will still be - // waiting on the futex (assuming mBlocking is true). This force unblock - // wakes up any thread waiting on the futex. - if (mBlocking) { - // TODO: look for a different/better way to signal/notify the futex to - // wake up any thread waiting on it - FmqRequestDatum datum; - datum.packetInformation({/*.packetSize=*/0, /*.numberOfInputOperands=*/0, - /*.numberOfOutputOperands=*/0, /*.numberOfPools=*/0}); - mFmqRequestChannel->writeBlocking(&datum, 1); - } + // client process destroys its burst object, the server may still be waiting + // on the futex. This force unblock wakes up any thread waiting on the + // futex. + // TODO: look for a different/better way to signal/notify the futex to wake + // up any thread waiting on it + FmqRequestDatum datum; + datum.packetInformation({/*.packetSize=*/0, /*.numberOfInputOperands=*/0, + /*.numberOfOutputOperands=*/0, /*.numberOfPools=*/0}); + mFmqRequestChannel->writeBlocking(&datum, 1); } std::optional<std::vector<FmqRequestDatum>> RequestChannelReceiver::getPacketBlocking() { @@ -348,17 +360,53 @@ std::optional<std::vector<FmqRequestDatum>> RequestChannelReceiver::getPacketBlo return std::nullopt; } - // wait for request packet and read first element of request packet - FmqRequestDatum datum; - bool success = false; - if (mBlocking) { - success = mFmqRequestChannel->readBlocking(&datum, 1); - } else { - while ((success = !mTeardown.load(std::memory_order_relaxed)) && - !mFmqRequestChannel->read(&datum, 1)) { + // First spend time polling if results are available in FMQ instead of + // waiting on the futex. Polling is more responsive (yielding lower + // latencies), but can take up more power, so only poll for a limited period + // of time. + + auto& getCurrentTime = std::chrono::high_resolution_clock::now; + const auto timeToStopPolling = getCurrentTime() + kPollingTimeWindow; + + while (getCurrentTime() < timeToStopPolling) { + // if class is being torn down, immediately return + if (mTeardown.load(std::memory_order_relaxed)) { + return std::nullopt; + } + + // Check if data is available. If it is, immediately retrieve it and + // return. + const size_t available = mFmqRequestChannel->availableToRead(); + if (available > 0) { + // This is the first point when we know an execution is occurring, + // so begin to collect systraces. Note that a similar systrace does + // not exist at the corresponding point in + // ResultChannelReceiver::getPacketBlocking because the execution is + // already in flight. + NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, + "ExecutionBurstServer getting packet"); + std::vector<FmqRequestDatum> packet(available); + const bool success = mFmqRequestChannel->read(packet.data(), available); + if (!success) { + LOG(ERROR) << "Error receiving packet"; + return std::nullopt; + } + return std::make_optional(std::move(packet)); } } + // If we get to this point, we either stopped polling because it was taking + // too long or polling was not allowed. Instead, perform a blocking call + // which uses a futex to save power. + + // wait for request packet and read first element of request packet + FmqRequestDatum datum; + bool success = mFmqRequestChannel->readBlocking(&datum, 1); + + // This is the first point when we know an execution is occurring, so begin + // to collect systraces. Note that a similar systrace does not exist at the + // corresponding point in ResultChannelReceiver::getPacketBlocking because + // the execution is already in flight. NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "ExecutionBurstServer getting packet"); // retrieve remaining elements @@ -393,17 +441,21 @@ std::unique_ptr<ResultChannelSender> ResultChannelSender::create( const FmqResultDescriptor& resultChannel) { std::unique_ptr<FmqResultChannel> fmqResultChannel = std::make_unique<FmqResultChannel>(resultChannel); + if (!fmqResultChannel->isValid()) { LOG(ERROR) << "Unable to create RequestChannelSender"; return nullptr; } - const bool blocking = fmqResultChannel->getEventFlagWord() != nullptr; - return std::make_unique<ResultChannelSender>(std::move(fmqResultChannel), blocking); + if (fmqResultChannel->getEventFlagWord() == nullptr) { + LOG(ERROR) << "ResultChannelSender::create was passed an MQDescriptor without an EventFlag"; + return nullptr; + } + + return std::make_unique<ResultChannelSender>(std::move(fmqResultChannel)); } -ResultChannelSender::ResultChannelSender(std::unique_ptr<FmqResultChannel> fmqResultChannel, - bool blocking) - : mFmqResultChannel(std::move(fmqResultChannel)), mBlocking(blocking) {} +ResultChannelSender::ResultChannelSender(std::unique_ptr<FmqResultChannel> fmqResultChannel) + : mFmqResultChannel(std::move(fmqResultChannel)) {} bool ResultChannelSender::send(ErrorStatus errorStatus, const std::vector<OutputShape>& outputShapes, Timing timing) { @@ -417,18 +469,15 @@ bool ResultChannelSender::sendPacket(const std::vector<FmqResultDatum>& packet) << "ResultChannelSender::sendPacket -- packet size exceeds size available in FMQ"; const std::vector<FmqResultDatum> errorPacket = serialize(ErrorStatus::GENERAL_FAILURE, {}, kNoTiming); - if (mBlocking) { - return mFmqResultChannel->writeBlocking(errorPacket.data(), errorPacket.size()); - } else { - return mFmqResultChannel->write(errorPacket.data(), errorPacket.size()); - } - } - if (mBlocking) { - return mFmqResultChannel->writeBlocking(packet.data(), packet.size()); - } else { - return mFmqResultChannel->write(packet.data(), packet.size()); + // Always send the packet with "blocking" because this signals the futex + // and unblocks the consumer if it is waiting on the futex. + return mFmqResultChannel->writeBlocking(errorPacket.data(), errorPacket.size()); } + + // Always send the packet with "blocking" because this signals the futex and + // unblocks the consumer if it is waiting on the futex. + return mFmqResultChannel->writeBlocking(packet.data(), packet.size()); } // ExecutionBurstServer methods @@ -436,7 +485,8 @@ bool ResultChannelSender::sendPacket(const std::vector<FmqResultDatum>& packet) sp<ExecutionBurstServer> ExecutionBurstServer::create( const sp<IBurstCallback>& callback, const MQDescriptorSync<FmqRequestDatum>& requestChannel, const MQDescriptorSync<FmqResultDatum>& resultChannel, - std::shared_ptr<IBurstExecutorWithCache> executorWithCache) { + std::shared_ptr<IBurstExecutorWithCache> executorWithCache, + std::chrono::microseconds pollingTimeWindow) { // check inputs if (callback == nullptr || executorWithCache == nullptr) { LOG(ERROR) << "ExecutionBurstServer::create passed a nullptr"; @@ -445,7 +495,7 @@ sp<ExecutionBurstServer> ExecutionBurstServer::create( // create FMQ objects std::unique_ptr<RequestChannelReceiver> requestChannelReceiver = - RequestChannelReceiver::create(requestChannel); + RequestChannelReceiver::create(requestChannel, pollingTimeWindow); std::unique_ptr<ResultChannelSender> resultChannelSender = ResultChannelSender::create(resultChannel); @@ -462,7 +512,8 @@ sp<ExecutionBurstServer> ExecutionBurstServer::create( sp<ExecutionBurstServer> ExecutionBurstServer::create( const sp<IBurstCallback>& callback, const MQDescriptorSync<FmqRequestDatum>& requestChannel, - const MQDescriptorSync<FmqResultDatum>& resultChannel, IPreparedModel* preparedModel) { + const MQDescriptorSync<FmqResultDatum>& resultChannel, IPreparedModel* preparedModel, + std::chrono::microseconds pollingTimeWindow) { // check relevant input if (preparedModel == nullptr) { LOG(ERROR) << "ExecutionBurstServer::create passed a nullptr"; @@ -475,7 +526,7 @@ sp<ExecutionBurstServer> ExecutionBurstServer::create( // make and return context return ExecutionBurstServer::create(callback, requestChannel, resultChannel, - preparedModelAdapter); + preparedModelAdapter, pollingTimeWindow); } ExecutionBurstServer::ExecutionBurstServer( diff --git a/nn/common/include/ExecutionBurstController.h b/nn/common/include/ExecutionBurstController.h index 6328096b0..652b0d911 100644 --- a/nn/common/include/ExecutionBurstController.h +++ b/nn/common/include/ExecutionBurstController.h @@ -17,18 +17,21 @@ #ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H #define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H -#include "HalInterfaces.h" - #include <android-base/macros.h> #include <fmq/MessageQueue.h> #include <hidl/MQDescriptor.h> #include <atomic> +#include <chrono> #include <map> #include <memory> #include <mutex> #include <stack> #include <tuple> +#include <utility> +#include <vector> + +#include "HalInterfaces.h" namespace android::nn { @@ -70,10 +73,10 @@ std::optional<std::tuple<hal::ErrorStatus, std::vector<hal::OutputShape>, hal::T * * Because the receiver can wait on a packet that may never come (e.g., because * the sending side of the packet has been closed), this object can be - * invalidating, unblocking the receiver. + * invalidated, unblocking the receiver. */ class ResultChannelReceiver { - using FmqResultDescriptor = ::android::hardware::MQDescriptorSync<hal::FmqResultDatum>; + using FmqResultDescriptor = hardware::MQDescriptorSync<hal::FmqResultDatum>; using FmqResultChannel = hardware::MessageQueue<hal::FmqResultDatum, hardware::kSynchronizedReadWrite>; @@ -84,13 +87,15 @@ class ResultChannelReceiver { * Prefer this call over the constructor. * * @param channelLength Number of elements in the FMQ. - * @param blocking 'true' if FMQ should use futex, 'false' if it should - * spin-wait. + * @param pollingTimeWindow How much time (in microseconds) the + * ResultChannelReceiver is allowed to poll the FMQ before waiting on + * the blocking futex. Polling may result in lower latencies at the + * potential cost of more power usage. * @return A pair of ResultChannelReceiver and the FMQ descriptor on * successful creation, both nullptr otherwise. */ static std::pair<std::unique_ptr<ResultChannelReceiver>, const FmqResultDescriptor*> create( - size_t channelLength, bool blocking); + size_t channelLength, std::chrono::microseconds pollingTimeWindow); /** * Get the result from the channel. @@ -114,12 +119,13 @@ class ResultChannelReceiver { // prefer calling ResultChannelReceiver::getBlocking std::optional<std::vector<hal::FmqResultDatum>> getPacketBlocking(); - ResultChannelReceiver(std::unique_ptr<FmqResultChannel> fmqResultChannel, bool blocking); + ResultChannelReceiver(std::unique_ptr<FmqResultChannel> fmqResultChannel, + std::chrono::microseconds pollingTimeWindow); private: const std::unique_ptr<FmqResultChannel> mFmqResultChannel; std::atomic<bool> mValid{true}; - const bool mBlocking; + const std::chrono::microseconds kPollingTimeWindow; }; /** @@ -128,7 +134,7 @@ class ResultChannelReceiver { * available. */ class RequestChannelSender { - using FmqRequestDescriptor = ::android::hardware::MQDescriptorSync<hal::FmqRequestDatum>; + using FmqRequestDescriptor = hardware::MQDescriptorSync<hal::FmqRequestDatum>; using FmqRequestChannel = hardware::MessageQueue<hal::FmqRequestDatum, hardware::kSynchronizedReadWrite>; @@ -139,13 +145,11 @@ class RequestChannelSender { * Prefer this call over the constructor. * * @param channelLength Number of elements in the FMQ. - * @param blocking 'true' if FMQ should use futex, 'false' if it should - * spin-wait. * @return A pair of ResultChannelReceiver and the FMQ descriptor on * successful creation, both nullptr otherwise. */ static std::pair<std::unique_ptr<RequestChannelSender>, const FmqRequestDescriptor*> create( - size_t channelLength, bool blocking); + size_t channelLength); /** * Send the request to the channel. @@ -169,12 +173,11 @@ class RequestChannelSender { // prefer calling RequestChannelSender::send bool sendPacket(const std::vector<hal::FmqRequestDatum>& packet); - RequestChannelSender(std::unique_ptr<FmqRequestChannel> fmqRequestChannel, bool blocking); + RequestChannelSender(std::unique_ptr<FmqRequestChannel> fmqRequestChannel); private: const std::unique_ptr<FmqRequestChannel> mFmqRequestChannel; std::atomic<bool> mValid{true}; - const bool mBlocking; }; /** @@ -260,15 +263,15 @@ class ExecutionBurstController { * Prefer this over ExecutionBurstController's constructor. * * @param preparedModel Model prepared for execution to execute on. - * @param blocking 'true' if the FMQ should use a futex to perform blocking - * until data is available in a less responsive, but more energy - * efficient manner. 'false' if the FMQ should use spin-looping to - * wait until data is available in a more responsive, but less energy - * efficient manner. + * @param pollingTimeWindow How much time (in microseconds) the + * ExecutionBurstController is allowed to poll the FMQ before waiting on + * the blocking futex. Polling may result in lower latencies at the + * potential cost of more power usage. * @return ExecutionBurstController Execution burst controller object. */ static std::unique_ptr<ExecutionBurstController> create( - const sp<hal::IPreparedModel>& preparedModel, bool blocking); + const sp<hal::IPreparedModel>& preparedModel, + std::chrono::microseconds pollingTimeWindow); // prefer calling ExecutionBurstController::create ExecutionBurstController(const std::shared_ptr<RequestChannelSender>& requestChannelSender, @@ -288,34 +291,13 @@ class ExecutionBurstController { * @param memoryIds Identifiers corresponding to each memory object in the * request's pools. * @return A tuple of: - * - status of the execution - * - dynamic output shapes from the execution - * - any execution time measurements of the execution - */ - std::tuple<hal::ErrorStatus, std::vector<hal::OutputShape>, hal::Timing> compute( - const hal::Request& request, hal::MeasureTiming measure, - const std::vector<intptr_t>& memoryIds); - - // TODO: combine "compute" and "tryCompute" back into a single function. - // "tryCompute" was created later to return the "fallback" boolean. This - // could not be done directly in "compute" because the VTS test cases (which - // test burst using "compute") had already been locked down and could not be - // changed. - /** - * Execute a request on a model. - * - * @param request Arguments to be executed on a model. - * @param measure Whether to collect timing measurements, either YES or NO - * @param memoryIds Identifiers corresponding to each memory object in the - * request's pools. - * @return A tuple of: * - result code of the execution * - dynamic output shapes from the execution * - any execution time measurements of the execution * - whether or not a failed burst execution should be re-run using a * different path (e.g., IPreparedModel::executeSynchronously) */ - std::tuple<int, std::vector<hal::OutputShape>, hal::Timing, bool> tryCompute( + std::tuple<int, std::vector<hal::OutputShape>, hal::Timing, bool> compute( const hal::Request& request, hal::MeasureTiming measure, const std::vector<intptr_t>& memoryIds); diff --git a/nn/common/include/ExecutionBurstServer.h b/nn/common/include/ExecutionBurstServer.h index 977d0d375..9da0dc742 100644 --- a/nn/common/include/ExecutionBurstServer.h +++ b/nn/common/include/ExecutionBurstServer.h @@ -17,23 +17,24 @@ #ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_SERVER_H #define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_SERVER_H -#include "HalInterfaces.h" - #include <android-base/macros.h> #include <fmq/MessageQueue.h> #include <hidl/MQDescriptor.h> #include <atomic> +#include <chrono> #include <memory> #include <optional> #include <thread> +#include <tuple> #include <vector> +#include "HalInterfaces.h" + namespace android::nn { -using hardware::MQDescriptorSync; -using FmqRequestDescriptor = MQDescriptorSync<hal::FmqRequestDatum>; -using FmqResultDescriptor = MQDescriptorSync<hal::FmqResultDatum>; +using FmqRequestDescriptor = hardware::MQDescriptorSync<hal::FmqRequestDatum>; +using FmqResultDescriptor = hardware::MQDescriptorSync<hal::FmqResultDatum>; /** * Function to serialize results. @@ -69,7 +70,7 @@ std::optional<std::tuple<hal::Request, std::vector<int32_t>, hal::MeasureTiming> * * Because the receiver can wait on a packet that may never come (e.g., because * the sending side of the packet has been closed), this object can be - * invalidating, unblocking the receiver. + * invalidated, unblocking the receiver. */ class RequestChannelReceiver { using FmqRequestChannel = @@ -82,10 +83,15 @@ class RequestChannelReceiver { * Prefer this call over the constructor. * * @param requestChannel Descriptor for the request channel. + * @param pollingTimeWindow How much time (in microseconds) the + * RequestChannelReceiver is allowed to poll the FMQ before waiting on + * the blocking futex. Polling may result in lower latencies at the + * potential cost of more power usage. * @return RequestChannelReceiver on successful creation, nullptr otherwise. */ static std::unique_ptr<RequestChannelReceiver> create( - const FmqRequestDescriptor& requestChannel); + const FmqRequestDescriptor& requestChannel, + std::chrono::microseconds pollingTimeWindow); /** * Get the request from the channel. @@ -105,14 +111,15 @@ class RequestChannelReceiver { */ void invalidate(); - RequestChannelReceiver(std::unique_ptr<FmqRequestChannel> fmqRequestChannel, bool blocking); + RequestChannelReceiver(std::unique_ptr<FmqRequestChannel> fmqRequestChannel, + std::chrono::microseconds pollingTimeWindow); private: std::optional<std::vector<hal::FmqRequestDatum>> getPacketBlocking(); const std::unique_ptr<FmqRequestChannel> mFmqRequestChannel; std::atomic<bool> mTeardown{false}; - const bool mBlocking; + const std::chrono::microseconds kPollingTimeWindow; }; /** @@ -149,11 +156,10 @@ class ResultChannelSender { // prefer calling ResultChannelSender::send bool sendPacket(const std::vector<hal::FmqResultDatum>& packet); - ResultChannelSender(std::unique_ptr<FmqResultChannel> fmqResultChannel, bool blocking); + ResultChannelSender(std::unique_ptr<FmqResultChannel> fmqResultChannel); private: const std::unique_ptr<FmqResultChannel> mFmqResultChannel; - const bool mBlocking; }; /** @@ -247,12 +253,17 @@ class ExecutionBurstServer : public hal::IBurstContext { * the result of the execution. * @param executorWithCache Object which maintains a local cache of the * memory pools and executes using the cached memory pools. + * @param pollingTimeWindow How much time (in microseconds) the + * ExecutionBurstServer is allowed to poll the FMQ before waiting on + * the blocking futex. Polling may result in lower latencies at the + * potential cost of more power usage. * @result IBurstContext Handle to the burst context. */ static sp<ExecutionBurstServer> create( const sp<hal::IBurstCallback>& callback, const FmqRequestDescriptor& requestChannel, const FmqResultDescriptor& resultChannel, - std::shared_ptr<IBurstExecutorWithCache> executorWithCache); + std::shared_ptr<IBurstExecutorWithCache> executorWithCache, + std::chrono::microseconds pollingTimeWindow = std::chrono::microseconds{0}); /** * Create automated context to manage FMQ-based executions. @@ -271,12 +282,16 @@ class ExecutionBurstServer : public hal::IBurstContext { * @param preparedModel PreparedModel that the burst object was created from. * IPreparedModel::executeSynchronously will be used to perform the * execution. + * @param pollingTimeWindow How much time (in microseconds) the + * ExecutionBurstServer is allowed to poll the FMQ before waiting on + * the blocking futex. Polling may result in lower latencies at the + * potential cost of more power usage. * @result IBurstContext Handle to the burst context. */ - static sp<ExecutionBurstServer> create(const sp<hal::IBurstCallback>& callback, - const FmqRequestDescriptor& requestChannel, - const FmqResultDescriptor& resultChannel, - hal::IPreparedModel* preparedModel); + static sp<ExecutionBurstServer> create( + const sp<hal::IBurstCallback>& callback, const FmqRequestDescriptor& requestChannel, + const FmqResultDescriptor& resultChannel, hal::IPreparedModel* preparedModel, + std::chrono::microseconds pollingTimeWindow = std::chrono::microseconds{0}); ExecutionBurstServer(const sp<hal::IBurstCallback>& callback, std::unique_ptr<RequestChannelReceiver> requestChannel, diff --git a/nn/driver/sample/SampleDriver.cpp b/nn/driver/sample/SampleDriver.cpp index 0448c2d79..50cb7729a 100644 --- a/nn/driver/sample/SampleDriver.cpp +++ b/nn/driver/sample/SampleDriver.cpp @@ -19,6 +19,7 @@ #include "SampleDriver.h" #include <android-base/logging.h> +#include <android-base/properties.h> #include <hidl/LegacySupport.h> #include <algorithm> @@ -185,9 +186,9 @@ Return<ErrorStatus> prepareModelBase(const T_Model& model, const SampleDriver* d } // asynchronously prepare the model from a new, detached thread - std::thread([model, driver, callback] { + std::thread([model, driver, preference, callback] { sp<SamplePreparedModel> preparedModel = - new SamplePreparedModel(convertToV1_3(model), driver); + new SamplePreparedModel(convertToV1_3(model), driver, preference); if (!preparedModel->initialize()) { notify(callback, ErrorStatus::INVALID_ARGUMENT, nullptr); return; @@ -472,6 +473,22 @@ class BurstExecutorWithCache : public ExecutionBurstServer::IBurstExecutorWithCa std::map<int32_t, std::optional<RunTimePoolInfo>> mMemoryCache; // cached requestPoolInfos }; +// This is the amount of time the ExecutionBurstServer should spend polling the +// FMQ to see if it has data available before it should fall back to waiting on +// the futex. +static std::chrono::microseconds getPollingTimeWindow() { + constexpr int32_t defaultPollingTimeWindow = 50; +#ifdef NN_DEBUGGABLE + constexpr int32_t minPollingTimeWindow = 0; + const int32_t selectedPollingTimeWindow = + base::GetIntProperty("debug.nn.sample-driver-burst-polling-window", + defaultPollingTimeWindow, minPollingTimeWindow); + return std::chrono::microseconds{selectedPollingTimeWindow}; +#else + return std::chrono::microseconds{defaultPollingTimeWindow}; +#endif // NN_DEBUGGABLE +} + Return<void> SamplePreparedModel::configureExecutionBurst( const sp<V1_2::IBurstCallback>& callback, const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel, @@ -480,17 +497,22 @@ Return<void> SamplePreparedModel::configureExecutionBurst( NNTRACE_FULL(NNTRACE_LAYER_DRIVER, NNTRACE_PHASE_EXECUTION, "SampleDriver::configureExecutionBurst"); + const bool preferPowerOverLatency = (kPreference == hal::ExecutionPreference::LOW_POWER); + const auto pollingTimeWindow = + (preferPowerOverLatency ? std::chrono::microseconds{0} : getPollingTimeWindow()); + // Alternatively, the burst could be configured via: // const sp<V1_2::IBurstContext> burst = // ExecutionBurstServer::create(callback, requestChannel, - // resultChannel, this); + // resultChannel, this, + // pollingTimeWindow); // // However, this alternative representation does not include a memory map // caching optimization, and adds overhead. const std::shared_ptr<BurstExecutorWithCache> executorWithCache = std::make_shared<BurstExecutorWithCache>(mModel, mDriver, mPoolInfos); const sp<V1_2::IBurstContext> burst = ExecutionBurstServer::create( - callback, requestChannel, resultChannel, executorWithCache); + callback, requestChannel, resultChannel, executorWithCache, pollingTimeWindow); if (burst == nullptr) { cb(ErrorStatus::GENERAL_FAILURE, {}); diff --git a/nn/driver/sample/SampleDriver.h b/nn/driver/sample/SampleDriver.h index a85dcd5ea..8788ed3a8 100644 --- a/nn/driver/sample/SampleDriver.h +++ b/nn/driver/sample/SampleDriver.h @@ -91,8 +91,9 @@ class SampleDriver : public hal::IDevice { class SamplePreparedModel : public hal::IPreparedModel { public: - SamplePreparedModel(const hal::Model& model, const SampleDriver* driver) - : mModel(model), mDriver(driver) {} + SamplePreparedModel(const hal::Model& model, const SampleDriver* driver, + hal::ExecutionPreference preference) + : mModel(model), mDriver(driver), kPreference(preference) {} ~SamplePreparedModel() override {} bool initialize(); hal::Return<hal::ErrorStatus> execute( @@ -113,6 +114,7 @@ class SamplePreparedModel : public hal::IPreparedModel { hal::Model mModel; const SampleDriver* mDriver; std::vector<RunTimePoolInfo> mPoolInfos; + const hal::ExecutionPreference kPreference; }; } // namespace sample_driver diff --git a/nn/runtime/CompilationBuilder.cpp b/nn/runtime/CompilationBuilder.cpp index 912f0087b..be0260ca2 100644 --- a/nn/runtime/CompilationBuilder.cpp +++ b/nn/runtime/CompilationBuilder.cpp @@ -18,6 +18,11 @@ #include "CompilationBuilder.h" +#include <algorithm> +#include <memory> +#include <string> +#include <utility> +#include <vector> #include "BurstBuilder.h" #include "ExecutionBuilder.h" #include "ExecutionBurstController.h" @@ -156,7 +161,8 @@ int CompilationBuilder::createBurst(BurstBuilder** burst) { *burst = nullptr; return ANEURALNETWORKS_BAD_STATE; } - std::vector<std::shared_ptr<ExecutionBurstController>> burstControllers = mPlan.makeBursts(); + std::vector<std::shared_ptr<ExecutionBurstController>> burstControllers = + mPlan.makeBursts(mPreference); *burst = new (std::nothrow) BurstBuilder(this, std::move(burstControllers)); return (*burst ? ANEURALNETWORKS_NO_ERROR : ANEURALNETWORKS_OUT_OF_MEMORY); } diff --git a/nn/runtime/ExecutionPlan.cpp b/nn/runtime/ExecutionPlan.cpp index 5f656e1c2..901305216 100644 --- a/nn/runtime/ExecutionPlan.cpp +++ b/nn/runtime/ExecutionPlan.cpp @@ -557,7 +557,8 @@ ExecutionPlan::Controller::Controller( // indicate the regular execution path should be used. This can occur either // because PreparedModel was nullptr (cpu was best choice), or because the // IPreparedModel was of insufficient version or failed to configure the burst. -std::vector<std::shared_ptr<ExecutionBurstController>> ExecutionPlan::makeBursts() const { +std::vector<std::shared_ptr<ExecutionBurstController>> ExecutionPlan::makeBursts( + int preference) const { switch (mState) { // burst object for each partition in the compound case case COMPOUND: { @@ -565,7 +566,10 @@ std::vector<std::shared_ptr<ExecutionBurstController>> ExecutionPlan::makeBursts bursts.reserve(compound()->mSteps.size()); for (const auto& step : compound()->mSteps) { if (const auto preparedModel = step->getPreparedSubModel()) { - bursts.push_back(preparedModel->configureExecutionBurst(/*blocking=*/true)); + const bool preferPowerOverLatency = + (preference == ANEURALNETWORKS_PREFER_LOW_POWER); + bursts.push_back( + preparedModel->configureExecutionBurst(preferPowerOverLatency)); } else { bursts.push_back(nullptr); } @@ -577,7 +581,9 @@ std::vector<std::shared_ptr<ExecutionBurstController>> ExecutionPlan::makeBursts std::vector<std::shared_ptr<ExecutionBurstController>> burst; auto simpleBody = simple(); if (const auto preparedModel = simpleBody->mPreparedModel) { - burst.push_back(preparedModel->configureExecutionBurst(/*blocking=*/true)); + const bool preferPowerOverLatency = + (preference == ANEURALNETWORKS_PREFER_LOW_POWER); + burst.push_back(preparedModel->configureExecutionBurst(preferPowerOverLatency)); } else { burst.push_back(nullptr); } diff --git a/nn/runtime/ExecutionPlan.h b/nn/runtime/ExecutionPlan.h index cd3c01848..8fa7083c1 100644 --- a/nn/runtime/ExecutionPlan.h +++ b/nn/runtime/ExecutionPlan.h @@ -209,7 +209,7 @@ class ExecutionPlan { size_t mNextStepIndex; }; - std::vector<std::shared_ptr<ExecutionBurstController>> makeBursts() const; + std::vector<std::shared_ptr<ExecutionBurstController>> makeBursts(int preference) const; std::shared_ptr<Controller> makeController(ExecutionBuilder* executionBuilder, const BurstBuilder* burstBuilder) const; diff --git a/nn/runtime/Manager.cpp b/nn/runtime/Manager.cpp index 6a3882dc1..34378b3fd 100644 --- a/nn/runtime/Manager.cpp +++ b/nn/runtime/Manager.cpp @@ -524,10 +524,10 @@ std::tuple<int, std::vector<OutputShape>, Timing> DriverPreparedModel::execute( memoryIds.push_back(memory->getKey()); } - VLOG(EXECUTION) << "Before ExecutionBurstController->tryCompute() " + VLOG(EXECUTION) << "Before ExecutionBurstController->compute() " << SHOW_IF_DEBUG(toString(request)); std::tie(n, outputShapes, timing, burstFallback) = - burstController->tryCompute(request, measure, memoryIds); + burstController->compute(request, measure, memoryIds); } // compute from IPreparedModel if either: diff --git a/nn/runtime/VersionedInterfaces.cpp b/nn/runtime/VersionedInterfaces.cpp index 325b75c0e..ba6e2af7c 100644 --- a/nn/runtime/VersionedInterfaces.cpp +++ b/nn/runtime/VersionedInterfaces.cpp @@ -19,9 +19,11 @@ #include "VersionedInterfaces.h" #include <android-base/logging.h> +#include <android-base/properties.h> #include <android-base/scopeguard.h> #include <android-base/thread_annotations.h> +#include <chrono> #include <functional> #include <memory> #include <string> @@ -276,12 +278,30 @@ std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::execu return executeAsynchronously(request, measure); } +// This is the amount of time the ExecutionBurstController should spend polling +// the FMQ to see if it has data available before it should fall back to +// waiting on the futex. +static std::chrono::microseconds getPollingTimeWindow() { + constexpr int32_t defaultPollingTimeWindow = 50; +#ifdef NN_DEBUGGABLE + constexpr int32_t minPollingTimeWindow = 0; + const int32_t selectedPollingTimeWindow = + base::GetIntProperty("debug.nn.burst-conrtoller-polling-window", + defaultPollingTimeWindow, minPollingTimeWindow); + return std::chrono::microseconds{selectedPollingTimeWindow}; +#else + return std::chrono::microseconds{defaultPollingTimeWindow}; +#endif // NN_DEBUGGABLE +} + std::shared_ptr<ExecutionBurstController> VersionedIPreparedModel::configureExecutionBurst( - bool blocking) const { + bool preferPowerOverLatency) const { if (mPreparedModelV1_2 == nullptr) { return nullptr; } - return ExecutionBurstController::create(mPreparedModelV1_2, blocking); + const auto pollingTimeWindow = + (preferPowerOverLatency ? std::chrono::microseconds{0} : getPollingTimeWindow()); + return ExecutionBurstController::create(mPreparedModelV1_2, pollingTimeWindow); } std::shared_ptr<VersionedIDevice> VersionedIDevice::create(std::string serviceName, diff --git a/nn/runtime/VersionedInterfaces.h b/nn/runtime/VersionedInterfaces.h index 8665745b6..87e776507 100644 --- a/nn/runtime/VersionedInterfaces.h +++ b/nn/runtime/VersionedInterfaces.h @@ -687,12 +687,16 @@ class VersionedIPreparedModel { /** * Creates a burst controller on a prepared model. * - * @param blocking 'true' if the FMQ should block until data is available. + * @param preferPowerOverLatency 'true' if the Burst object should run in a + * more power efficient mode, 'false' if more + * power can be used to possibly reduce + * burst compute latency. * @return ExecutionBurstController Execution burst controller object. * nullptr is returned if the burst cannot * be configured for any reason. */ - std::shared_ptr<ExecutionBurstController> configureExecutionBurst(bool blocking) const; + std::shared_ptr<ExecutionBurstController> configureExecutionBurst( + bool preferPowerOverLatency) const; private: std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> executeAsynchronously( diff --git a/nn/runtime/test/TestIntrospectionControl.cpp b/nn/runtime/test/TestIntrospectionControl.cpp index 84617d9b0..9d0cbe6c3 100644 --- a/nn/runtime/test/TestIntrospectionControl.cpp +++ b/nn/runtime/test/TestIntrospectionControl.cpp @@ -16,6 +16,7 @@ #include <gtest/gtest.h> +#include <chrono> #include <iterator> #include <map> #include <queue> @@ -309,7 +310,8 @@ std::set<Success> expectedPassSet = {Success::PASS_NEITHER, Success::PASS_DEVICE class TestPreparedModel12 : public SamplePreparedModel { public: TestPreparedModel12(const HidlModel& model, const SampleDriver* driver, Success success) - : SamplePreparedModel(model, driver), mSuccess(success) {} + : SamplePreparedModel(model, driver, ExecutionPreference::FAST_SINGLE_ANSWER), + mSuccess(success) {} Return<ErrorStatus> execute(const Request&, const sp<V1_0::IExecutionCallback>& callback) override { @@ -384,8 +386,8 @@ class TestPreparedModel12 : public SamplePreparedModel { const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel, const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel, configureExecutionBurst_cb cb) override { - const sp<V1_2::IBurstContext> burst = - ExecutionBurstServer::create(callback, requestChannel, resultChannel, this); + const sp<V1_2::IBurstContext> burst = ExecutionBurstServer::create( + callback, requestChannel, resultChannel, this, std::chrono::microseconds{0}); cb(burst == nullptr ? ErrorStatus::GENERAL_FAILURE : ErrorStatus::NONE, burst); return Void(); |