author    Jean-Luc Brouillet <jeanluc@google.com>    2017-10-11 22:34:04 -0700
committer Jean-Luc Brouillet <jeanluc@google.com>    2017-10-12 23:45:14 -0700
commit    1da8fed77c5c296afa18f754ec3616e7f02a4cfd (patch)
tree      1e910c5ebde2cf5b59df8d1da612fdb001e8be09
parent    59c183a7ff87fffd47284a4a3f41479116d71d36 (diff)
download  ml-1da8fed77c5c296afa18f754ec3616e7f02a4cfd.tar.gz
Pass large model operand values in shared memory.
Because of a limitation in HIDL, large tensor values (e.g. weights) need to be
passed in shared memory rather than as HIDL arguments. This prevented large
models from running on a driver. Separated the handling of memory pools so
that request and model pools are not commingled. Also improved logging so that
we see more details about the models.

Bug: 67603060
Test: Ran system tests and VTS tests.
Change-Id: I760e31275699f9306c4b21945935dc3a4ca68754
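As an editorial illustration of the mechanism this change introduces (a
simplified sketch, not code from the patch; kMaxImmediateCopyBytes,
PendingLargeValue, and setOperandValueSketch are invented names, and alignment
padding is omitted): values of at most 128 bytes are copied directly into the
model's operandValues blob, while larger values are only recorded and copied
into a single shared memory pool when the model is finished.

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Mirrors ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES below.
    constexpr uint32_t kMaxImmediateCopyBytes = 128;

    struct PendingLargeValue {
        uint32_t operandIndex;  // which operand the buffer belongs to
        const void* buffer;     // caller-owned until the model is finished
    };

    void setOperandValueSketch(uint32_t index, const void* buffer, uint32_t length,
                               std::vector<uint8_t>* smallValues,
                               std::vector<PendingLargeValue>* largeValues) {
        if (length <= kMaxImmediateCopyBytes) {
            // Small value: copy immediately into the CONSTANT_COPY blob that
            // travels inside the HIDL model.
            size_t offset = smallValues->size();
            smallValues->resize(offset + length);
            memcpy(smallValues->data() + offset, buffer, length);
        } else {
            // Large value: only remember the pointer; all large values are
            // copied into one shared memory pool at finish() time.
            largeValues->push_back({index, buffer});
        }
    }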
-rw-r--r--  nn/common/CpuExecutor.cpp            |  36
-rw-r--r--  nn/common/Utils.cpp                  |  10
-rw-r--r--  nn/common/include/CpuExecutor.h      |  16
-rw-r--r--  nn/common/include/Utils.h            |   3
-rw-r--r--  nn/driver/sample/SampleDriver.cpp    |  29
-rw-r--r--  nn/driver/sample/SampleDriver.h      |   4
-rw-r--r--  nn/runtime/ExecutionBuilder.cpp      |  30
-rw-r--r--  nn/runtime/ExecutionPlan.cpp         |  12
-rw-r--r--  nn/runtime/Memory.cpp                |   2
-rw-r--r--  nn/runtime/ModelBuilder.cpp          |  83
-rw-r--r--  nn/runtime/ModelBuilder.h            |  26
-rw-r--r--  nn/runtime/NeuralNetworks.cpp        | 107
-rw-r--r--  nn/runtime/include/NeuralNetworks.h  |  26

13 files changed, 257 insertions(+), 127 deletions(-)
diff --git a/nn/common/CpuExecutor.cpp b/nn/common/CpuExecutor.cpp
index 79f9255bd..9c6df76e6 100644
--- a/nn/common/CpuExecutor.cpp
+++ b/nn/common/CpuExecutor.cpp
@@ -79,6 +79,19 @@ bool RunTimePoolInfo::update() {
return true;
}
+bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
+ const hidl_vec<hidl_memory>& pools) {
+ poolInfos->resize(pools.size());
+ for (size_t i = 0; i < pools.size(); i++) {
+ auto& poolInfo = (*poolInfos)[i];
+ if (!poolInfo.set(pools[i])) {
+ LOG(ERROR) << "Could not map pool";
+ return false;
+ }
+ }
+ return true;
+}
+
// Updates the RunTimeOperandInfo with the newly calculated shape.
// Allocates the buffer if needed.
static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& shape) {
@@ -113,14 +126,15 @@ static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& sh
// Ignore the .pools entry in model and request. This will have been taken care of
// by the caller.
int CpuExecutor::run(const Model& model, const Request& request,
- const std::vector<RunTimePoolInfo>& runTimePoolInfos) {
+ const std::vector<RunTimePoolInfo>& modelPoolInfos,
+ const std::vector<RunTimePoolInfo>& requestPoolInfos) {
VLOG(CPUEXE) << "CpuExecutor::run()";
// VLOG(CPUEXE) << "model: " << toString(model);
VLOG(CPUEXE) << "request: " << toString(request);
mModel = &model;
mRequest = &request; // TODO check if mRequest is needed
- initializeRunTimeInfo(runTimePoolInfos);
+ initializeRunTimeInfo(modelPoolInfos, requestPoolInfos);
// The model has serialized the operations in execution order.
for (const auto& operation : model.operations) {
int n = executeOperation(operation);
@@ -128,7 +142,10 @@ int CpuExecutor::run(const Model& model, const Request& request,
return n;
}
}
- for (auto runtimeInfo : runTimePoolInfos) {
+ for (auto runtimeInfo : modelPoolInfos) {
+ runtimeInfo.update();
+ }
+ for (auto runtimeInfo : requestPoolInfos) {
runtimeInfo.update();
}
mModel = nullptr;
@@ -137,7 +154,8 @@ int CpuExecutor::run(const Model& model, const Request& request,
return ANEURALNETWORKS_NO_ERROR;
}
-bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& runTimePoolInfos) {
+bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
+ const std::vector<RunTimePoolInfo>& requestPoolInfos) {
VLOG(CPUEXE) << "CpuExecutor::initializeRunTimeInfo";
const size_t count = mModel->operands.size();
mOperands.resize(count);
@@ -163,8 +181,8 @@ bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& runT
break;
case OperandLifeTime::CONSTANT_REFERENCE: {
auto poolIndex = from.location.poolIndex;
- nnAssert(poolIndex < runTimePoolInfos.size());
- auto& r = runTimePoolInfos[poolIndex];
+ nnAssert(poolIndex < modelPoolInfos.size());
+ auto& r = modelPoolInfos[poolIndex];
to.buffer = r.buffer + from.location.offset;
to.numberOfUsesLeft = 0;
break;
@@ -183,7 +201,7 @@ bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& runT
// Adjust the runtime info for the arguments passed to the model,
// modifying the buffer location, and possibly the dimensions.
- auto updateForArguments = [this, &runTimePoolInfos](const std::vector<uint32_t>& indexes,
+ auto updateForArguments = [this, &requestPoolInfos](const std::vector<uint32_t>& indexes,
const hidl_vec<RequestArgument>& arguments) {
nnAssert(indexes.size() == arguments.size());
for (size_t i = 0; i < indexes.size(); i++) {
@@ -203,8 +221,8 @@ bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& runT
nnAssert(to.buffer == nullptr);
} else {
auto poolIndex = from.location.poolIndex;
- nnAssert(poolIndex < runTimePoolInfos.size());
- auto& r = runTimePoolInfos[poolIndex];
+ nnAssert(poolIndex < requestPoolInfos.size());
+ auto& r = requestPoolInfos[poolIndex];
to.buffer = r.buffer + from.location.offset;
}
}
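A minimal editorial sketch (not part of the patch; runOnCpu is an invented
name, and the error-code choice is illustrative) of the calling convention the
new CpuExecutor::run() expects: poolIndex values in CONSTANT_REFERENCE operand
locations index the model's pools, while poolIndex values in request arguments
index the request's pools, so callers must map both sets separately.

    #include <vector>
    #include "CpuExecutor.h"

    int runOnCpu(const Model& model, const Request& request) {
        std::vector<RunTimePoolInfo> modelPoolInfos;
        std::vector<RunTimePoolInfo> requestPoolInfos;
        // Map the two sets of pools separately; they are no longer commingled.
        if (!setRunTimePoolInfosFromHidlMemories(&modelPoolInfos, model.pools) ||
            !setRunTimePoolInfosFromHidlMemories(&requestPoolInfos, request.pools)) {
            return ANEURALNETWORKS_UNMAPPABLE;
        }
        CpuExecutor executor;
        return executor.run(model, request, modelPoolInfos, requestPoolInfos);
    }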
diff --git a/nn/common/Utils.cpp b/nn/common/Utils.cpp
index f73b12cec..245626731 100644
--- a/nn/common/Utils.cpp
+++ b/nn/common/Utils.cpp
@@ -245,6 +245,16 @@ uint32_t alignBytesNeeded(uint32_t index, size_t length) {
return extra;
}
+void logModelToInfo(const Model& model) {
+ LOG(INFO) << "Model start";
+ LOG(INFO) << "operands" << toString(model.operands);
+ LOG(INFO) << "operations" << toString(model.operations);
+ LOG(INFO) << "inputIndexes" << toString(model.inputIndexes);
+ LOG(INFO) << "outputIndexes" << toString(model.outputIndexes);
+ LOG(INFO) << "operandValues size" << model.operandValues.size();
+ LOG(INFO) << "pools" << toString(model.pools);
+}
+
// Validates the type. The used dimensions can be underspecified.
int validateOperandType(const ANeuralNetworksOperandType& type, const char* tag,
bool allowPartial) {
diff --git a/nn/common/include/CpuExecutor.h b/nn/common/include/CpuExecutor.h
index dd92eaf1b..b765efc7c 100644
--- a/nn/common/include/CpuExecutor.h
+++ b/nn/common/include/CpuExecutor.h
@@ -55,10 +55,7 @@ struct RunTimeOperandInfo {
uint32_t numberOfUsesLeft;
Shape shape() const {
- return Shape{.type = type,
- .dimensions = dimensions,
- .scale = scale,
- .offset = zeroPoint};
+ return Shape{.type = type, .dimensions = dimensions, .scale = scale, .offset = zeroPoint};
}
};
@@ -72,6 +69,9 @@ struct RunTimePoolInfo {
bool update();
};
+bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
+ const hidl_vec<hidl_memory>& pools);
+
// This class is used to execute a model on the CPU.
class CpuExecutor {
public:
@@ -80,17 +80,17 @@ public:
// The model must outlive the executor. We prevent it from being modified
// while this is executing.
int run(const Model& model, const Request& request,
- const std::vector<RunTimePoolInfo>& runTimePoolInfos);
+ const std::vector<RunTimePoolInfo>& modelPoolInfos,
+ const std::vector<RunTimePoolInfo>& requestPoolInfos);
private:
- bool initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& runTimePoolInfos);
+ bool initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
+ const std::vector<RunTimePoolInfo>& requestPoolInfos);
// Runs one operation of the graph.
int executeOperation(const Operation& entry);
// Decrement the usage count for the operands listed. Frees the memory
// allocated for any temporary variable with a count of zero.
void freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs);
- void setLocationAndUses(RunTimeOperandInfo* to, const DataLocation& location,
- const std::vector<RunTimePoolInfo>& runTimePoolInfos);
// The model and the request that we'll execute. Only valid while run()
// is being executed.
diff --git a/nn/common/include/Utils.h b/nn/common/include/Utils.h
index da035592b..3eebf2606 100644
--- a/nn/common/include/Utils.h
+++ b/nn/common/include/Utils.h
@@ -106,6 +106,9 @@ hidl_memory allocateSharedMemory(int64_t size);
// to determine what this should be.
uint32_t alignBytesNeeded(uint32_t index, size_t length);
+// Does a detailed LOG(INFO) of the model
+void logModelToInfo(const Model& model);
+
inline void setFromIntList(hidl_vec<uint32_t>* vec, uint32_t count, const uint32_t* data) {
vec->resize(count);
for (uint32_t i = 0; i < count; i++) {
diff --git a/nn/driver/sample/SampleDriver.cpp b/nn/driver/sample/SampleDriver.cpp
index 902d4e8c0..faeecae07 100644
--- a/nn/driver/sample/SampleDriver.cpp
+++ b/nn/driver/sample/SampleDriver.cpp
@@ -31,7 +31,10 @@ namespace sample_driver {
Return<ErrorStatus> SampleDriver::prepareModel(const Model& model,
const sp<IPreparedModelCallback>& callback) {
- VLOG(DRIVER) << "prepareModel(" << toString(model) << ")"; // TODO errror
+ if (VLOG_IS_ON(DRIVER)) {
+ VLOG(DRIVER) << "prepareModel";
+ logModelToInfo(model);
+ }
if (callback.get() == nullptr) {
LOG(ERROR) << "invalid callback passed to prepareModel";
return ErrorStatus::INVALID_ARGUMENT;
@@ -42,9 +45,12 @@ Return<ErrorStatus> SampleDriver::prepareModel(const Model& model,
}
// TODO: make asynchronous later
- sp<IPreparedModel> preparedModel = new SamplePreparedModel(model);
+ sp<SamplePreparedModel> preparedModel = new SamplePreparedModel(model);
+ if (!preparedModel->initialize()) {
+ callback->notify(ErrorStatus::INVALID_ARGUMENT, nullptr);
+ return ErrorStatus::INVALID_ARGUMENT;
+ }
callback->notify(ErrorStatus::NONE, preparedModel);
-
return ErrorStatus::NONE;
}
@@ -64,27 +70,20 @@ int SampleDriver::run() {
return 1;
}
-static bool mapPools(std::vector<RunTimePoolInfo>* poolInfos, const hidl_vec<hidl_memory>& pools) {
- poolInfos->resize(pools.size());
- for (size_t i = 0; i < pools.size(); i++) {
- auto& poolInfo = (*poolInfos)[i];
- if (!poolInfo.set(pools[i])) {
- return false;
- }
- }
- return true;
+bool SamplePreparedModel::initialize() {
+ return setRunTimePoolInfosFromHidlMemories(&mPoolInfos, mModel.pools);
}
void SamplePreparedModel::asyncExecute(const Request& request,
const sp<IExecutionCallback>& callback) {
- std::vector<RunTimePoolInfo> poolInfo;
- if (!mapPools(&poolInfo, request.pools)) {
+ std::vector<RunTimePoolInfo> requestPoolInfos;
+ if (!setRunTimePoolInfosFromHidlMemories(&requestPoolInfos, request.pools)) {
callback->notify(ErrorStatus::GENERAL_FAILURE);
return;
}
CpuExecutor executor;
- int n = executor.run(mModel, request, poolInfo);
+ int n = executor.run(mModel, request, mPoolInfos, requestPoolInfos);
VLOG(DRIVER) << "executor.run returned " << n;
ErrorStatus executionStatus =
n == ANEURALNETWORKS_NO_ERROR ? ErrorStatus::NONE : ErrorStatus::GENERAL_FAILURE;
diff --git a/nn/driver/sample/SampleDriver.h b/nn/driver/sample/SampleDriver.h
index 51581fed7..7e95c952b 100644
--- a/nn/driver/sample/SampleDriver.h
+++ b/nn/driver/sample/SampleDriver.h
@@ -17,6 +17,7 @@
#ifndef ANDROID_ML_NN_SAMPLE_DRIVER_SAMPLE_DRIVER_H
#define ANDROID_ML_NN_SAMPLE_DRIVER_SAMPLE_DRIVER_H
+#include "CpuExecutor.h"
#include "HalInterfaces.h"
#include "NeuralNetworks.h"
@@ -52,12 +53,15 @@ public:
: // Make a copy of the model, as we need to preserve it.
mModel(model) {}
~SamplePreparedModel() override {}
+ bool initialize();
Return<ErrorStatus> execute(const Request& request,
const sp<IExecutionCallback>& callback) override;
private:
void asyncExecute(const Request& request, const sp<IExecutionCallback>& callback);
+
Model mModel;
+ std::vector<RunTimePoolInfo> mPoolInfos;
};
} // namespace sample_driver
diff --git a/nn/runtime/ExecutionBuilder.cpp b/nn/runtime/ExecutionBuilder.cpp
index 56dc723b2..077e068d8 100644
--- a/nn/runtime/ExecutionBuilder.cpp
+++ b/nn/runtime/ExecutionBuilder.cpp
@@ -96,8 +96,7 @@ ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation) :
mModel(compilation->mModel),
mPlan(&compilation->mPlan),
mInputs(mModel->inputCount()),
- mOutputs(mModel->outputCount()),
- mMemories(mModel->getMemories()) {
+ mOutputs(mModel->outputCount()) {
VLOG(EXECUTION) << "ExecutionBuilder::ExecutionBuilder";
}
@@ -600,10 +599,11 @@ int StepExecutor::startComputeOnDevice(sp<ExecutionCallback>* synchronizationCal
}
static void asyncStartComputeOnCpu(const Model& model, const Request& request,
- const std::vector<RunTimePoolInfo>& runTimePoolInfos,
+ const std::vector<RunTimePoolInfo>& modelPoolInfos,
+ const std::vector<RunTimePoolInfo>& requestPoolInfos,
const sp<IExecutionCallback>& executionCallback) {
CpuExecutor executor;
- int err = executor.run(model, request, runTimePoolInfos);
+ int err = executor.run(model, request, modelPoolInfos, requestPoolInfos);
ErrorStatus status = err == ANEURALNETWORKS_NO_ERROR ?
ErrorStatus::NONE : ErrorStatus::GENERAL_FAILURE;
executionCallback->notify(status);
@@ -622,23 +622,30 @@ int StepExecutor::startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallba
sp<ExecutionCallback> executionCallback = new ExecutionCallback();
*synchronizationCallback = nullptr;
- std::vector<RunTimePoolInfo> runTimePoolInfos;
+ std::vector<RunTimePoolInfo> modelPoolInfos;
+ if (!setRunTimePoolInfosFromHidlMemories(&modelPoolInfos, model.pools)) {
+ return ANEURALNETWORKS_UNMAPPABLE;
+ }
+
+ std::vector<RunTimePoolInfo> requestPoolInfos;
uint32_t count = mMemories.size();
- runTimePoolInfos.resize(count);
+ requestPoolInfos.resize(count);
for (uint32_t i = 0; i < count; i++) {
const Memory* mem = mMemories[i];
- runTimePoolInfos[i].set(mem->getHidlMemory());
+ if (!requestPoolInfos[i].set(mem->getHidlMemory())) {
+ return ANEURALNETWORKS_UNMAPPABLE;
+ }
}
// Create as many pools as there are input / output.
- auto fixPointerArguments = [&runTimePoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) {
+ auto fixPointerArguments = [&requestPoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) {
for (ModelArgumentInfo& argumentInfo : argumentInfos) {
if (argumentInfo.state == ModelArgumentInfo::POINTER) {
RunTimePoolInfo runTimeInfo = {
.buffer = static_cast<uint8_t*>(argumentInfo.buffer)};
argumentInfo.locationAndLength.poolIndex =
- static_cast<uint32_t>(runTimePoolInfos.size());
+ static_cast<uint32_t>(requestPoolInfos.size());
argumentInfo.locationAndLength.offset = 0;
- runTimePoolInfos.push_back(runTimeInfo);
+ requestPoolInfos.push_back(runTimeInfo);
}
}
};
@@ -651,7 +658,8 @@ int StepExecutor::startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallba
// TODO: should model be moved with a std::cref?
std::thread thread(asyncStartComputeOnCpu, model, std::move(request),
- std::move(runTimePoolInfos), executionCallback);
+ std::move(modelPoolInfos), std::move(requestPoolInfos),
+ executionCallback);
executionCallback->bind_thread(std::move(thread));
*synchronizationCallback = executionCallback;
diff --git a/nn/runtime/ExecutionPlan.cpp b/nn/runtime/ExecutionPlan.cpp
index d2f74d5cf..009fc3366 100644
--- a/nn/runtime/ExecutionPlan.cpp
+++ b/nn/runtime/ExecutionPlan.cpp
@@ -360,9 +360,11 @@ int ExecutionStep::finishSubModel(const ModelBuilder* fromModel, bool* hasOutput
void ExecutionStep::dump() const {
Model model;
mSubModel->setHidlModel(&model);
- VLOG(COMPILATION) << "ExecutionStep#" << mIndex
- << " for " << (mDevice == nullptr ? "CPU" : mDevice->getName())
- << " submodel: " << toString(model);
+ if (VLOG_IS_ON(COMPILATION)) {
+ VLOG(COMPILATION) << "ExecutionStep#" << mIndex
+ << " for " << (mDevice == nullptr ? "CPU" : mDevice->getName());
+ logModelToInfo(model);
+ }
}
int ExecutionPlan::CompoundBody::finish(const ModelBuilder* fromModel) {
@@ -750,8 +752,8 @@ int ModelBuilder::partitionTheWork(const std::vector<std::shared_ptr<Device>>& d
if (VLOG_IS_ON(COMPILATION)) {
Model model;
setHidlModel(&model);
- VLOG(COMPILATION) << "ModelBuilder::partitionTheWork: original model: "
- << toString(model);
+ VLOG(COMPILATION) << "ModelBuilder::partitionTheWork: original model: ";
+ logModelToInfo(model);
plan->dump();
}
return n;
diff --git a/nn/runtime/Memory.cpp b/nn/runtime/Memory.cpp
index 9b05dbf4e..5660e0272 100644
--- a/nn/runtime/Memory.cpp
+++ b/nn/runtime/Memory.cpp
@@ -109,12 +109,14 @@ int MemoryFd::getPointer(uint8_t** buffer) const {
}
uint32_t MemoryTracker::add(const Memory* memory) {
+ VLOG(MODEL) << __func__ << " for " << memory;
// See if we already have this memory. If so,
// return its index.
auto i = mKnown.find(memory);
if (i != mKnown.end()) {
return i->second;
}
+ VLOG(MODEL) << "It's new";
// It's a new one. Save it and assign an index to it.
size_t next = mKnown.size();
if (next > 0xFFFFFFFF) {
diff --git a/nn/runtime/ModelBuilder.cpp b/nn/runtime/ModelBuilder.cpp
index 2274b89c7..f446beeb2 100644
--- a/nn/runtime/ModelBuilder.cpp
+++ b/nn/runtime/ModelBuilder.cpp
@@ -58,6 +58,7 @@ int ModelBuilder::addOperand(const ANeuralNetworksOperandType& type) {
}
int ModelBuilder::setOperandValue(uint32_t index, const void* buffer, size_t length) {
+ VLOG(MODEL) << __func__ << " for operand " << index << " size " << length;
if (index >= operandCount()) {
LOG(ERROR) << "ANeuralNetworksModel_setOperandValue setting operand " << index << " of "
<< operandCount();
@@ -76,25 +77,81 @@ int ModelBuilder::setOperandValue(uint32_t index, const void* buffer, size_t len
.offset = 0,
.length = 0};
} else {
+ if (length > 0xFFFFFFFF) {
+ LOG(ERROR) << "ANeuralNetworksModel_setOperandValue value length of " << length
+ << " exceeds max size";
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ uint32_t valueLength = static_cast<uint32_t>(length);
uint32_t neededLength = sizeOfData(operand.type, operand.dimensions);
- if (neededLength != length) {
- LOG(ERROR) << "ANeuralNetworksModel_setOperandValue setting " << length
+ if (neededLength != valueLength) {
+ LOG(ERROR) << "ANeuralNetworksModel_setOperandValue setting " << valueLength
<< " bytes when needing " << neededLength;
return ANEURALNETWORKS_BAD_DATA;
}
- uint32_t existingSize = static_cast<uint32_t>(mOperandValues.size());
- uint32_t extraBytes = alignBytesNeeded(existingSize, length);
- mOperandValues.resize(existingSize + extraBytes + length);
- operand.lifetime = OperandLifeTime::CONSTANT_COPY;
- operand.location = {
- .poolIndex = 0, .offset = existingSize + extraBytes, .length = neededLength};
- memcpy(&mOperandValues[operand.location.offset], buffer, length);
+ if (valueLength <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) {
+ uint32_t existingSize = static_cast<uint32_t>(mSmallOperandValues.size());
+ uint32_t extraBytes = alignBytesNeeded(existingSize, valueLength);
+ mSmallOperandValues.resize(existingSize + extraBytes + valueLength);
+ operand.lifetime = OperandLifeTime::CONSTANT_COPY;
+ operand.location = {
+ .poolIndex = 0, .offset = existingSize + extraBytes, .length = neededLength};
+ memcpy(&mSmallOperandValues[operand.location.offset], buffer, valueLength);
+ VLOG(MODEL) << "Copied small value to offset " << operand.location.offset;
+ } else {
+ VLOG(MODEL) << "Saving large value";
+ operand.lifetime = OperandLifeTime::CONSTANT_REFERENCE;
+ // The values for poolIndex and offset will be set when the model is finished.
+ operand.location = {.poolIndex = 0, .offset = 0, .length = valueLength};
+ // We keep track of the buffers. We'll allocate the shared memory only
+ // once we know the total size, to avoid needless copies.
+ mLargeOperandValues.push_back(LargeValue{.operandIndex = index, .buffer = buffer});
+ }
+ }
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ModelBuilder::copyLargeValuesToSharedMemory() {
+ VLOG(MODEL) << __func__ << " has " << mLargeOperandValues.size() << " values.";
+ if (!mLargeOperandValues.empty()) {
+ // Calculate the size of the shared memory needed for all the large values.
+ // Also sets the offset for each value within the memory.
+ size_t poolSize = 0;
+ for (LargeValue& l: mLargeOperandValues) {
+ Operand& operand = mOperands[l.operandIndex];
+ nnAssert(operand.lifetime == OperandLifeTime::CONSTANT_REFERENCE);
+ poolSize += alignBytesNeeded(poolSize, operand.location.length);
+ operand.location.offset = poolSize;
+ poolSize += operand.location.length;
+ }
+
+ // Allocate the shared memory.
+ int n = mLargeValueMemory.create(poolSize);
+ if (n != ANEURALNETWORKS_NO_ERROR) {
+ return n;
+ }
+ uint8_t* memoryPointer = nullptr;
+ n = mLargeValueMemory.getPointer(&memoryPointer);
+ if (n != ANEURALNETWORKS_NO_ERROR) {
+ return n;
+ }
+ uint32_t poolIndex = mMemories.add(&mLargeValueMemory);
+ VLOG(MODEL) << "Allocated large value pool of size " << poolSize << " at index "
+ << poolIndex;
+
+ // Copy the values to this memory.
+ for (LargeValue& l: mLargeOperandValues) {
+ Operand& operand = mOperands[l.operandIndex];
+ operand.location.poolIndex = poolIndex;
+ memcpy(memoryPointer + operand.location.offset, l.buffer, operand.location.length);
+ }
}
return ANEURALNETWORKS_NO_ERROR;
}
int ModelBuilder::setOperandValueFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
size_t length) {
+ VLOG(MODEL) << __func__ << " for operand " << index << " offset " << offset << " size " << length;
if (index >= operandCount()) {
LOG(ERROR) << "ANeuralNetworksModel_setOperandValueFromMemory setting operand " << index
<< " of " << operandCount();
@@ -223,8 +280,14 @@ int ModelBuilder::finish() {
return ANEURALNETWORKS_BAD_STATE;
}
+ int n = copyLargeValuesToSharedMemory();
+ if (n != ANEURALNETWORKS_NO_ERROR) {
+ return n;
+ }
+
// We sort the operations so that they will be in the appropriate
// order for a single-threaded, op at a time execution.
+ // TODO: we don't need this if we always run the partitioner.
sortIntoRunOrder();
mCompletedModel = true;
return ANEURALNETWORKS_NO_ERROR;
@@ -282,7 +345,7 @@ void ModelBuilder::setHidlModel(Model* model) const {
model->operations = mOperations;
model->inputIndexes = mInputIndexes;
model->outputIndexes = mOutputIndexes;
- model->operandValues = mOperandValues;
+ model->operandValues = mSmallOperandValues;
uint32_t count = mMemories.size();
model->pools.resize(count);
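The offset arithmetic in copyLargeValuesToSharedMemory() above can be checked
with a small worked example (editorial sketch; the 4-byte alignment, the value
sizes, and alignTo4 are assumptions made here, alignBytesNeeded() in Utils.cpp
decides the real padding).

    #include <cstdint>
    #include <vector>

    static uint32_t alignTo4(uint32_t offset) {
        return (4 - offset % 4) % 4;            // padding to a 4-byte boundary
    }

    int main() {
        std::vector<uint32_t> lengths = {130, 1000};  // two hypothetical large values
        uint32_t poolSize = 0;
        for (uint32_t length : lengths) {
            poolSize += alignTo4(poolSize);     // pad first, like the loop above
            uint32_t offset = poolSize;         // becomes operand.location.offset
            poolSize += length;
            // First value lands at offset 0; second at 132 (130 data + 2 padding).
            (void)offset;
        }
        // poolSize == 1132: the shared memory region is created only now, once
        // the total is known, then each value is copied to its recorded offset.
        return 0;
    }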
diff --git a/nn/runtime/ModelBuilder.h b/nn/runtime/ModelBuilder.h
index edb646614..d5ab078bf 100644
--- a/nn/runtime/ModelBuilder.h
+++ b/nn/runtime/ModelBuilder.h
@@ -78,7 +78,7 @@ public:
const MemoryTracker& getMemories() const { return mMemories; }
const std::vector<Operation>& getOperations() const { return mOperations; }
const uint8_t* getPointerToOperandValue(uint32_t offset) const {
- return mOperandValues.data() + offset;
+ return mSmallOperandValues.data() + offset;
}
int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices,
@@ -99,12 +99,9 @@ public:
// Sorts the operations to be in the correct order for single threaded
// node-at-a-time execution.
void sortIntoRunOrder();
- /*
- int32_t getOperandIndex(const ArrayInfo& info, uint32_t listIndex) const {
- nnAssert(listIndex < info.count);
- return mOperandIndexes[info.offset + listIndex];
- }
- */
+
+ // Copies the large values to shared memory, if there are any.
+ int copyLargeValuesToSharedMemory();
// The operations of the graph.
std::vector<Operation> mOperations;
@@ -118,11 +115,18 @@ public:
MemoryTracker mMemories;
- // The value of the operands that are defined at model
+ // The value of the small operands that are defined at model
// creation time.
- // TODO We are copying all the values. Once we support memory
- // pools, revisit.
- std::vector<uint8_t> mOperandValues;
+ std::vector<uint8_t> mSmallOperandValues;
+
+ struct LargeValue {
+ uint32_t operandIndex;
+ const void* buffer;
+ };
+ // Operand index and buffer pointer for all the large operand values of this model.
+ std::vector<LargeValue> mLargeOperandValues;
+ // The shared memory region that will contain the large values.
+ Memory mLargeValueMemory;
// Once the model has been finished, we should not allow further
// modifications to the model.
diff --git a/nn/runtime/NeuralNetworks.cpp b/nn/runtime/NeuralNetworks.cpp
index 979ca7fc1..3766e3b0e 100644
--- a/nn/runtime/NeuralNetworks.cpp
+++ b/nn/runtime/NeuralNetworks.cpp
@@ -36,83 +36,86 @@
// Make sure the constants defined in the header files have not changed values.
// IMPORTANT: When adding new values, update kNumberOfDataTypes or kNumberOfDataTypesOEM
// in Utils.h.
-static_assert(ANEURALNETWORKS_FLOAT32 == 0, "ANEURALNETWORKS_FLOAT32 may have changed");
-static_assert(ANEURALNETWORKS_INT32 == 1, "ANEURALNETWORKS_INT32 may have changed");
-static_assert(ANEURALNETWORKS_UINT32 == 2, "ANEURALNETWORKS_UINT32 may have changed");
+static_assert(ANEURALNETWORKS_FLOAT32 == 0, "ANEURALNETWORKS_FLOAT32 has changed");
+static_assert(ANEURALNETWORKS_INT32 == 1, "ANEURALNETWORKS_INT32 has changed");
+static_assert(ANEURALNETWORKS_UINT32 == 2, "ANEURALNETWORKS_UINT32 has changed");
static_assert(ANEURALNETWORKS_TENSOR_FLOAT32 == 3,
- "ANEURALNETWORKS_TENSOR_FLOAT32 may have changed");
-static_assert(ANEURALNETWORKS_TENSOR_INT32 == 4, "ANEURALNETWORKS_TENSOR_INT32 may have changed");
+ "ANEURALNETWORKS_TENSOR_FLOAT32 has changed");
+static_assert(ANEURALNETWORKS_TENSOR_INT32 == 4, "ANEURALNETWORKS_TENSOR_INT32 has changed");
static_assert(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM == 5,
- "ANEURALNETWORKS_TENSOR_QUANT8_ASYMM may have changed");
-static_assert(ANEURALNETWORKS_OEM_SCALAR == 10000, "ANEURALNETWORKS_OEM_SCALAR may have changed");
+ "ANEURALNETWORKS_TENSOR_QUANT8_ASYMM has changed");
+static_assert(ANEURALNETWORKS_OEM_SCALAR == 10000, "ANEURALNETWORKS_OEM_SCALAR has changed");
static_assert(ANEURALNETWORKS_TENSOR_OEM_BYTE == 10001,
- "ANEURALNETWORKS_TENSOR_OEM_BYTE may have changed");
+ "ANEURALNETWORKS_TENSOR_OEM_BYTE has changed");
// IMPORTANT: When adding new values, update kNumberOfOperationTypes or
// kNumberOfOperationTypesOEM in Utils.h.
-static_assert(ANEURALNETWORKS_ADD == 0, "ANEURALNETWORKS_ADD may have changed");
+static_assert(ANEURALNETWORKS_ADD == 0, "ANEURALNETWORKS_ADD has changed");
static_assert(ANEURALNETWORKS_AVERAGE_POOL_2D == 1,
- "ANEURALNETWORKS_AVERAGE_POOL_2D may have changed");
-static_assert(ANEURALNETWORKS_CONCATENATION == 2, "ANEURALNETWORKS_CONCATENATION may have changed");
-static_assert(ANEURALNETWORKS_CONV_2D == 3, "ANEURALNETWORKS_CONV_2D may have changed");
+ "ANEURALNETWORKS_AVERAGE_POOL_2D has changed");
+static_assert(ANEURALNETWORKS_CONCATENATION == 2, "ANEURALNETWORKS_CONCATENATION has changed");
+static_assert(ANEURALNETWORKS_CONV_2D == 3, "ANEURALNETWORKS_CONV_2D has changed");
static_assert(ANEURALNETWORKS_DEPTHWISE_CONV_2D == 4,
- "ANEURALNETWORKS_DEPTHWISE_CONV_2D may have changed");
+ "ANEURALNETWORKS_DEPTHWISE_CONV_2D has changed");
static_assert(ANEURALNETWORKS_DEPTH_TO_SPACE == 5,
- "ANEURALNETWORKS_DEPTH_TO_SPACE may have changed");
-static_assert(ANEURALNETWORKS_DEQUANTIZE == 6, "ANEURALNETWORKS_DEQUANTIZE may have changed");
+ "ANEURALNETWORKS_DEPTH_TO_SPACE has changed");
+static_assert(ANEURALNETWORKS_DEQUANTIZE == 6, "ANEURALNETWORKS_DEQUANTIZE has changed");
static_assert(ANEURALNETWORKS_EMBEDDING_LOOKUP == 7,
- "ANEURALNETWORKS_EMBEDDING_LOOKUP may have changed");
-static_assert(ANEURALNETWORKS_FLOOR == 8, "ANEURALNETWORKS_FLOOR may have changed");
+ "ANEURALNETWORKS_EMBEDDING_LOOKUP has changed");
+static_assert(ANEURALNETWORKS_FLOOR == 8, "ANEURALNETWORKS_FLOOR has changed");
static_assert(ANEURALNETWORKS_FULLY_CONNECTED == 9,
- "ANEURALNETWORKS_FULLY_CONNECTED may have changed");
+ "ANEURALNETWORKS_FULLY_CONNECTED has changed");
static_assert(ANEURALNETWORKS_HASHTABLE_LOOKUP == 10,
- "ANEURALNETWORKS_HASHTABLE_LOOKUP may have changed");
+ "ANEURALNETWORKS_HASHTABLE_LOOKUP has changed");
static_assert(ANEURALNETWORKS_L2_NORMALIZATION == 11,
- "ANEURALNETWORKS_L2_NORMALIZATION may have changed");
-static_assert(ANEURALNETWORKS_L2_POOL_2D == 12, "ANEURALNETWORKS_L2_POOL may have changed");
+ "ANEURALNETWORKS_L2_NORMALIZATION has changed");
+static_assert(ANEURALNETWORKS_L2_POOL_2D == 12, "ANEURALNETWORKS_L2_POOL has changed");
static_assert(ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION == 13,
- "ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION may have changed");
-static_assert(ANEURALNETWORKS_LOGISTIC == 14, "ANEURALNETWORKS_LOGISTIC may have changed");
+ "ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION has changed");
+static_assert(ANEURALNETWORKS_LOGISTIC == 14, "ANEURALNETWORKS_LOGISTIC has changed");
static_assert(ANEURALNETWORKS_LSH_PROJECTION == 15,
- "ANEURALNETWORKS_LSH_PROJECTION may have changed");
-static_assert(ANEURALNETWORKS_LSTM == 16, "ANEURALNETWORKS_LSTM may have changed");
-static_assert(ANEURALNETWORKS_MAX_POOL_2D == 17, "ANEURALNETWORKS_MAX_POOL may have changed");
-static_assert(ANEURALNETWORKS_MUL == 18, "ANEURALNETWORKS_MUL may have changed");
-static_assert(ANEURALNETWORKS_RELU == 19, "ANEURALNETWORKS_RELU may have changed");
-static_assert(ANEURALNETWORKS_RELU1 == 20, "ANEURALNETWORKS_RELU1 may have changed");
-static_assert(ANEURALNETWORKS_RELU6 == 21, "ANEURALNETWORKS_RELU6 may have changed");
-static_assert(ANEURALNETWORKS_RESHAPE == 22, "ANEURALNETWORKS_RESHAPE may have changed");
+ "ANEURALNETWORKS_LSH_PROJECTION has changed");
+static_assert(ANEURALNETWORKS_LSTM == 16, "ANEURALNETWORKS_LSTM has changed");
+static_assert(ANEURALNETWORKS_MAX_POOL_2D == 17, "ANEURALNETWORKS_MAX_POOL has changed");
+static_assert(ANEURALNETWORKS_MUL == 18, "ANEURALNETWORKS_MUL has changed");
+static_assert(ANEURALNETWORKS_RELU == 19, "ANEURALNETWORKS_RELU has changed");
+static_assert(ANEURALNETWORKS_RELU1 == 20, "ANEURALNETWORKS_RELU1 has changed");
+static_assert(ANEURALNETWORKS_RELU6 == 21, "ANEURALNETWORKS_RELU6 has changed");
+static_assert(ANEURALNETWORKS_RESHAPE == 22, "ANEURALNETWORKS_RESHAPE has changed");
static_assert(ANEURALNETWORKS_RESIZE_BILINEAR == 23,
- "ANEURALNETWORKS_RESIZE_BILINEAR may have changed");
-static_assert(ANEURALNETWORKS_RNN == 24, "ANEURALNETWORKS_RNN may have changed");
-static_assert(ANEURALNETWORKS_SOFTMAX == 25, "ANEURALNETWORKS_SOFTMAX may have changed");
+ "ANEURALNETWORKS_RESIZE_BILINEAR has changed");
+static_assert(ANEURALNETWORKS_RNN == 24, "ANEURALNETWORKS_RNN has changed");
+static_assert(ANEURALNETWORKS_SOFTMAX == 25, "ANEURALNETWORKS_SOFTMAX has changed");
static_assert(ANEURALNETWORKS_SPACE_TO_DEPTH == 26,
- "ANEURALNETWORKS_SPACE_TO_DEPTH may have changed");
-static_assert(ANEURALNETWORKS_SVDF == 27, "ANEURALNETWORKS_SVDF may have changed");
-static_assert(ANEURALNETWORKS_TANH == 28, "ANEURALNETWORKS_TANH may have changed");
+ "ANEURALNETWORKS_SPACE_TO_DEPTH has changed");
+static_assert(ANEURALNETWORKS_SVDF == 27, "ANEURALNETWORKS_SVDF has changed");
+static_assert(ANEURALNETWORKS_TANH == 28, "ANEURALNETWORKS_TANH has changed");
static_assert(ANEURALNETWORKS_OEM_OPERATION == 10000,
- "ANEURALNETWORKS_OEM_OPERATION may have changed");
+ "ANEURALNETWORKS_OEM_OPERATION has changed");
-static_assert(ANEURALNETWORKS_FUSED_NONE == 0, "ANEURALNETWORKS_FUSED_NONE may have changed");
-static_assert(ANEURALNETWORKS_FUSED_RELU == 1, "ANEURALNETWORKS_FUSED_RELU may have changed");
-static_assert(ANEURALNETWORKS_FUSED_RELU1 == 2, "ANEURALNETWORKS_FUSED_RELU1 may have changed");
-static_assert(ANEURALNETWORKS_FUSED_RELU6 == 3, "ANEURALNETWORKS_FUSED_RELU6 may have changed");
+static_assert(ANEURALNETWORKS_FUSED_NONE == 0, "ANEURALNETWORKS_FUSED_NONE has changed");
+static_assert(ANEURALNETWORKS_FUSED_RELU == 1, "ANEURALNETWORKS_FUSED_RELU has changed");
+static_assert(ANEURALNETWORKS_FUSED_RELU1 == 2, "ANEURALNETWORKS_FUSED_RELU1 has changed");
+static_assert(ANEURALNETWORKS_FUSED_RELU6 == 3, "ANEURALNETWORKS_FUSED_RELU6 has changed");
static_assert(ANEURALNETWORKS_PREFER_LOW_POWER == 0,
- "ANEURALNETWORKS_PREFER_LOW_POWER may have changed");
+ "ANEURALNETWORKS_PREFER_LOW_POWER has changed");
static_assert(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER == 1,
- "ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER may have changed");
+ "ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER has changed");
static_assert(ANEURALNETWORKS_PREFER_SUSTAINED_SPEED == 2,
- "ANEURALNETWORKS_PREFER_SUSTAINED_SPEED may have changed");
+ "ANEURALNETWORKS_PREFER_SUSTAINED_SPEED has changed");
-static_assert(ANEURALNETWORKS_NO_ERROR == 0, "ANEURALNETWORKS_NO_ERROR may have changed");
-static_assert(ANEURALNETWORKS_OUT_OF_MEMORY == 1, "ANEURALNETWORKS_OUT_OF_MEMORY may have changed");
-static_assert(ANEURALNETWORKS_INCOMPLETE == 2, "ANEURALNETWORKS_INCOMPLETE may have changed");
+static_assert(ANEURALNETWORKS_NO_ERROR == 0, "ANEURALNETWORKS_NO_ERROR has changed");
+static_assert(ANEURALNETWORKS_OUT_OF_MEMORY == 1, "ANEURALNETWORKS_OUT_OF_MEMORY has changed");
+static_assert(ANEURALNETWORKS_INCOMPLETE == 2, "ANEURALNETWORKS_INCOMPLETE has changed");
static_assert(ANEURALNETWORKS_UNEXPECTED_NULL == 3,
- "ANEURALNETWORKS_UNEXPECTED_NULL may have changed");
-static_assert(ANEURALNETWORKS_BAD_DATA == 4, "ANEURALNETWORKS_BAD_DATA may have changed");
-static_assert(ANEURALNETWORKS_OP_FAILED == 5, "ANEURALNETWORKS_OP_FAILED may have changed");
-static_assert(ANEURALNETWORKS_BAD_STATE == 6, "ANEURALNETWORKS_BAD_STATE may have changed");
+ "ANEURALNETWORKS_UNEXPECTED_NULL has changed");
+static_assert(ANEURALNETWORKS_BAD_DATA == 4, "ANEURALNETWORKS_BAD_DATA has changed");
+static_assert(ANEURALNETWORKS_OP_FAILED == 5, "ANEURALNETWORKS_OP_FAILED has changed");
+static_assert(ANEURALNETWORKS_BAD_STATE == 6, "ANEURALNETWORKS_BAD_STATE has changed");
+
+static_assert(ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES == 128,
+ "ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES has changed");
// Make sure that the constants are compatible with the values defined in
// hardware/interfaces/neuralnetworks/1.0/types.hal.
diff --git a/nn/runtime/include/NeuralNetworks.h b/nn/runtime/include/NeuralNetworks.h
index 3d93ef034..7bb9e32a7 100644
--- a/nn/runtime/include/NeuralNetworks.h
+++ b/nn/runtime/include/NeuralNetworks.h
@@ -1205,6 +1205,15 @@ typedef enum {
} ResultCode;
/**
+ * For {@link ANeuralNetworksModel_setOperandValue}, values with a
+ * length smaller than or equal to this will be immediately copied
+ * into the model. The size is in bytes.
+ */
+enum {
+ ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES = 128
+};
+
+/**
* ANeuralNetworksMemory is an opaque type that represents memory.
*
* This type is used to represent shared memory, memory mapped files,
@@ -1468,13 +1477,18 @@ int ANeuralNetworksModel_addOperand(ANeuralNetworksModel* model,
/**
* Sets an operand to a constant value.
*
- * For scalar values, the content of buffer is copied into the model.
+ * Values of length smaller than or equal to
+ * {@link ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES}
+ * are immediately copied into the model.
+ *
+ * For values of length greater than {@link ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES},
+ * a pointer to the buffer is stored within the model. The application is responsible
+ * for not changing the content of this region until all executions using this model
+ * have completed. As the data may be copied during processing, modifying the data
+ * after this call yields undefined results.
*
- * For tensor values, a pointer to the buffer is stored within the model.
- * The application is responsible for not changing the content of this region
- * until all executions using this model have completed. As the data may
- * be copied during processing, modifying the data after this call yields
- * undefined results.
+ * For large tensors, using {@link ANeuralNetworksModel_setOperandValueFromMemory}
+ * is likely to be more efficient.
*
* To indicate that an optional operand should be considered missing,
* pass nullptr for buffer and 0 for length.
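To make the revised contract concrete, a usage sketch (editorial, not from the
patch; setConstantsSketch, the operand indexes, the weights buffer, and the
sizes are hypothetical):

    #include "NeuralNetworks.h"

    // `model`, `weights`, and `memory` are assumed to have been created earlier.
    void setConstantsSketch(ANeuralNetworksModel* model, const float* weights,
                            const ANeuralNetworksMemory* memory) {
        // 4 bytes <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES (128):
        // the value is copied into the model right away.
        int32_t activation = ANEURALNETWORKS_FUSED_NONE;
        ANeuralNetworksModel_setOperandValue(model, 2, &activation, sizeof(activation));

        // 1 MiB of weights exceeds the threshold: only the pointer is stored,
        // and the runtime copies the data into shared memory when the model is
        // finished, so the buffer must stay unchanged until then.
        ANeuralNetworksModel_setOperandValue(model, 3, weights, 1 << 20);

        // Alternative for large constants, placing the data in shared memory
        // directly and avoiding the finish-time copy:
        ANeuralNetworksModel_setOperandValueFromMemory(model, 3, memory, 0, 1 << 20);
    }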