diff options
author | Jean-Luc Brouillet <jeanluc@google.com> | 2017-10-11 22:34:04 -0700 |
---|---|---|
committer | Jean-Luc Brouillet <jeanluc@google.com> | 2017-10-12 23:45:14 -0700 |
commit | 1da8fed77c5c296afa18f754ec3616e7f02a4cfd (patch) | |
tree | 1e910c5ebde2cf5b59df8d1da612fdb001e8be09 | |
parent | 59c183a7ff87fffd47284a4a3f41479116d71d36 (diff) | |
download | ml-1da8fed77c5c296afa18f754ec3616e7f02a4cfd.tar.gz |
Pass large model operand values in shared memory.
Because of a limitation in HIDL, large tensor values (e.g. weights)
need to be passed in shared memory rather than as HIDL arguments. This
prevented large models from running on a driver.
Separated the handling of memory pools so that request and model
pools are not commingled.
Also improved logging so that we see more details about the models
when logging.
Bug: 67603060
Test: Ran system tests and VTS tests.
Change-Id: I760e31275699f9306c4b21945935dc3a4ca68754
-rw-r--r-- | nn/common/CpuExecutor.cpp | 36 | ||||
-rw-r--r-- | nn/common/Utils.cpp | 10 | ||||
-rw-r--r-- | nn/common/include/CpuExecutor.h | 16 | ||||
-rw-r--r-- | nn/common/include/Utils.h | 3 | ||||
-rw-r--r-- | nn/driver/sample/SampleDriver.cpp | 29 | ||||
-rw-r--r-- | nn/driver/sample/SampleDriver.h | 4 | ||||
-rw-r--r-- | nn/runtime/ExecutionBuilder.cpp | 30 | ||||
-rw-r--r-- | nn/runtime/ExecutionPlan.cpp | 12 | ||||
-rw-r--r-- | nn/runtime/Memory.cpp | 2 | ||||
-rw-r--r-- | nn/runtime/ModelBuilder.cpp | 83 | ||||
-rw-r--r-- | nn/runtime/ModelBuilder.h | 26 | ||||
-rw-r--r-- | nn/runtime/NeuralNetworks.cpp | 107 | ||||
-rw-r--r-- | nn/runtime/include/NeuralNetworks.h | 26 |
13 files changed, 257 insertions, 127 deletions
diff --git a/nn/common/CpuExecutor.cpp b/nn/common/CpuExecutor.cpp index 79f9255bd..9c6df76e6 100644 --- a/nn/common/CpuExecutor.cpp +++ b/nn/common/CpuExecutor.cpp @@ -79,6 +79,19 @@ bool RunTimePoolInfo::update() { return true; } +bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos, + const hidl_vec<hidl_memory>& pools) { + poolInfos->resize(pools.size()); + for (size_t i = 0; i < pools.size(); i++) { + auto& poolInfo = (*poolInfos)[i]; + if (!poolInfo.set(pools[i])) { + LOG(ERROR) << "Could not map pool"; + return false; + } + } + return true; +} + // Updates the RunTimeOperandInfo with the newly calculated shape. // Allocate the buffer if we need to. static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& shape) { @@ -113,14 +126,15 @@ static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& sh // Ignore the .pools entry in model and request. This will have been taken care of // by the caller. int CpuExecutor::run(const Model& model, const Request& request, - const std::vector<RunTimePoolInfo>& runTimePoolInfos) { + const std::vector<RunTimePoolInfo>& modelPoolInfos, + const std::vector<RunTimePoolInfo>& requestPoolInfos) { VLOG(CPUEXE) << "CpuExecutor::run()"; // VLOG(CPUEXE) << "model: " << toString(model); VLOG(CPUEXE) << "request: " << toString(request); mModel = &model; mRequest = &request; // TODO check if mRequest is needed - initializeRunTimeInfo(runTimePoolInfos); + initializeRunTimeInfo(modelPoolInfos, requestPoolInfos); // The model has serialized the operation in execution order. 
for (const auto& operation : model.operations) { int n = executeOperation(operation); @@ -128,7 +142,10 @@ int CpuExecutor::run(const Model& model, const Request& request, return n; } } - for (auto runtimeInfo : runTimePoolInfos) { + for (auto runtimeInfo : modelPoolInfos) { + runtimeInfo.update(); + } + for (auto runtimeInfo : requestPoolInfos) { runtimeInfo.update(); } mModel = nullptr; @@ -137,7 +154,8 @@ int CpuExecutor::run(const Model& model, const Request& request, return ANEURALNETWORKS_NO_ERROR; } -bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& runTimePoolInfos) { +bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos, + const std::vector<RunTimePoolInfo>& requestPoolInfos) { VLOG(CPUEXE) << "CpuExecutor::initializeRunTimeInfo"; const size_t count = mModel->operands.size(); mOperands.resize(count); @@ -163,8 +181,8 @@ bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& runT break; case OperandLifeTime::CONSTANT_REFERENCE: { auto poolIndex = from.location.poolIndex; - nnAssert(poolIndex < runTimePoolInfos.size()); - auto& r = runTimePoolInfos[poolIndex]; + nnAssert(poolIndex < modelPoolInfos.size()); + auto& r = modelPoolInfos[poolIndex]; to.buffer = r.buffer + from.location.offset; to.numberOfUsesLeft = 0; break; @@ -183,7 +201,7 @@ bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& runT // Adjust the runtime info for the arguments passed to the model, // modifying the buffer location, and possibly the dimensions. 
- auto updateForArguments = [this, &runTimePoolInfos](const std::vector<uint32_t>& indexes, + auto updateForArguments = [this, &requestPoolInfos](const std::vector<uint32_t>& indexes, const hidl_vec<RequestArgument>& arguments) { nnAssert(indexes.size() == arguments.size()); for (size_t i = 0; i < indexes.size(); i++) { @@ -203,8 +221,8 @@ bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& runT nnAssert(to.buffer == nullptr); } else { auto poolIndex = from.location.poolIndex; - nnAssert(poolIndex < runTimePoolInfos.size()); - auto& r = runTimePoolInfos[poolIndex]; + nnAssert(poolIndex < requestPoolInfos.size()); + auto& r = requestPoolInfos[poolIndex]; to.buffer = r.buffer + from.location.offset; } } diff --git a/nn/common/Utils.cpp b/nn/common/Utils.cpp index f73b12cec..245626731 100644 --- a/nn/common/Utils.cpp +++ b/nn/common/Utils.cpp @@ -245,6 +245,16 @@ uint32_t alignBytesNeeded(uint32_t index, size_t length) { return extra; } +void logModelToInfo(const Model& model) { + LOG(INFO) << "Model start"; + LOG(INFO) << "operands" << toString(model.operands); + LOG(INFO) << "operations" << toString(model.operations); + LOG(INFO) << "inputIndexes" << toString(model.inputIndexes); + LOG(INFO) << "outputIndexes" << toString(model.outputIndexes); + LOG(INFO) << "operandValues size" << model.operandValues.size(); + LOG(INFO) << "pools" << toString(model.pools); +} + // Validates the type. The used dimensions can be underspecified. 
int validateOperandType(const ANeuralNetworksOperandType& type, const char* tag, bool allowPartial) { diff --git a/nn/common/include/CpuExecutor.h b/nn/common/include/CpuExecutor.h index dd92eaf1b..b765efc7c 100644 --- a/nn/common/include/CpuExecutor.h +++ b/nn/common/include/CpuExecutor.h @@ -55,10 +55,7 @@ struct RunTimeOperandInfo { uint32_t numberOfUsesLeft; Shape shape() const { - return Shape{.type = type, - .dimensions = dimensions, - .scale = scale, - .offset = zeroPoint}; + return Shape{.type = type, .dimensions = dimensions, .scale = scale, .offset = zeroPoint}; } }; @@ -72,6 +69,9 @@ struct RunTimePoolInfo { bool update(); }; +bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos, + const hidl_vec<hidl_memory>& pools); + // This class is used to execute a model on the CPU. class CpuExecutor { public: @@ -80,17 +80,17 @@ public: // The model must outlive the executor. We prevent it from being modified // while this is executing. int run(const Model& model, const Request& request, - const std::vector<RunTimePoolInfo>& runTimePoolInfos); + const std::vector<RunTimePoolInfo>& modelPoolInfos, + const std::vector<RunTimePoolInfo>& requestPoolInfos); private: - bool initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& runTimePoolInfos); + bool initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos, + const std::vector<RunTimePoolInfo>& requestPoolInfos); // Runs one operation of the graph. int executeOperation(const Operation& entry); // Decrement the usage count for the operands listed. Frees the memory // allocated for any temporary variable with a count of zero. void freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs); - void setLocationAndUses(RunTimeOperandInfo* to, const DataLocation& location, - const std::vector<RunTimePoolInfo>& runTimePoolInfos); // The model and the request that we'll execute. Only valid while run() // is being executed. 
diff --git a/nn/common/include/Utils.h b/nn/common/include/Utils.h index da035592b..3eebf2606 100644 --- a/nn/common/include/Utils.h +++ b/nn/common/include/Utils.h @@ -106,6 +106,9 @@ hidl_memory allocateSharedMemory(int64_t size); // to determine what this should be. uint32_t alignBytesNeeded(uint32_t index, size_t length); +// Does a detailed LOG(INFO) of the model +void logModelToInfo(const Model& model); + inline void setFromIntList(hidl_vec<uint32_t>* vec, uint32_t count, const uint32_t* data) { vec->resize(count); for (uint32_t i = 0; i < count; i++) { diff --git a/nn/driver/sample/SampleDriver.cpp b/nn/driver/sample/SampleDriver.cpp index 902d4e8c0..faeecae07 100644 --- a/nn/driver/sample/SampleDriver.cpp +++ b/nn/driver/sample/SampleDriver.cpp @@ -31,7 +31,10 @@ namespace sample_driver { Return<ErrorStatus> SampleDriver::prepareModel(const Model& model, const sp<IPreparedModelCallback>& callback) { - VLOG(DRIVER) << "prepareModel(" << toString(model) << ")"; // TODO errror + if (VLOG_IS_ON(DRIVER)) { + VLOG(DRIVER) << "prepareModel"; + logModelToInfo(model); + } if (callback.get() == nullptr) { LOG(ERROR) << "invalid callback passed to prepareModel"; return ErrorStatus::INVALID_ARGUMENT; @@ -42,9 +45,12 @@ Return<ErrorStatus> SampleDriver::prepareModel(const Model& model, } // TODO: make asynchronous later - sp<IPreparedModel> preparedModel = new SamplePreparedModel(model); + sp<SamplePreparedModel> preparedModel = new SamplePreparedModel(model); + if (!preparedModel->initialize()) { + callback->notify(ErrorStatus::INVALID_ARGUMENT, nullptr); + return ErrorStatus::INVALID_ARGUMENT; + } callback->notify(ErrorStatus::NONE, preparedModel); - return ErrorStatus::NONE; } @@ -64,27 +70,20 @@ int SampleDriver::run() { return 1; } -static bool mapPools(std::vector<RunTimePoolInfo>* poolInfos, const hidl_vec<hidl_memory>& pools) { - poolInfos->resize(pools.size()); - for (size_t i = 0; i < pools.size(); i++) { - auto& poolInfo = (*poolInfos)[i]; - if 
(!poolInfo.set(pools[i])) { - return false; - } - } - return true; +bool SamplePreparedModel::initialize() { + return setRunTimePoolInfosFromHidlMemories(&mPoolInfos, mModel.pools); } void SamplePreparedModel::asyncExecute(const Request& request, const sp<IExecutionCallback>& callback) { - std::vector<RunTimePoolInfo> poolInfo; - if (!mapPools(&poolInfo, request.pools)) { + std::vector<RunTimePoolInfo> requestPoolInfos; + if (!setRunTimePoolInfosFromHidlMemories(&requestPoolInfos, request.pools)) { callback->notify(ErrorStatus::GENERAL_FAILURE); return; } CpuExecutor executor; - int n = executor.run(mModel, request, poolInfo); + int n = executor.run(mModel, request, mPoolInfos, requestPoolInfos); VLOG(DRIVER) << "executor.run returned " << n; ErrorStatus executionStatus = n == ANEURALNETWORKS_NO_ERROR ? ErrorStatus::NONE : ErrorStatus::GENERAL_FAILURE; diff --git a/nn/driver/sample/SampleDriver.h b/nn/driver/sample/SampleDriver.h index 51581fed7..7e95c952b 100644 --- a/nn/driver/sample/SampleDriver.h +++ b/nn/driver/sample/SampleDriver.h @@ -17,6 +17,7 @@ #ifndef ANDROID_ML_NN_SAMPLE_DRIVER_SAMPLE_DRIVER_H #define ANDROID_ML_NN_SAMPLE_DRIVER_SAMPLE_DRIVER_H +#include "CpuExecutor.h" #include "HalInterfaces.h" #include "NeuralNetworks.h" @@ -52,12 +53,15 @@ public: : // Make a copy of the model, as we need to preserve it. 
mModel(model) {} ~SamplePreparedModel() override {} + bool initialize(); Return<ErrorStatus> execute(const Request& request, const sp<IExecutionCallback>& callback) override; private: void asyncExecute(const Request& request, const sp<IExecutionCallback>& callback); + Model mModel; + std::vector<RunTimePoolInfo> mPoolInfos; }; } // namespace sample_driver diff --git a/nn/runtime/ExecutionBuilder.cpp b/nn/runtime/ExecutionBuilder.cpp index 56dc723b2..077e068d8 100644 --- a/nn/runtime/ExecutionBuilder.cpp +++ b/nn/runtime/ExecutionBuilder.cpp @@ -96,8 +96,7 @@ ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation) : mModel(compilation->mModel), mPlan(&compilation->mPlan), mInputs(mModel->inputCount()), - mOutputs(mModel->outputCount()), - mMemories(mModel->getMemories()) { + mOutputs(mModel->outputCount()) { VLOG(EXECUTION) << "ExecutionBuilder::ExecutionBuilder"; } @@ -600,10 +599,11 @@ int StepExecutor::startComputeOnDevice(sp<ExecutionCallback>* synchronizationCal } static void asyncStartComputeOnCpu(const Model& model, const Request& request, - const std::vector<RunTimePoolInfo>& runTimePoolInfos, + const std::vector<RunTimePoolInfo>& modelPoolInfos, + const std::vector<RunTimePoolInfo>& requestPoolInfos, const sp<IExecutionCallback>& executionCallback) { CpuExecutor executor; - int err = executor.run(model, request, runTimePoolInfos); + int err = executor.run(model, request, modelPoolInfos, requestPoolInfos); ErrorStatus status = err == ANEURALNETWORKS_NO_ERROR ? 
ErrorStatus::NONE : ErrorStatus::GENERAL_FAILURE; executionCallback->notify(status); @@ -622,23 +622,30 @@ int StepExecutor::startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallba sp<ExecutionCallback> executionCallback = new ExecutionCallback(); *synchronizationCallback = nullptr; - std::vector<RunTimePoolInfo> runTimePoolInfos; + std::vector<RunTimePoolInfo> modelPoolInfos; + if (!setRunTimePoolInfosFromHidlMemories(&modelPoolInfos, model.pools)) { + return ANEURALNETWORKS_UNMAPPABLE; + } + + std::vector<RunTimePoolInfo> requestPoolInfos; uint32_t count = mMemories.size(); - runTimePoolInfos.resize(count); + requestPoolInfos.resize(count); for (uint32_t i = 0; i < count; i++) { const Memory* mem = mMemories[i]; - runTimePoolInfos[i].set(mem->getHidlMemory()); + if (!requestPoolInfos[i].set(mem->getHidlMemory())) { + return ANEURALNETWORKS_UNMAPPABLE; + } } // Create as many pools as there are input / output. - auto fixPointerArguments = [&runTimePoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) { + auto fixPointerArguments = [&requestPoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) { for (ModelArgumentInfo& argumentInfo : argumentInfos) { if (argumentInfo.state == ModelArgumentInfo::POINTER) { RunTimePoolInfo runTimeInfo = { .buffer = static_cast<uint8_t*>(argumentInfo.buffer)}; argumentInfo.locationAndLength.poolIndex = - static_cast<uint32_t>(runTimePoolInfos.size()); + static_cast<uint32_t>(requestPoolInfos.size()); argumentInfo.locationAndLength.offset = 0; - runTimePoolInfos.push_back(runTimeInfo); + requestPoolInfos.push_back(runTimeInfo); } } }; @@ -651,7 +658,8 @@ int StepExecutor::startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallba // TODO: should model be moved with a std::cref? 
std::thread thread(asyncStartComputeOnCpu, model, std::move(request), - std::move(runTimePoolInfos), executionCallback); + std::move(modelPoolInfos), std::move(requestPoolInfos), + executionCallback); executionCallback->bind_thread(std::move(thread)); *synchronizationCallback = executionCallback; diff --git a/nn/runtime/ExecutionPlan.cpp b/nn/runtime/ExecutionPlan.cpp index d2f74d5cf..009fc3366 100644 --- a/nn/runtime/ExecutionPlan.cpp +++ b/nn/runtime/ExecutionPlan.cpp @@ -360,9 +360,11 @@ int ExecutionStep::finishSubModel(const ModelBuilder* fromModel, bool* hasOutput void ExecutionStep::dump() const { Model model; mSubModel->setHidlModel(&model); - VLOG(COMPILATION) << "ExecutionStep#" << mIndex - << " for " << (mDevice == nullptr ? "CPU" : mDevice->getName()) - << " submodel: " << toString(model); + if (VLOG_IS_ON(COMPILATION)) { + VLOG(COMPILATION) << "ExecutionStep#" << mIndex + << " for " << (mDevice == nullptr ? "CPU" : mDevice->getName()); + logModelToInfo(model); + } } int ExecutionPlan::CompoundBody::finish(const ModelBuilder* fromModel) { @@ -750,8 +752,8 @@ int ModelBuilder::partitionTheWork(const std::vector<std::shared_ptr<Device>>& d if (VLOG_IS_ON(COMPILATION)) { Model model; setHidlModel(&model); - VLOG(COMPILATION) << "ModelBuilder::partitionTheWork: original model: " - << toString(model); + VLOG(COMPILATION) << "ModelBuilder::partitionTheWork: original model: "; + logModelToInfo(model); plan->dump(); } return n; diff --git a/nn/runtime/Memory.cpp b/nn/runtime/Memory.cpp index 9b05dbf4e..5660e0272 100644 --- a/nn/runtime/Memory.cpp +++ b/nn/runtime/Memory.cpp @@ -109,12 +109,14 @@ int MemoryFd::getPointer(uint8_t** buffer) const { } uint32_t MemoryTracker::add(const Memory* memory) { + VLOG(MODEL) << __func__ << " for " << memory; // See if we already have this memory. If so, // return its index. auto i = mKnown.find(memory); if (i != mKnown.end()) { return i->second; } + VLOG(MODEL) << "It's new"; // It's a new one. 
Save it an assign an index to it. size_t next = mKnown.size(); if (next > 0xFFFFFFFF) { diff --git a/nn/runtime/ModelBuilder.cpp b/nn/runtime/ModelBuilder.cpp index 2274b89c7..f446beeb2 100644 --- a/nn/runtime/ModelBuilder.cpp +++ b/nn/runtime/ModelBuilder.cpp @@ -58,6 +58,7 @@ int ModelBuilder::addOperand(const ANeuralNetworksOperandType& type) { } int ModelBuilder::setOperandValue(uint32_t index, const void* buffer, size_t length) { + VLOG(MODEL) << __func__ << " for operand " << index << " size " << length; if (index >= operandCount()) { LOG(ERROR) << "ANeuralNetworksModel_setOperandValue setting operand " << index << " of " << operandCount(); @@ -76,25 +77,81 @@ int ModelBuilder::setOperandValue(uint32_t index, const void* buffer, size_t len .offset = 0, .length = 0}; } else { + if (length > 0xFFFFFFFF) { + LOG(ERROR) << "ANeuralNetworksModel_setOperandValue value length of " << length + << " exceeds max size"; + return ANEURALNETWORKS_BAD_DATA; + } + uint32_t valueLength = static_cast<uint32_t>(length); uint32_t neededLength = sizeOfData(operand.type, operand.dimensions); - if (neededLength != length) { - LOG(ERROR) << "ANeuralNetworksModel_setOperandValue setting " << length + if (neededLength != valueLength) { + LOG(ERROR) << "ANeuralNetworksModel_setOperandValue setting " << valueLength << " bytes when needing " << neededLength; return ANEURALNETWORKS_BAD_DATA; } - uint32_t existingSize = static_cast<uint32_t>(mOperandValues.size()); - uint32_t extraBytes = alignBytesNeeded(existingSize, length); - mOperandValues.resize(existingSize + extraBytes + length); - operand.lifetime = OperandLifeTime::CONSTANT_COPY; - operand.location = { - .poolIndex = 0, .offset = existingSize + extraBytes, .length = neededLength}; - memcpy(&mOperandValues[operand.location.offset], buffer, length); + if (valueLength <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) { + uint32_t existingSize = static_cast<uint32_t>(mSmallOperandValues.size()); + uint32_t extraBytes = 
alignBytesNeeded(existingSize, valueLength); + mSmallOperandValues.resize(existingSize + extraBytes + valueLength); + operand.lifetime = OperandLifeTime::CONSTANT_COPY; + operand.location = { + .poolIndex = 0, .offset = existingSize + extraBytes, .length = neededLength}; + memcpy(&mSmallOperandValues[operand.location.offset], buffer, valueLength); + VLOG(MODEL) << "Copied small value to offset " << operand.location.offset; + } else { + VLOG(MODEL) << "Saving large value"; + operand.lifetime = OperandLifeTime::CONSTANT_REFERENCE; + // The values for poolIndex and offset will be set when the model is finished. + operand.location = {.poolIndex = 0, .offset = 0, .length = valueLength}; + // We keep track of the buffers. We'll allocate the shared memory only + // once we know the total size, to avoid needless copies. + mLargeOperandValues.push_back(LargeValue{.operandIndex = index, .buffer = buffer}); + } + } + return ANEURALNETWORKS_NO_ERROR; +} + +int ModelBuilder::copyLargeValuesToSharedMemory() { + VLOG(MODEL) << __func__ << " has " << mLargeOperandValues.size() << " values."; + if (!mLargeOperandValues.empty()) { + // Calculate the size of the shared memory needed for all the large values. + // Also sets the offset for each value within the memory. + size_t poolSize = 0; + for (LargeValue& l: mLargeOperandValues) { + Operand& operand = mOperands[l.operandIndex]; + nnAssert(operand.lifetime == OperandLifeTime::CONSTANT_REFERENCE); + poolSize += alignBytesNeeded(poolSize, operand.location.length); + operand.location.offset = poolSize; + poolSize += operand.location.length; + } + + // Allocated the shared memory. 
+ int n = mLargeValueMemory.create(poolSize); + if (n != ANEURALNETWORKS_NO_ERROR) { + return n; + } + uint8_t* memoryPointer = nullptr; + n = mLargeValueMemory.getPointer(&memoryPointer); + if (n != ANEURALNETWORKS_NO_ERROR) { + return n; + } + uint32_t poolIndex = mMemories.add(&mLargeValueMemory); + VLOG(MODEL) << "Allocated large value pool of size " << poolSize << " at index " + << poolIndex; + + // Copy the values to this memory. + for (LargeValue& l: mLargeOperandValues) { + Operand& operand = mOperands[l.operandIndex]; + operand.location.poolIndex = poolIndex; + memcpy(memoryPointer + operand.location.offset, l.buffer, operand.location.length); + } } return ANEURALNETWORKS_NO_ERROR; } int ModelBuilder::setOperandValueFromMemory(uint32_t index, const Memory* memory, uint32_t offset, size_t length) { + VLOG(MODEL) << __func__ << " for operand " << index << " offset " << offset << " size " << length; if (index >= operandCount()) { LOG(ERROR) << "ANeuralNetworksModel_setOperandValueFromMemory setting operand " << index << " of " << operandCount(); @@ -223,8 +280,14 @@ int ModelBuilder::finish() { return ANEURALNETWORKS_BAD_STATE; } + int n = copyLargeValuesToSharedMemory(); + if (n != ANEURALNETWORKS_NO_ERROR) { + return n; + } + // We sort the operations so that they will be in the appropriate // order for a single-threaded, op at a time execution. + // TODO: we don't need this if we always run the partitioner. 
sortIntoRunOrder(); mCompletedModel = true; return ANEURALNETWORKS_NO_ERROR; @@ -282,7 +345,7 @@ void ModelBuilder::setHidlModel(Model* model) const { model->operations = mOperations; model->inputIndexes = mInputIndexes; model->outputIndexes = mOutputIndexes; - model->operandValues = mOperandValues; + model->operandValues = mSmallOperandValues; uint32_t count = mMemories.size(); model->pools.resize(count); diff --git a/nn/runtime/ModelBuilder.h b/nn/runtime/ModelBuilder.h index edb646614..d5ab078bf 100644 --- a/nn/runtime/ModelBuilder.h +++ b/nn/runtime/ModelBuilder.h @@ -78,7 +78,7 @@ public: const MemoryTracker& getMemories() const { return mMemories; } const std::vector<Operation>& getOperations() const { return mOperations; } const uint8_t* getPointerToOperandValue(uint32_t offset) const { - return mOperandValues.data() + offset; + return mSmallOperandValues.data() + offset; } int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices, @@ -99,12 +99,9 @@ public: // Sorts the operations to be in the correct order for single threaded // node-at-a-time execution. void sortIntoRunOrder(); - /* - int32_t getOperandIndex(const ArrayInfo& info, uint32_t listIndex) const { - nnAssert(listIndex < info.count); - return mOperandIndexes[info.offset + listIndex]; - } - */ + + // Copies the large values to a shared memory, if we have any. + int copyLargeValuesToSharedMemory(); // The operations of the graph. std::vector<Operation> mOperations; @@ -118,11 +115,18 @@ public: MemoryTracker mMemories; - // The value of the operands that are defined at model + // The value of the small operands that are defined at model // creation time. - // TODO We are copying all the values. Once we support memory - // pools, revisit. - std::vector<uint8_t> mOperandValues; + std::vector<uint8_t> mSmallOperandValues; + + struct LargeValue { + uint32_t operandIndex; + const void* buffer; + }; + // Operand index and buffer pointer for all the large operand values of this model. 
+ std::vector<LargeValue> mLargeOperandValues; + // The shared memory region that will contain the large values. + Memory mLargeValueMemory; // Once the model has been finished, we should not allow further // modifications to the model. diff --git a/nn/runtime/NeuralNetworks.cpp b/nn/runtime/NeuralNetworks.cpp index 979ca7fc1..3766e3b0e 100644 --- a/nn/runtime/NeuralNetworks.cpp +++ b/nn/runtime/NeuralNetworks.cpp @@ -36,83 +36,86 @@ // Make sure the constants defined in the header files have not changed values. // IMPORTANT: When adding new values, update kNumberOfDataTypes or kNumberOfDataTypesOEM // in Utils.h. -static_assert(ANEURALNETWORKS_FLOAT32 == 0, "ANEURALNETWORKS_FLOAT32 may have changed"); -static_assert(ANEURALNETWORKS_INT32 == 1, "ANEURALNETWORKS_INT32 may have changed"); -static_assert(ANEURALNETWORKS_UINT32 == 2, "ANEURALNETWORKS_UINT32 may have changed"); +static_assert(ANEURALNETWORKS_FLOAT32 == 0, "ANEURALNETWORKS_FLOAT32 has changed"); +static_assert(ANEURALNETWORKS_INT32 == 1, "ANEURALNETWORKS_INT32 has changed"); +static_assert(ANEURALNETWORKS_UINT32 == 2, "ANEURALNETWORKS_UINT32 has changed"); static_assert(ANEURALNETWORKS_TENSOR_FLOAT32 == 3, - "ANEURALNETWORKS_TENSOR_FLOAT32 may have changed"); -static_assert(ANEURALNETWORKS_TENSOR_INT32 == 4, "ANEURALNETWORKS_TENSOR_INT32 may have changed"); + "ANEURALNETWORKS_TENSOR_FLOAT32 has changed"); +static_assert(ANEURALNETWORKS_TENSOR_INT32 == 4, "ANEURALNETWORKS_TENSOR_INT32 has changed"); static_assert(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM == 5, - "ANEURALNETWORKS_TENSOR_QUANT8_ASYMM may have changed"); -static_assert(ANEURALNETWORKS_OEM_SCALAR == 10000, "ANEURALNETWORKS_OEM_SCALAR may have changed"); + "ANEURALNETWORKS_TENSOR_QUANT8_ASYMM has changed"); +static_assert(ANEURALNETWORKS_OEM_SCALAR == 10000, "ANEURALNETWORKS_OEM_SCALAR has changed"); static_assert(ANEURALNETWORKS_TENSOR_OEM_BYTE == 10001, - "ANEURALNETWORKS_TENSOR_OEM_BYTE may have changed"); + "ANEURALNETWORKS_TENSOR_OEM_BYTE has 
changed"); // IMPORTANT: When adding new values, update kNumberOfOperationTypes or // kNumberOfOperationTypesOEMin Utils.h. -static_assert(ANEURALNETWORKS_ADD == 0, "ANEURALNETWORKS_ADD may have changed"); +static_assert(ANEURALNETWORKS_ADD == 0, "ANEURALNETWORKS_ADD has changed"); static_assert(ANEURALNETWORKS_AVERAGE_POOL_2D == 1, - "ANEURALNETWORKS_AVERAGE_POOL_2D may have changed"); -static_assert(ANEURALNETWORKS_CONCATENATION == 2, "ANEURALNETWORKS_CONCATENATION may have changed"); -static_assert(ANEURALNETWORKS_CONV_2D == 3, "ANEURALNETWORKS_CONV_2D may have changed"); + "ANEURALNETWORKS_AVERAGE_POOL_2D has changed"); +static_assert(ANEURALNETWORKS_CONCATENATION == 2, "ANEURALNETWORKS_CONCATENATION has changed"); +static_assert(ANEURALNETWORKS_CONV_2D == 3, "ANEURALNETWORKS_CONV_2D has changed"); static_assert(ANEURALNETWORKS_DEPTHWISE_CONV_2D == 4, - "ANEURALNETWORKS_DEPTHWISE_CONV_2D may have changed"); + "ANEURALNETWORKS_DEPTHWISE_CONV_2D has changed"); static_assert(ANEURALNETWORKS_DEPTH_TO_SPACE == 5, - "ANEURALNETWORKS_DEPTH_TO_SPACE may have changed"); -static_assert(ANEURALNETWORKS_DEQUANTIZE == 6, "ANEURALNETWORKS_DEQUANTIZE may have changed"); + "ANEURALNETWORKS_DEPTH_TO_SPACE has changed"); +static_assert(ANEURALNETWORKS_DEQUANTIZE == 6, "ANEURALNETWORKS_DEQUANTIZE has changed"); static_assert(ANEURALNETWORKS_EMBEDDING_LOOKUP == 7, - "ANEURALNETWORKS_EMBEDDING_LOOKUP may have changed"); -static_assert(ANEURALNETWORKS_FLOOR == 8, "ANEURALNETWORKS_FLOOR may have changed"); + "ANEURALNETWORKS_EMBEDDING_LOOKUP has changed"); +static_assert(ANEURALNETWORKS_FLOOR == 8, "ANEURALNETWORKS_FLOOR has changed"); static_assert(ANEURALNETWORKS_FULLY_CONNECTED == 9, - "ANEURALNETWORKS_FULLY_CONNECTED may have changed"); + "ANEURALNETWORKS_FULLY_CONNECTED has changed"); static_assert(ANEURALNETWORKS_HASHTABLE_LOOKUP == 10, - "ANEURALNETWORKS_HASHTABLE_LOOKUP may have changed"); + "ANEURALNETWORKS_HASHTABLE_LOOKUP has changed"); 
static_assert(ANEURALNETWORKS_L2_NORMALIZATION == 11, - "ANEURALNETWORKS_L2_NORMALIZATION may have changed"); -static_assert(ANEURALNETWORKS_L2_POOL_2D == 12, "ANEURALNETWORKS_L2_POOL may have changed"); + "ANEURALNETWORKS_L2_NORMALIZATION has changed"); +static_assert(ANEURALNETWORKS_L2_POOL_2D == 12, "ANEURALNETWORKS_L2_POOL has changed"); static_assert(ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION == 13, - "ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION may have changed"); -static_assert(ANEURALNETWORKS_LOGISTIC == 14, "ANEURALNETWORKS_LOGISTIC may have changed"); + "ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION has changed"); +static_assert(ANEURALNETWORKS_LOGISTIC == 14, "ANEURALNETWORKS_LOGISTIC has changed"); static_assert(ANEURALNETWORKS_LSH_PROJECTION == 15, - "ANEURALNETWORKS_LSH_PROJECTION may have changed"); -static_assert(ANEURALNETWORKS_LSTM == 16, "ANEURALNETWORKS_LSTM may have changed"); -static_assert(ANEURALNETWORKS_MAX_POOL_2D == 17, "ANEURALNETWORKS_MAX_POOL may have changed"); -static_assert(ANEURALNETWORKS_MUL == 18, "ANEURALNETWORKS_MUL may have changed"); -static_assert(ANEURALNETWORKS_RELU == 19, "ANEURALNETWORKS_RELU may have changed"); -static_assert(ANEURALNETWORKS_RELU1 == 20, "ANEURALNETWORKS_RELU1 may have changed"); -static_assert(ANEURALNETWORKS_RELU6 == 21, "ANEURALNETWORKS_RELU6 may have changed"); -static_assert(ANEURALNETWORKS_RESHAPE == 22, "ANEURALNETWORKS_RESHAPE may have changed"); + "ANEURALNETWORKS_LSH_PROJECTION has changed"); +static_assert(ANEURALNETWORKS_LSTM == 16, "ANEURALNETWORKS_LSTM has changed"); +static_assert(ANEURALNETWORKS_MAX_POOL_2D == 17, "ANEURALNETWORKS_MAX_POOL has changed"); +static_assert(ANEURALNETWORKS_MUL == 18, "ANEURALNETWORKS_MUL has changed"); +static_assert(ANEURALNETWORKS_RELU == 19, "ANEURALNETWORKS_RELU has changed"); +static_assert(ANEURALNETWORKS_RELU1 == 20, "ANEURALNETWORKS_RELU1 has changed"); +static_assert(ANEURALNETWORKS_RELU6 == 21, "ANEURALNETWORKS_RELU6 has changed"); 
+static_assert(ANEURALNETWORKS_RESHAPE == 22, "ANEURALNETWORKS_RESHAPE has changed"); static_assert(ANEURALNETWORKS_RESIZE_BILINEAR == 23, - "ANEURALNETWORKS_RESIZE_BILINEAR may have changed"); -static_assert(ANEURALNETWORKS_RNN == 24, "ANEURALNETWORKS_RNN may have changed"); -static_assert(ANEURALNETWORKS_SOFTMAX == 25, "ANEURALNETWORKS_SOFTMAX may have changed"); + "ANEURALNETWORKS_RESIZE_BILINEAR has changed"); +static_assert(ANEURALNETWORKS_RNN == 24, "ANEURALNETWORKS_RNN has changed"); +static_assert(ANEURALNETWORKS_SOFTMAX == 25, "ANEURALNETWORKS_SOFTMAX has changed"); static_assert(ANEURALNETWORKS_SPACE_TO_DEPTH == 26, - "ANEURALNETWORKS_SPACE_TO_DEPTH may have changed"); -static_assert(ANEURALNETWORKS_SVDF == 27, "ANEURALNETWORKS_SVDF may have changed"); -static_assert(ANEURALNETWORKS_TANH == 28, "ANEURALNETWORKS_TANH may have changed"); + "ANEURALNETWORKS_SPACE_TO_DEPTH has changed"); +static_assert(ANEURALNETWORKS_SVDF == 27, "ANEURALNETWORKS_SVDF has changed"); +static_assert(ANEURALNETWORKS_TANH == 28, "ANEURALNETWORKS_TANH has changed"); static_assert(ANEURALNETWORKS_OEM_OPERATION == 10000, - "ANEURALNETWORKS_OEM_OPERATION may have changed"); + "ANEURALNETWORKS_OEM_OPERATION has changed"); -static_assert(ANEURALNETWORKS_FUSED_NONE == 0, "ANEURALNETWORKS_FUSED_NONE may have changed"); -static_assert(ANEURALNETWORKS_FUSED_RELU == 1, "ANEURALNETWORKS_FUSED_RELU may have changed"); -static_assert(ANEURALNETWORKS_FUSED_RELU1 == 2, "ANEURALNETWORKS_FUSED_RELU1 may have changed"); -static_assert(ANEURALNETWORKS_FUSED_RELU6 == 3, "ANEURALNETWORKS_FUSED_RELU6 may have changed"); +static_assert(ANEURALNETWORKS_FUSED_NONE == 0, "ANEURALNETWORKS_FUSED_NONE has changed"); +static_assert(ANEURALNETWORKS_FUSED_RELU == 1, "ANEURALNETWORKS_FUSED_RELU has changed"); +static_assert(ANEURALNETWORKS_FUSED_RELU1 == 2, "ANEURALNETWORKS_FUSED_RELU1 has changed"); +static_assert(ANEURALNETWORKS_FUSED_RELU6 == 3, "ANEURALNETWORKS_FUSED_RELU6 has changed"); 
static_assert(ANEURALNETWORKS_PREFER_LOW_POWER == 0, - "ANEURALNETWORKS_PREFER_LOW_POWER may have changed"); + "ANEURALNETWORKS_PREFER_LOW_POWER has changed"); static_assert(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER == 1, - "ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER may have changed"); + "ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER has changed"); static_assert(ANEURALNETWORKS_PREFER_SUSTAINED_SPEED == 2, - "ANEURALNETWORKS_PREFER_SUSTAINED_SPEED may have changed"); + "ANEURALNETWORKS_PREFER_SUSTAINED_SPEED has changed"); -static_assert(ANEURALNETWORKS_NO_ERROR == 0, "ANEURALNETWORKS_NO_ERROR may have changed"); -static_assert(ANEURALNETWORKS_OUT_OF_MEMORY == 1, "ANEURALNETWORKS_OUT_OF_MEMORY may have changed"); -static_assert(ANEURALNETWORKS_INCOMPLETE == 2, "ANEURALNETWORKS_INCOMPLETE may have changed"); +static_assert(ANEURALNETWORKS_NO_ERROR == 0, "ANEURALNETWORKS_NO_ERROR has changed"); +static_assert(ANEURALNETWORKS_OUT_OF_MEMORY == 1, "ANEURALNETWORKS_OUT_OF_MEMORY has changed"); +static_assert(ANEURALNETWORKS_INCOMPLETE == 2, "ANEURALNETWORKS_INCOMPLETE has changed"); static_assert(ANEURALNETWORKS_UNEXPECTED_NULL == 3, - "ANEURALNETWORKS_UNEXPECTED_NULL may have changed"); -static_assert(ANEURALNETWORKS_BAD_DATA == 4, "ANEURALNETWORKS_BAD_DATA may have changed"); -static_assert(ANEURALNETWORKS_OP_FAILED == 5, "ANEURALNETWORKS_OP_FAILED may have changed"); -static_assert(ANEURALNETWORKS_BAD_STATE == 6, "ANEURALNETWORKS_BAD_STATE may have changed"); + "ANEURALNETWORKS_UNEXPECTED_NULL has changed"); +static_assert(ANEURALNETWORKS_BAD_DATA == 4, "ANEURALNETWORKS_BAD_DATA has changed"); +static_assert(ANEURALNETWORKS_OP_FAILED == 5, "ANEURALNETWORKS_OP_FAILED has changed"); +static_assert(ANEURALNETWORKS_BAD_STATE == 6, "ANEURALNETWORKS_BAD_STATE has changed"); + +static_assert(ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES == 128, + "ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES has changed"); // Make sure that the constants are compatible with the 
values defined in // hardware/interfaces/neuralnetworks/1.0/types.hal. diff --git a/nn/runtime/include/NeuralNetworks.h b/nn/runtime/include/NeuralNetworks.h index 3d93ef034..7bb9e32a7 100644 --- a/nn/runtime/include/NeuralNetworks.h +++ b/nn/runtime/include/NeuralNetworks.h @@ -1205,6 +1205,15 @@ typedef enum { } ResultCode; /** + * For {@link ANeuralNetworksModel_setOperandValue}, values with a + * length smaller or equal to this will be immediately copied into + * the model. The size is in bytes. + */ +enum { + ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES = 128 +}; + +/** * ANeuralNetworksMemory is an opaque type that represents memory. * * This type is used to represent shared memory, memory mapped files, @@ -1468,13 +1477,18 @@ int ANeuralNetworksModel_addOperand(ANeuralNetworksModel* model, /** * Sets an operand to a constant value. * - * For scalar values, the content of buffer is copied into the model. + * Values of length smaller or equal to + * {@link ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES} + * are immediately copied into the model. + * + * For values of length greater than {@link ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES}, + * a pointer to the buffer is stored within the model. The application is responsible + * for not changing the content of this region until all executions using this model + * have completed. As the data may be copied during processing, modifying the data + * after this call yields undefined results. * - * For tensor values, a pointer to the buffer is stored within the model. - * The application is responsible for not changing the content of this region - * until all executions using this model have completed. As the data may - * be copied during processing, modifying the data after this call yields - * undefined results. + * For large tensors, using {@link ANeuralNetworksModel_setOperandValueFromMemory} + * is likely to be more efficient. 
* * To indicate that an optional operand should be considered missing, * pass nullptr for buffer and 0 for length. |