Diffstat (limited to 'nn')
-rw-r--r--  nn/TEST_MAPPING                        16
-rw-r--r--  nn/common/Utils.cpp                    44
-rw-r--r--  nn/common/include/Utils.h               2
-rw-r--r--  nn/runtime/Manager.cpp                  8
-rw-r--r--  nn/runtime/VersionedInterfaces.cpp     27
-rw-r--r--  nn/runtime/test/TestCompliance.cpp     32
-rw-r--r--  nn/runtime/test/TestPartitioning.cpp  124
7 files changed, 198 insertions, 55 deletions
diff --git a/nn/TEST_MAPPING b/nn/TEST_MAPPING
index f4d4501f9..f3eaa9940 100644
--- a/nn/TEST_MAPPING
+++ b/nn/TEST_MAPPING
@@ -1,10 +1,22 @@
{
"presubmit": [
{
- "name": "CtsNNAPITestCases"
+ "name": "CtsNNAPITestCases",
+ "options": [
+ {
+ // b/153876253, temporarily filter out failing l2_norm tests
+ "include-filter": "-*l2_normalization_axis_corner_case*"
+ }
+ ]
},
{
- "name": "NeuralNetworksTest_static"
+ "name": "NeuralNetworksTest_static",
+ "options": [
+ {
+ // b/153876253, temporarily filter out failing l2_norm tests
+ "include-filter": "-*l2_normalization_axis_corner_case*"
+ }
+ ]
},
{
"name": "NeuralNetworksTest_utils"
diff --git a/nn/common/Utils.cpp b/nn/common/Utils.cpp
index cd97ffa52..81e5cf1e1 100644
--- a/nn/common/Utils.cpp
+++ b/nn/common/Utils.cpp
@@ -21,6 +21,8 @@
#include <android-base/logging.h>
#include <android-base/properties.h>
#include <android-base/strings.h>
+#include <errno.h>
+#include <poll.h>
#include <sys/system_properties.h>
#include <algorithm>
@@ -32,9 +34,6 @@
#include <utility>
#include <vector>
-#include <errno.h>
-#include <poll.h>
-
#include "ControlFlow.h"
#include "NeuralNetworks.h"
#include "NeuralNetworksOEM.h"
@@ -3100,7 +3099,22 @@ bool compliantWithV1_0(const V1_0::Request& request) {
bool compliantWithV1_0(const V1_3::Request& request) {
return std::all_of(request.pools.begin(), request.pools.end(), [](const auto& pool) {
- return pool.getDiscriminator() == V1_3::Request::MemoryPool::hidl_discriminator::hidlMemory;
+ if (pool.getDiscriminator() != V1_3::Request::MemoryPool::hidl_discriminator::hidlMemory) {
+ return false;
+ }
+ const auto& name = pool.hidlMemory().name();
+ return name == "ashmem" || name == "mmap_fd";
+ });
+}
+
+bool compliantWithV1_2(const V1_3::Request& request) {
+ return std::all_of(request.pools.begin(), request.pools.end(), [](const auto& pool) {
+ if (pool.getDiscriminator() != V1_3::Request::MemoryPool::hidl_discriminator::hidlMemory) {
+ return false;
+ }
+ const auto& name = pool.hidlMemory().name();
+ return name == "ashmem" || name == "mmap_fd" || name == "hardware_buffer_blob" ||
+ name == "hardware_buffer";
});
}
@@ -3123,17 +3137,29 @@ V1_0::Request convertToV1_0(const V1_0::Request& request) {
return request;
}
-V1_0::Request convertToV1_0(const V1_3::Request& request) {
- if (!compliantWithV1_0(request)) {
- LOG(ERROR) << "Upcasting non-compliant request " << SHOW_IF_DEBUG(toString(request))
- << " from V1_3::Request to V1_0::Request";
- }
+static V1_0::Request uncheckedConvertToV1_0(const V1_3::Request& request) {
hidl_vec<hidl_memory> pools(request.pools.size());
std::transform(request.pools.begin(), request.pools.end(), pools.begin(),
[](const auto& pool) { return convertToV1_0(pool); });
return {.inputs = request.inputs, .outputs = request.outputs, .pools = std::move(pools)};
}
+V1_0::Request convertToV1_0(const V1_3::Request& request) {
+ if (!compliantWithV1_0(request)) {
+ LOG(ERROR) << "Upcasting non-compliant request " << SHOW_IF_DEBUG(toString(request))
+ << " from V1_3::Request to V1_0::Request of version 1.0";
+ }
+ return uncheckedConvertToV1_0(request);
+}
+
+V1_0::Request convertToV1_2(const V1_3::Request& request) {
+ if (!compliantWithV1_2(request)) {
+ LOG(ERROR) << "Upcasting non-compliant request " << SHOW_IF_DEBUG(toString(request))
+ << " from V1_3::Request to V1_0::Request of version 1.2";
+ }
+ return uncheckedConvertToV1_0(request);
+}
+
V1_3::Request convertToV1_3(const V1_0::Request& request) {
hidl_vec<V1_3::Request::MemoryPool> pools(request.pools.size());
std::transform(request.pools.begin(), request.pools.end(), pools.begin(),
diff --git a/nn/common/include/Utils.h b/nn/common/include/Utils.h
index 24e69211c..ca11c5ebc 100644
--- a/nn/common/include/Utils.h
+++ b/nn/common/include/Utils.h
@@ -530,9 +530,11 @@ hal::hidl_vec<hal::V1_3::Operand> convertToV1_3(const hal::hidl_vec<hal::V1_3::O
bool compliantWithV1_0(const hal::V1_0::Request& request);
bool compliantWithV1_0(const hal::V1_3::Request& request);
+bool compliantWithV1_2(const hal::V1_3::Request& request);
hal::V1_0::Request convertToV1_0(const hal::V1_0::Request& request);
hal::V1_0::Request convertToV1_0(const hal::V1_3::Request& request);
+hal::V1_0::Request convertToV1_2(const hal::V1_3::Request& request);
hal::V1_3::Request convertToV1_3(const hal::V1_0::Request& request);
hal::V1_3::Request convertToV1_3(const hal::V1_3::Request& request);
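
The new helpers in Utils.cpp/Utils.h pair a per-HAL-version compliance check with a matching request conversion. A minimal sketch of the intended caller-side pattern, assuming the android::nn and HAL namespaces used elsewhere in the tree (dispatchToV1_2Driver and executeOn12Driver are illustrative names, not part of this change):

    #include <functional>

    #include "Utils.h"

    namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
    namespace V1_3 = ::android::hardware::neuralnetworks::V1_3;
    using ::android::nn::compliantWithV1_2;
    using ::android::nn::convertToV1_2;

    // Validate a V1_3 request against the 1.2 feature set before handing it to a
    // 1.2 driver; executeOn12Driver stands in for the actual HAL call.
    bool dispatchToV1_2Driver(const V1_3::Request& request,
                              const std::function<bool(const V1_0::Request&)>& executeOn12Driver) {
        // compliantWithV1_2 accepts plain hidl_memory pools named "ashmem",
        // "mmap_fd", "hardware_buffer_blob", or "hardware_buffer".
        if (!compliantWithV1_2(request)) {
            return false;  // the caller falls back or reports GENERAL_FAILURE
        }
        // The converted request is still typed V1_0::Request; only the accepted
        // pool kinds differ between convertToV1_0 and convertToV1_2.
        return executeOn12Driver(convertToV1_2(request));
    }
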
diff --git a/nn/runtime/Manager.cpp b/nn/runtime/Manager.cpp
index 310710e3c..634cd2aec 100644
--- a/nn/runtime/Manager.cpp
+++ b/nn/runtime/Manager.cpp
@@ -379,9 +379,9 @@ std::tuple<int, std::vector<OutputShape>, Timing> DriverPreparedModel::execute(
const bool burstCompute = (burstController != nullptr);
bool burstFallback = true;
if (burstCompute) {
- const bool compliant = compliantWithV1_0(request);
+ const bool compliant = compliantWithV1_2(request);
if (compliant) {
- V1_0::Request request10 = convertToV1_0(request);
+ V1_0::Request request12 = convertToV1_2(request);
std::vector<intptr_t> memoryIds;
memoryIds.reserve(localMemories.size());
for (const Memory* memory : localMemories) {
@@ -390,9 +390,9 @@ std::tuple<int, std::vector<OutputShape>, Timing> DriverPreparedModel::execute(
}
VLOG(EXECUTION) << "Before ExecutionBurstController->compute() "
- << SHOW_IF_DEBUG(toString(request10));
+ << SHOW_IF_DEBUG(toString(request12));
std::tie(n, outputShapes, timing, burstFallback) =
- burstController->compute(request10, measure, memoryIds);
+ burstController->compute(request12, measure, memoryIds);
}
}
diff --git a/nn/runtime/VersionedInterfaces.cpp b/nn/runtime/VersionedInterfaces.cpp
index 3ae950eac..33d290cfe 100644
--- a/nn/runtime/VersionedInterfaces.cpp
+++ b/nn/runtime/VersionedInterfaces.cpp
@@ -241,17 +241,16 @@ std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::execu
return getResults(*callback);
}
- const bool compliant = compliantWithV1_0(request);
- if (!compliant) {
- LOG(ERROR) << "Could not handle execute or execute_1_2!";
- return failWithStatus(ErrorStatus::GENERAL_FAILURE);
- }
- const V1_0::Request request10 = convertToV1_0(request);
-
// version 1.2 HAL
if (mPreparedModelV1_2 != nullptr) {
+ const bool compliant = compliantWithV1_2(request);
+ if (!compliant) {
+ LOG(ERROR) << "Could not handle execute_1_2!";
+ return failWithStatus(ErrorStatus::GENERAL_FAILURE);
+ }
+ const V1_0::Request request12 = convertToV1_2(request);
Return<V1_0::ErrorStatus> ret =
- mPreparedModelV1_2->execute_1_2(request10, measure, callback);
+ mPreparedModelV1_2->execute_1_2(request12, measure, callback);
if (ret.isDeadObject()) {
LOG(ERROR) << "execute_1_2 failure: " << ret.description();
return failDeadObject();
@@ -271,6 +270,12 @@ std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::execu
// version 1.0 HAL
if (mPreparedModelV1_0 != nullptr) {
+ const bool compliant = compliantWithV1_0(request);
+ if (!compliant) {
+ LOG(ERROR) << "Could not handle execute!";
+ return failWithStatus(ErrorStatus::GENERAL_FAILURE);
+ }
+ const V1_0::Request request10 = convertToV1_0(request);
Return<V1_0::ErrorStatus> ret = mPreparedModelV1_0->execute(request10, callback);
if (ret.isDeadObject()) {
LOG(ERROR) << "execute failure: " << ret.description();
@@ -324,16 +329,16 @@ std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::execu
// version 1.2 HAL
if (mPreparedModelV1_2 != nullptr) {
- const bool compliant = compliantWithV1_0(request);
+ const bool compliant = compliantWithV1_2(request);
if (!compliant) {
LOG(ERROR) << "Could not handle executeSynchronously!";
return kFailure;
}
- const V1_0::Request request10 = convertToV1_0(request);
+ const V1_0::Request request12 = convertToV1_2(request);
std::tuple<int, std::vector<OutputShape>, Timing> result;
Return<void> ret = mPreparedModelV1_2->executeSynchronously(
- request10, measure,
+ request12, measure,
[&result](V1_0::ErrorStatus error, const hidl_vec<OutputShape>& outputShapes,
const Timing& timing) {
result = getExecutionResult(convertToV1_3(error), outputShapes, timing);
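
With the compliance check moved inside each branch, a 1.2 prepared model only needs a 1.2-compliant request, while the 1.0 path keeps the stricter 1.0 check. A reduced sketch of that dispatch order, with the HAL execute calls abstracted behind callables (dispatchExecute, run12, run10, and kGeneralFailure are illustrative, not runtime symbols):

    #include "Utils.h"

    namespace V1_3 = ::android::hardware::neuralnetworks::V1_3;
    using namespace ::android::nn;

    constexpr int kGeneralFailure = -1;  // stand-in for GENERAL_FAILURE handling

    // run12/run10 stand in for execute_1_2 / execute on the prepared-model
    // interfaces; mPreparedModelV1_2 / V1_0 availability is reduced to two bools.
    template <typename Run12, typename Run10>
    int dispatchExecute(const V1_3::Request& request, bool have12, bool have10,
                        Run12 run12, Run10 run10) {
        if (have12) {
            // 1.2 drivers additionally understand AHardwareBuffer-backed pools.
            if (!compliantWithV1_2(request)) return kGeneralFailure;
            return run12(convertToV1_2(request));
        }
        if (have10) {
            // 1.0 drivers only understand ashmem / mmap_fd pools.
            if (!compliantWithV1_0(request)) return kGeneralFailure;
            return run10(convertToV1_0(request));
        }
        return kGeneralFailure;
    }
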
diff --git a/nn/runtime/test/TestCompliance.cpp b/nn/runtime/test/TestCompliance.cpp
index 53bff038b..db5ab4d3e 100644
--- a/nn/runtime/test/TestCompliance.cpp
+++ b/nn/runtime/test/TestCompliance.cpp
@@ -18,6 +18,7 @@
#include "GeneratedTestUtils.h"
#include "HalInterfaces.h"
+#include "Memory.h"
#include "MemoryUtils.h"
#include "ModelBuilder.h"
#include "TestNeuralNetworksWrapper.h"
@@ -71,8 +72,14 @@ static void testAvailableSinceV1_0(const WrapperModel& wrapperModel) {
ASSERT_TRUE(compliantWithV1_0(hidlModel));
}
+static void testAvailableSinceV1_2(const Request& request) {
+ ASSERT_FALSE(compliantWithV1_0(request));
+ ASSERT_TRUE(compliantWithV1_2(request));
+}
+
static void testAvailableSinceV1_3(const Request& request) {
ASSERT_FALSE(compliantWithV1_0(request));
+ ASSERT_FALSE(compliantWithV1_2(request));
}
static const WrapperOperandType kTypeTensorFloat(WrapperType::TENSOR_FLOAT32, {1});
@@ -126,7 +133,7 @@ TEST_F(ComplianceTest, Rank0TensorTemporaryVariable) {
testAvailableSinceV1_2(model);
}
-TEST_F(ComplianceTest, HardwareBuffer) {
+TEST_F(ComplianceTest, HardwareBufferModel) {
const size_t memorySize = 20;
AHardwareBuffer_Desc desc{
.width = memorySize,
@@ -157,6 +164,29 @@ TEST_F(ComplianceTest, HardwareBuffer) {
AHardwareBuffer_release(buffer);
}
+TEST_F(ComplianceTest, HardwareBufferRequest) {
+ const auto [n, ahwb] = MemoryRuntimeAHWB::create(1024);
+ ASSERT_EQ(n, ANEURALNETWORKS_NO_ERROR);
+ Request::MemoryPool sharedMemoryPool, ahwbMemoryPool = ahwb->getMemoryPool();
+ sharedMemoryPool.hidlMemory(allocateSharedMemory(1024));
+ ASSERT_TRUE(sharedMemoryPool.hidlMemory().valid());
+ ASSERT_TRUE(ahwbMemoryPool.hidlMemory().valid());
+
+ // AHardwareBuffer as input.
+ testAvailableSinceV1_2(Request{
+ .inputs = {{.hasNoValue = false, .location = {.poolIndex = 0}, .dimensions = {}}},
+ .outputs = {{.hasNoValue = false, .location = {.poolIndex = 1}, .dimensions = {}}},
+ .pools = {ahwbMemoryPool, sharedMemoryPool},
+ });
+
+ // AHardwareBuffer as output.
+ testAvailableSinceV1_2(Request{
+ .inputs = {{.hasNoValue = false, .location = {.poolIndex = 0}, .dimensions = {}}},
+ .outputs = {{.hasNoValue = false, .location = {.poolIndex = 1}, .dimensions = {}}},
+ .pools = {sharedMemoryPool, ahwbMemoryPool},
+ });
+}
+
TEST_F(ComplianceTest, DeviceMemory) {
Request::MemoryPool sharedMemoryPool, deviceMemoryPool;
sharedMemoryPool.hidlMemory(allocateSharedMemory(1024));
diff --git a/nn/runtime/test/TestPartitioning.cpp b/nn/runtime/test/TestPartitioning.cpp
index 3bde4cf90..7b4205ac8 100644
--- a/nn/runtime/test/TestPartitioning.cpp
+++ b/nn/runtime/test/TestPartitioning.cpp
@@ -86,6 +86,13 @@
// MINIMUM, POW, or PRELU. These operations take no activation
// function, so we only get 4 operation kinds, for which we
// use operation encodings 16..19.
+// - There is another collection of operations (each of which has one input
+// and one output):
+// - A single operation available at driver version V1_3 or
+// later. It is represented in the graph as HARD_SWISH.
+// These operations take no activation function, so we only get 1
+// operation kind, for which we use operation encoding 20.
+
// When we instantiate a device for testing purposes, we specify what subset of
// those operations the device is able to execute.
//
@@ -204,6 +211,11 @@ const uint32_t kFirstEncodingPRELU = kFirstEncodingPOW + 1;
const uint32_t kFirstEncodingV1_2 = kFirstEncodingMAXIMUM;
const uint32_t kLastEncodingV1_2 = kFirstEncodingPRELU;
+// V1_3 operations
+const uint32_t kFirstEncodingHARD_SWISH = kLastEncodingV1_2 + 1;
+const uint32_t kFirstEncodingV1_3 = kFirstEncodingHARD_SWISH;
+const uint32_t kLastEncodingV1_3 = kFirstEncodingHARD_SWISH;
+
const std::map<OperationType, uint32_t> operationToFirstEncoding = {
{OperationType::ADD, kFirstEncodingADD},
{OperationType::MUL, kFirstEncodingMUL},
@@ -213,6 +225,7 @@ const std::map<OperationType, uint32_t> operationToFirstEncoding = {
{OperationType::MINIMUM, kFirstEncodingMINIMUM},
{OperationType::POW, kFirstEncodingPOW},
{OperationType::PRELU, kFirstEncodingPRELU},
+ {OperationType::HARD_SWISH, kFirstEncodingHARD_SWISH},
};
// Sorted in reverse order (std::greater) so that we can use map::lower_bound to
@@ -227,6 +240,7 @@ const std::map<uint32_t, std::pair<uint32_t, bool>, std::greater<>> firstEncodin
{kFirstEncodingMINIMUM, {ANEURALNETWORKS_MINIMUM, false}},
{kFirstEncodingPOW, {ANEURALNETWORKS_POW, false}},
{kFirstEncodingPRELU, {ANEURALNETWORKS_PRELU, false}},
+ {kFirstEncodingHARD_SWISH, {ANEURALNETWORKS_HARD_SWISH, false}},
};
// Look up the operation with the specified index in a graph, and return the
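
firstEncodingToOperation is ordered with std::greater so that map::lower_bound, applied to any encoding, returns the start of the encoding range that contains it (with descending keys, lower_bound yields the largest key not exceeding the argument). A standalone illustration of that lookup, using only the two encoding ranges spelled out in the test's top-of-file comment (16..19 for the V1_2 group, 20 for HARD_SWISH); the map contents are simplified stand-ins for the test's table:

    #include <cassert>
    #include <cstdint>
    #include <functional>
    #include <map>

    int main() {
        const std::map<uint32_t, const char*, std::greater<>> firstEncodingToName = {
                {16, "MAXIMUM/MINIMUM/POW/PRELU group"},  // encodings 16..19
                {20, "HARD_SWISH"},                       // encoding 20
        };
        // lower_bound(18) finds the largest key <= 18, i.e. the group starting at 16.
        auto it = firstEncodingToName.lower_bound(18);
        assert(it != firstEncodingToName.end() && it->first == 16);
        // An exact first-encoding hit maps to its own group.
        assert(firstEncodingToName.lower_bound(20)->first == 20);
        return 0;
    }
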
@@ -664,6 +678,16 @@ class PartitioningModel : private WrapperModel {
return addOperation2To1(operation + kFirstEncodingV1_2, input0, input1, dimensionedOutput);
}
+ // Create a V1_3 operation with one input and one output, specifying the
+ // operation kind (where 0 is the first V1_3 operation) and the input
+ // operand index.
+ // Returns the output operand index.
+ uint32_t addOperation1To1V1_3(uint32_t operation, const uint32_t input0,
+ Dimensioned dimensionedOutput = Dimensioned::YES) {
+ CHECK_LE(operation, kLastEncodingV1_3 - kFirstEncodingV1_3);
+ return addOperation1To1(operation + kFirstEncodingV1_3, input0, dimensionedOutput);
+ }
+
// Create an OEM operation with one input and one output,
// specifying the input operand index. Returns the output operand
// index.
@@ -725,6 +749,20 @@ class PartitioningModel : private WrapperModel {
}
}
+ // Create an operation with one input and one output, specifying
+ // the operation kind and the input operand index.
+ // Returns the output operand index.
+ uint32_t addOperation1To1(uint32_t operation, const uint32_t input0,
+ Dimensioned dimensionedOutput = Dimensioned::YES) {
+ auto it = firstEncodingToOperation.lower_bound(operation);
+ CHECK(it != firstEncodingToOperation.end());
+ ANeuralNetworksOperationType type = it->second.first;
+
+ uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
+ addOperation(type, {input0}, {output});
+ return output;
+ }
+
// Create a scalar integer operand of the specified value, and
// return the corresponding operand index.
uint32_t addIntOperand(int32_t value) {
@@ -850,10 +888,11 @@ class PartitioningTest : public ::testing::Test {
}
DeviceSpecification(const std::string& name, float perf, HalVersion halVersion,
uint32_t operationMaskV1_0, uint32_t operationMaskV1_1 = 0,
- uint32_t operationMaskV1_2 = 0)
- : DeviceSpecification(name, perf, perf,
- makeOperationMask(halVersion, operationMaskV1_0,
- operationMaskV1_1, operationMaskV1_2)) {
+ uint32_t operationMaskV1_2 = 0, uint32_t operationMaskV1_3 = 0)
+ : DeviceSpecification(
+ name, perf, perf,
+ makeOperationMask(halVersion, operationMaskV1_0, operationMaskV1_1,
+ operationMaskV1_2, operationMaskV1_3)) {
mHalVersion = halVersion;
}
@@ -886,7 +925,11 @@ class PartitioningTest : public ::testing::Test {
// This is used by a DeviceSpecification constructor to build a mask of
// operations to be supported by the device.
static uint32_t makeOperationMask(HalVersion halVersion, uint32_t operationMaskV1_0,
- uint32_t operationMaskV1_1, uint32_t operationMaskV1_2) {
+ uint32_t operationMaskV1_1, uint32_t operationMaskV1_2,
+ uint32_t operationMaskV1_3) {
+ if (halVersion < HalVersion::V1_3) {
+ CHECK(!operationMaskV1_3);
+ }
if (halVersion < HalVersion::V1_2) {
CHECK(!operationMaskV1_2);
}
@@ -900,9 +943,12 @@ class PartitioningTest : public ::testing::Test {
maskOfWidth(kLastEncodingV1_1 - kFirstEncodingV1_1 + 1);
static const uint32_t kOperationMaskV1_2 =
maskOfWidth(kLastEncodingV1_2 - kFirstEncodingV1_2 + 1);
+ static const uint32_t kOperationMaskV1_3 =
+ maskOfWidth(kLastEncodingV1_3 - kFirstEncodingV1_3 + 1);
return ((operationMaskV1_0 & kOperationMaskV1_0) << kFirstEncodingV1_0) |
((operationMaskV1_1 & kOperationMaskV1_1) << kFirstEncodingV1_1) |
- ((operationMaskV1_2 & kOperationMaskV1_2) << kFirstEncodingV1_2);
+ ((operationMaskV1_2 & kOperationMaskV1_2) << kFirstEncodingV1_2) |
+ ((operationMaskV1_3 & kOperationMaskV1_3) << kFirstEncodingV1_3);
}
};
static std::vector<std::shared_ptr<Device>> makeDevices(
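
makeOperationMask builds one word by masking each per-version operation mask to its group's width and shifting it to the group's first encoding. A worked example with the two groups whose encodings are spelled out in the test's top-of-file comment (16..19 for V1_2, 20 for V1_3); maskOfWidth is re-declared locally as a plausible stand-in for the test's helper:

    #include <cstdint>
    #include <iostream>

    // Local stand-in for the test's maskOfWidth helper.
    constexpr uint32_t maskOfWidth(uint32_t width) {
        return (1u << width) - 1;
    }

    int main() {
        constexpr uint32_t kFirstEncodingV1_2 = 16, kWidthV1_2 = 4;  // encodings 16..19
        constexpr uint32_t kFirstEncodingV1_3 = 20, kWidthV1_3 = 1;  // encoding 20
        // A device claiming support for every V1_2 and V1_3 test operation:
        const uint32_t mask = (maskOfWidth(kWidthV1_2) << kFirstEncodingV1_2) |
                              (maskOfWidth(kWidthV1_3) << kFirstEncodingV1_3);
        std::cout << std::hex << mask << "\n";  // prints 1f0000: bits 16..20 set
        return 0;
    }
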
@@ -1394,36 +1440,39 @@ TEST_F(PartitioningTest, SliceModel) {
uint32_t opnd3 = model.addOperation2To1V1_0(1, opnd0, opnd1);
uint32_t opnd4 = model.addOperation2To1V1_1(0, opnd0, opnd1);
uint32_t opnd5 = model.addOperation2To1V1_2(0, opnd2, opnd3);
- model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4, opnd5});
+ uint32_t opnd6 = model.addOperation1To1V1_3(0, opnd2);
+ model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4, opnd5, opnd6});
model.finish();
ASSERT_TRUE(model.isValid());
- // Simple partition (V1_0, V1_1, V1_2 devices are available; V1_2 has best perf).
+ // Simple partition (V1_0, V1_1, V1_2, V1_3 devices are available; V1_3 has best perf).
// No need to compare the original model to the model from the plan -- we
// didn't actually do any partitioning.
const auto devicesA = makeDevices({{"V1_0", 0.8, HalVersion::V1_0, ~0U},
{"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
- {"V1_2", 0.6, HalVersion::V1_2, ~0U, ~0U, ~0U}});
+ {"V1_2", 0.6, HalVersion::V1_2, ~0U, ~0U, ~0U},
+ {"V1_3", 0.5, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
ExecutionPlan planA;
ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
ExecutePriority::DEFAULT, {}, &planA),
ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
- ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "V1_2");
+ ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "V1_3");
// Compound partition (V1_0, V1_1, V1_2 devices are available, in decreasing
// order of performance; model is distributed across all three devices).
const auto devicesB = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U},
{"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
- {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U}});
+ {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U},
+ {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
ExecutionPlan planB;
ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
ExecutePriority::DEFAULT, {}, &planB),
ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
const auto& stepsB = planB.forTest_compoundGetSteps();
- ASSERT_EQ(stepsB.size(), size_t(3));
+ ASSERT_EQ(stepsB.size(), size_t(4));
{
// Build a model to compare against the step model from stepsB[0].
PartitioningModel modelB0;
@@ -1465,25 +1514,44 @@ TEST_F(PartitioningTest, SliceModel) {
// Build a model to compare against the step model from stepsB[2].
PartitioningModel modelB2;
uint32_t b2Opnd0 = modelB2.addFloatOperand();
- uint32_t b2Opnd1 = modelB2.addFloatOperand();
- uint32_t b2Opnd2 = modelB2.addOperation2To1V1_2(0, b2Opnd0, b2Opnd1);
+ uint32_t b2Opnd1 = modelB2.addOperation1To1V1_3(0, b2Opnd0);
// Note: In the partitioning algorithm, temps that are
// step model inputs precede model outputs that are step model
- // inputs. In the original model "model", opnd3 is a temp and
- // opnd2 is a model output; so in the step model "modelB2", the
- // corresponding inputs b2Opnd1 and b2Opnd0 must appear in
- // that order.
- modelB2.identifyInputsAndOutputs({b2Opnd1, b2Opnd0}, {b2Opnd2});
+ // inputs.
+ modelB2.identifyInputsAndOutputs({b2Opnd0}, {b2Opnd1});
modelB2.finish();
ASSERT_TRUE(modelB2.isValid());
ASSERT_NO_FATAL_FAILURE(
- compare(stepsB[2], &modelB2, devicesB[2], RemapVectorType{}, // modelInputs
- RemapVectorType{{opnd5, b2Opnd2}}, // modelOutputs
- RemapVectorType{{opnd3, b2Opnd1}}, // tempsAsStepModelInputs
+ compare(stepsB[2], &modelB2, devicesB[3], RemapVectorType{}, // modelInputs
+ RemapVectorType{{opnd6, b2Opnd1}}, // modelOutputs
+ RemapVectorType{}, // tempsAsStepModelInputs
StepModelOutputSetType{}, // tempsAsStepModelOutputs
RemapVectorType{{opnd2, b2Opnd0}})); // outputsAsStepModelInputs
}
+ {
+ // Build a model to compare against the step model from stepsB[3].
+ PartitioningModel modelB3;
+ uint32_t b3Opnd0 = modelB3.addFloatOperand();
+ uint32_t b3Opnd1 = modelB3.addFloatOperand();
+ uint32_t b3Opnd2 = modelB3.addOperation2To1V1_2(0, b3Opnd0, b3Opnd1);
+ // Note: In the partitioning algorithm, temps that are
+ // step model inputs precede model outputs that are step model
+ // inputs. In the original model "model", opnd3 is a temp and
+ // opnd2 is a model output; so in the step model "modelB3", the
+ // corresponding inputs b3Opnd1 and b3Opnd0 must appear in
+ // that order.
+ modelB3.identifyInputsAndOutputs({b3Opnd1, b3Opnd0}, {b3Opnd2});
+ modelB3.finish();
+ ASSERT_TRUE(modelB3.isValid());
+
+ ASSERT_NO_FATAL_FAILURE(
+ compare(stepsB[3], &modelB3, devicesB[2], RemapVectorType{}, // modelInputs
+ RemapVectorType{{opnd5, b3Opnd2}}, // modelOutputs
+ RemapVectorType{{opnd3, b3Opnd1}}, // tempsAsStepModelInputs
+ StepModelOutputSetType{}, // tempsAsStepModelOutputs
+ RemapVectorType{{opnd2, b3Opnd0}})); // outputsAsStepModelInputs
+ }
// TODO: Make sure this still works when we have multiple devices
// of same version available for slicing. An easy (?) choice would
@@ -1494,25 +1562,25 @@ TEST_F(PartitioningTest, SliceModel) {
TEST_F(PartitioningTest, SliceModelToEmpty) {
PartitioningModel model;
uint32_t opnd0 = model.addFloatOperand();
- uint32_t opnd1 = model.addFloatOperand();
- uint32_t opnd2 = model.addOperation2To1V1_2(0, opnd0, opnd1);
- model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
+ uint32_t opnd1 = model.addOperation1To1V1_3(0, opnd0);
+ model.identifyInputsAndOutputs({opnd0}, {opnd1});
model.finish();
ASSERT_TRUE(model.isValid());
- // Only the V1_2 device can handle any operations in the model.
+ // Only the V1_3 device can handle any operations in the model.
// No need to compare the original model to the model from the plan -- we
// didn't actually do any partitioning.
const auto devices = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U},
{"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
- {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U}});
+ {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U},
+ {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
ExecutionPlan plan;
ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
ExecutePriority::DEFAULT, {}, &plan),
ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
ASSERT_NE(plan.forTest_simpleGetDevice().get(), nullptr);
- ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "V1_2");
+ ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "V1_3");
}
TEST_F(PartitioningTest, Cpu) {