diff options
Diffstat (limited to 'nn')
-rw-r--r-- | nn/TEST_MAPPING | 16 | ||||
-rw-r--r-- | nn/common/Utils.cpp | 44 | ||||
-rw-r--r-- | nn/common/include/Utils.h | 2 | ||||
-rw-r--r-- | nn/runtime/Manager.cpp | 8 | ||||
-rw-r--r-- | nn/runtime/VersionedInterfaces.cpp | 27 | ||||
-rw-r--r-- | nn/runtime/test/TestCompliance.cpp | 32 | ||||
-rw-r--r-- | nn/runtime/test/TestPartitioning.cpp | 124 |
7 files changed, 198 insertions, 55 deletions
diff --git a/nn/TEST_MAPPING b/nn/TEST_MAPPING index f4d4501f9..f3eaa9940 100644 --- a/nn/TEST_MAPPING +++ b/nn/TEST_MAPPING @@ -1,10 +1,22 @@ { "presubmit": [ { - "name": "CtsNNAPITestCases" + "name": "CtsNNAPITestCases", + "options": [ + { + // b/153876253, temporarily filter out failing l2_norm tests + "include-filter": "-*l2_normalization_axis_corner_case*" + } + ] }, { - "name": "NeuralNetworksTest_static" + "name": "NeuralNetworksTest_static", + "options": [ + { + // b/153876253, temporarily filter out failing l2_norm tests + "include-filter": "-*l2_normalization_axis_corner_case*" + } + ] }, { "name": "NeuralNetworksTest_utils" diff --git a/nn/common/Utils.cpp b/nn/common/Utils.cpp index cd97ffa52..81e5cf1e1 100644 --- a/nn/common/Utils.cpp +++ b/nn/common/Utils.cpp @@ -21,6 +21,8 @@ #include <android-base/logging.h> #include <android-base/properties.h> #include <android-base/strings.h> +#include <errno.h> +#include <poll.h> #include <sys/system_properties.h> #include <algorithm> @@ -32,9 +34,6 @@ #include <utility> #include <vector> -#include <errno.h> -#include <poll.h> - #include "ControlFlow.h" #include "NeuralNetworks.h" #include "NeuralNetworksOEM.h" @@ -3100,7 +3099,22 @@ bool compliantWithV1_0(const V1_0::Request& request) { bool compliantWithV1_0(const V1_3::Request& request) { return std::all_of(request.pools.begin(), request.pools.end(), [](const auto& pool) { - return pool.getDiscriminator() == V1_3::Request::MemoryPool::hidl_discriminator::hidlMemory; + if (pool.getDiscriminator() != V1_3::Request::MemoryPool::hidl_discriminator::hidlMemory) { + return false; + } + const auto& name = pool.hidlMemory().name(); + return name == "ashmem" || name == "mmap_fd"; + }); +} + +bool compliantWithV1_2(const V1_3::Request& request) { + return std::all_of(request.pools.begin(), request.pools.end(), [](const auto& pool) { + if (pool.getDiscriminator() != V1_3::Request::MemoryPool::hidl_discriminator::hidlMemory) { + return false; + } + const auto& name = 
pool.hidlMemory().name(); + return name == "ashmem" || name == "mmap_fd" || name == "hardware_buffer_blob" || + name == "hardware_buffer"; }); } @@ -3123,17 +3137,29 @@ V1_0::Request convertToV1_0(const V1_0::Request& request) { return request; } -V1_0::Request convertToV1_0(const V1_3::Request& request) { - if (!compliantWithV1_0(request)) { - LOG(ERROR) << "Upcasting non-compliant request " << SHOW_IF_DEBUG(toString(request)) - << " from V1_3::Request to V1_0::Request"; - } +static V1_0::Request uncheckedConvertToV1_0(const V1_3::Request& request) { hidl_vec<hidl_memory> pools(request.pools.size()); std::transform(request.pools.begin(), request.pools.end(), pools.begin(), [](const auto& pool) { return convertToV1_0(pool); }); return {.inputs = request.inputs, .outputs = request.outputs, .pools = std::move(pools)}; } +V1_0::Request convertToV1_0(const V1_3::Request& request) { + if (!compliantWithV1_0(request)) { + LOG(ERROR) << "Upcasting non-compliant request " << SHOW_IF_DEBUG(toString(request)) + << " from V1_3::Request to V1_0::Request of version 1.0"; + } + return uncheckedConvertToV1_0(request); +} + +V1_0::Request convertToV1_2(const V1_3::Request& request) { + if (!compliantWithV1_2(request)) { + LOG(ERROR) << "Upcasting non-compliant request " << SHOW_IF_DEBUG(toString(request)) + << " from V1_3::Request to V1_0::Request of version 1.2"; + } + return uncheckedConvertToV1_0(request); +} + V1_3::Request convertToV1_3(const V1_0::Request& request) { hidl_vec<V1_3::Request::MemoryPool> pools(request.pools.size()); std::transform(request.pools.begin(), request.pools.end(), pools.begin(), diff --git a/nn/common/include/Utils.h b/nn/common/include/Utils.h index 24e69211c..ca11c5ebc 100644 --- a/nn/common/include/Utils.h +++ b/nn/common/include/Utils.h @@ -530,9 +530,11 @@ hal::hidl_vec<hal::V1_3::Operand> convertToV1_3(const hal::hidl_vec<hal::V1_3::O bool compliantWithV1_0(const hal::V1_0::Request& request); bool compliantWithV1_0(const hal::V1_3::Request& 
request); +bool compliantWithV1_2(const hal::V1_3::Request& request); hal::V1_0::Request convertToV1_0(const hal::V1_0::Request& request); hal::V1_0::Request convertToV1_0(const hal::V1_3::Request& request); +hal::V1_0::Request convertToV1_2(const hal::V1_3::Request& request); hal::V1_3::Request convertToV1_3(const hal::V1_0::Request& request); hal::V1_3::Request convertToV1_3(const hal::V1_3::Request& request); diff --git a/nn/runtime/Manager.cpp b/nn/runtime/Manager.cpp index 310710e3c..634cd2aec 100644 --- a/nn/runtime/Manager.cpp +++ b/nn/runtime/Manager.cpp @@ -379,9 +379,9 @@ std::tuple<int, std::vector<OutputShape>, Timing> DriverPreparedModel::execute( const bool burstCompute = (burstController != nullptr); bool burstFallback = true; if (burstCompute) { - const bool compliant = compliantWithV1_0(request); + const bool compliant = compliantWithV1_2(request); if (compliant) { - V1_0::Request request10 = convertToV1_0(request); + V1_0::Request request12 = convertToV1_2(request); std::vector<intptr_t> memoryIds; memoryIds.reserve(localMemories.size()); for (const Memory* memory : localMemories) { @@ -390,9 +390,9 @@ std::tuple<int, std::vector<OutputShape>, Timing> DriverPreparedModel::execute( } VLOG(EXECUTION) << "Before ExecutionBurstController->compute() " - << SHOW_IF_DEBUG(toString(request10)); + << SHOW_IF_DEBUG(toString(request12)); std::tie(n, outputShapes, timing, burstFallback) = - burstController->compute(request10, measure, memoryIds); + burstController->compute(request12, measure, memoryIds); } } diff --git a/nn/runtime/VersionedInterfaces.cpp b/nn/runtime/VersionedInterfaces.cpp index 3ae950eac..33d290cfe 100644 --- a/nn/runtime/VersionedInterfaces.cpp +++ b/nn/runtime/VersionedInterfaces.cpp @@ -241,17 +241,16 @@ std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::execu return getResults(*callback); } - const bool compliant = compliantWithV1_0(request); - if (!compliant) { - LOG(ERROR) << "Could not handle execute or 
execute_1_2!"; - return failWithStatus(ErrorStatus::GENERAL_FAILURE); - } - const V1_0::Request request10 = convertToV1_0(request); - // version 1.2 HAL if (mPreparedModelV1_2 != nullptr) { + const bool compliant = compliantWithV1_2(request); + if (!compliant) { + LOG(ERROR) << "Could not handle execute_1_2!"; + return failWithStatus(ErrorStatus::GENERAL_FAILURE); + } + const V1_0::Request request12 = convertToV1_2(request); Return<V1_0::ErrorStatus> ret = - mPreparedModelV1_2->execute_1_2(request10, measure, callback); + mPreparedModelV1_2->execute_1_2(request12, measure, callback); if (ret.isDeadObject()) { LOG(ERROR) << "execute_1_2 failure: " << ret.description(); return failDeadObject(); @@ -271,6 +270,12 @@ std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::execu // version 1.0 HAL if (mPreparedModelV1_0 != nullptr) { + const bool compliant = compliantWithV1_0(request); + if (!compliant) { + LOG(ERROR) << "Could not handle execute!"; + return failWithStatus(ErrorStatus::GENERAL_FAILURE); + } + const V1_0::Request request10 = convertToV1_0(request); Return<V1_0::ErrorStatus> ret = mPreparedModelV1_0->execute(request10, callback); if (ret.isDeadObject()) { LOG(ERROR) << "execute failure: " << ret.description(); @@ -324,16 +329,16 @@ std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::execu // version 1.2 HAL if (mPreparedModelV1_2 != nullptr) { - const bool compliant = compliantWithV1_0(request); + const bool compliant = compliantWithV1_2(request); if (!compliant) { LOG(ERROR) << "Could not handle executeSynchronously!"; return kFailure; } - const V1_0::Request request10 = convertToV1_0(request); + const V1_0::Request request12 = convertToV1_2(request); std::tuple<int, std::vector<OutputShape>, Timing> result; Return<void> ret = mPreparedModelV1_2->executeSynchronously( - request10, measure, + request12, measure, [&result](V1_0::ErrorStatus error, const hidl_vec<OutputShape>& outputShapes, const Timing& timing) { 
result = getExecutionResult(convertToV1_3(error), outputShapes, timing); diff --git a/nn/runtime/test/TestCompliance.cpp b/nn/runtime/test/TestCompliance.cpp index 53bff038b..db5ab4d3e 100644 --- a/nn/runtime/test/TestCompliance.cpp +++ b/nn/runtime/test/TestCompliance.cpp @@ -18,6 +18,7 @@ #include "GeneratedTestUtils.h" #include "HalInterfaces.h" +#include "Memory.h" #include "MemoryUtils.h" #include "ModelBuilder.h" #include "TestNeuralNetworksWrapper.h" @@ -71,8 +72,14 @@ static void testAvailableSinceV1_0(const WrapperModel& wrapperModel) { ASSERT_TRUE(compliantWithV1_0(hidlModel)); } +static void testAvailableSinceV1_2(const Request& request) { + ASSERT_FALSE(compliantWithV1_0(request)); + ASSERT_TRUE(compliantWithV1_2(request)); +} + static void testAvailableSinceV1_3(const Request& request) { ASSERT_FALSE(compliantWithV1_0(request)); + ASSERT_FALSE(compliantWithV1_2(request)); } static const WrapperOperandType kTypeTensorFloat(WrapperType::TENSOR_FLOAT32, {1}); @@ -126,7 +133,7 @@ TEST_F(ComplianceTest, Rank0TensorTemporaryVariable) { testAvailableSinceV1_2(model); } -TEST_F(ComplianceTest, HardwareBuffer) { +TEST_F(ComplianceTest, HardwareBufferModel) { const size_t memorySize = 20; AHardwareBuffer_Desc desc{ .width = memorySize, @@ -157,6 +164,29 @@ TEST_F(ComplianceTest, HardwareBuffer) { AHardwareBuffer_release(buffer); } +TEST_F(ComplianceTest, HardwareBufferRequest) { + const auto [n, ahwb] = MemoryRuntimeAHWB::create(1024); + ASSERT_EQ(n, ANEURALNETWORKS_NO_ERROR); + Request::MemoryPool sharedMemoryPool, ahwbMemoryPool = ahwb->getMemoryPool(); + sharedMemoryPool.hidlMemory(allocateSharedMemory(1024)); + ASSERT_TRUE(sharedMemoryPool.hidlMemory().valid()); + ASSERT_TRUE(ahwbMemoryPool.hidlMemory().valid()); + + // AHardwareBuffer as input. 
+ testAvailableSinceV1_2(Request{ + .inputs = {{.hasNoValue = false, .location = {.poolIndex = 0}, .dimensions = {}}}, + .outputs = {{.hasNoValue = false, .location = {.poolIndex = 1}, .dimensions = {}}}, + .pools = {ahwbMemoryPool, sharedMemoryPool}, + }); + + // AHardwareBuffer as output. + testAvailableSinceV1_2(Request{ + .inputs = {{.hasNoValue = false, .location = {.poolIndex = 0}, .dimensions = {}}}, + .outputs = {{.hasNoValue = false, .location = {.poolIndex = 1}, .dimensions = {}}}, + .pools = {sharedMemoryPool, ahwbMemoryPool}, + }); +} + TEST_F(ComplianceTest, DeviceMemory) { Request::MemoryPool sharedMemoryPool, deviceMemoryPool; sharedMemoryPool.hidlMemory(allocateSharedMemory(1024)); diff --git a/nn/runtime/test/TestPartitioning.cpp b/nn/runtime/test/TestPartitioning.cpp index 3bde4cf90..7b4205ac8 100644 --- a/nn/runtime/test/TestPartitioning.cpp +++ b/nn/runtime/test/TestPartitioning.cpp @@ -86,6 +86,13 @@ // MINIMUM, POW, or PRELU. These operations take no activation // function, so we only get 4 operation kinds, for which we // use operation encodings 16..19. +// - There is another collection of operations (each of which has one input +// and one output): +// - Single operation available at driver version V1_3 or +// later. It is represented in the graph as HARD_SWISH. +// These operations take no activation function, for which we +// use operation encodings 20..20. + // When we instantiate a device for testing purposes, we specify what subset of // those operations the device is able to execute. 
// @@ -204,6 +211,11 @@ const uint32_t kFirstEncodingPRELU = kFirstEncodingPOW + 1; const uint32_t kFirstEncodingV1_2 = kFirstEncodingMAXIMUM; const uint32_t kLastEncodingV1_2 = kFirstEncodingPRELU; +// V1_3 operations +const uint32_t kFirstEncodingHARD_SWISH = kLastEncodingV1_2 + 1; +const uint32_t kFirstEncodingV1_3 = kFirstEncodingHARD_SWISH; +const uint32_t kLastEncodingV1_3 = kFirstEncodingHARD_SWISH; + const std::map<OperationType, uint32_t> operationToFirstEncoding = { {OperationType::ADD, kFirstEncodingADD}, {OperationType::MUL, kFirstEncodingMUL}, @@ -213,6 +225,7 @@ const std::map<OperationType, uint32_t> operationToFirstEncoding = { {OperationType::MINIMUM, kFirstEncodingMINIMUM}, {OperationType::POW, kFirstEncodingPOW}, {OperationType::PRELU, kFirstEncodingPRELU}, + {OperationType::HARD_SWISH, kFirstEncodingHARD_SWISH}, }; // Sorted in reverse order (std::greater) so that we can use map::lower_bound to @@ -227,6 +240,7 @@ const std::map<uint32_t, std::pair<uint32_t, bool>, std::greater<>> firstEncodin {kFirstEncodingMINIMUM, {ANEURALNETWORKS_MINIMUM, false}}, {kFirstEncodingPOW, {ANEURALNETWORKS_POW, false}}, {kFirstEncodingPRELU, {ANEURALNETWORKS_PRELU, false}}, + {kFirstEncodingHARD_SWISH, {ANEURALNETWORKS_HARD_SWISH, false}}, }; // Look up the operation with the specified index in a graph, and return the @@ -664,6 +678,16 @@ class PartitioningModel : private WrapperModel { return addOperation2To1(operation + kFirstEncodingV1_2, input0, input1, dimensionedOutput); } + // Create a V1_3 operation with one input and one output, specifying the + // operation kind (where 0 is the first V1_3 operation) and the input + // operand index. + // Returns the output operand index. 
+ uint32_t addOperation1To1V1_3(uint32_t operation, const uint32_t input0, + Dimensioned dimensionedOutput = Dimensioned::YES) { + CHECK_LE(operation, kLastEncodingV1_3 - kFirstEncodingV1_3); + return addOperation1To1(operation + kFirstEncodingV1_3, input0, dimensionedOutput); + } + // Create an OEM operation with one input and one output, // specifying the input operand index. Returns the output operand // index. @@ -725,6 +749,20 @@ class PartitioningModel : private WrapperModel { } } + // Create an operation with one input and one output, specifying + // the operation kind and the input operand index. + // Returns the output operand index. + uint32_t addOperation1To1(uint32_t operation, const uint32_t input0, + Dimensioned dimensionedOutput = Dimensioned::YES) { + auto it = firstEncodingToOperation.lower_bound(operation); + CHECK(it != firstEncodingToOperation.end()); + ANeuralNetworksOperationType type = it->second.first; + + uint32_t output = addOperandOfSameType(input0, dimensionedOutput); + addOperation(type, {input0}, {output}); + return output; + } + // Create a scalar integer operand of the specified value, and // return the corresponding operand index. 
uint32_t addIntOperand(int32_t value) { @@ -850,10 +888,11 @@ class PartitioningTest : public ::testing::Test { } DeviceSpecification(const std::string& name, float perf, HalVersion halVersion, uint32_t operationMaskV1_0, uint32_t operationMaskV1_1 = 0, - uint32_t operationMaskV1_2 = 0) - : DeviceSpecification(name, perf, perf, - makeOperationMask(halVersion, operationMaskV1_0, - operationMaskV1_1, operationMaskV1_2)) { + uint32_t operationMaskV1_2 = 0, uint32_t operationMaskV1_3 = 0) + : DeviceSpecification( + name, perf, perf, + makeOperationMask(halVersion, operationMaskV1_0, operationMaskV1_1, + operationMaskV1_2, operationMaskV1_3)) { mHalVersion = halVersion; } @@ -886,7 +925,11 @@ class PartitioningTest : public ::testing::Test { // This is used by a DeviceSpecification constructor to build a mask of // operations to be supported by the device. static uint32_t makeOperationMask(HalVersion halVersion, uint32_t operationMaskV1_0, - uint32_t operationMaskV1_1, uint32_t operationMaskV1_2) { + uint32_t operationMaskV1_1, uint32_t operationMaskV1_2, + uint32_t operationMaskV1_3) { + if (halVersion < HalVersion::V1_3) { + CHECK(!operationMaskV1_3); + } if (halVersion < HalVersion::V1_2) { CHECK(!operationMaskV1_2); } @@ -900,9 +943,12 @@ class PartitioningTest : public ::testing::Test { maskOfWidth(kLastEncodingV1_1 - kFirstEncodingV1_1 + 1); static const uint32_t kOperationMaskV1_2 = maskOfWidth(kLastEncodingV1_2 - kFirstEncodingV1_2 + 1); + static const uint32_t kOperationMaskV1_3 = + maskOfWidth(kLastEncodingV1_3 - kFirstEncodingV1_3 + 1); return ((operationMaskV1_0 & kOperationMaskV1_0) << kFirstEncodingV1_0) | ((operationMaskV1_1 & kOperationMaskV1_1) << kFirstEncodingV1_1) | - ((operationMaskV1_2 & kOperationMaskV1_2) << kFirstEncodingV1_2); + ((operationMaskV1_2 & kOperationMaskV1_2) << kFirstEncodingV1_2) | + ((operationMaskV1_3 & kOperationMaskV1_3) << kFirstEncodingV1_3); } }; static std::vector<std::shared_ptr<Device>> makeDevices( @@ -1394,36 +1440,39 
@@ TEST_F(PartitioningTest, SliceModel) { uint32_t opnd3 = model.addOperation2To1V1_0(1, opnd0, opnd1); uint32_t opnd4 = model.addOperation2To1V1_1(0, opnd0, opnd1); uint32_t opnd5 = model.addOperation2To1V1_2(0, opnd2, opnd3); - model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4, opnd5}); + uint32_t opnd6 = model.addOperation1To1V1_3(0, opnd2); + model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4, opnd5, opnd6}); model.finish(); ASSERT_TRUE(model.isValid()); - // Simple partition (V1_0, V1_1, V1_2 devices are available; V1_2 has best perf). + // Simple partition (V1_0, V1_1, V1_2, V1_3 devices are available; V1_3 has best perf). // No need to compare the original model to the model from the plan -- we // didn't actually do any partitioning. const auto devicesA = makeDevices({{"V1_0", 0.8, HalVersion::V1_0, ~0U}, {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U}, - {"V1_2", 0.6, HalVersion::V1_2, ~0U, ~0U, ~0U}}); + {"V1_2", 0.6, HalVersion::V1_2, ~0U, ~0U, ~0U}, + {"V1_3", 0.5, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}}); ExecutionPlan planA; ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT, {}, &planA), ANEURALNETWORKS_NO_ERROR); ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE); ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr); - ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "V1_2"); + ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "V1_3"); // Compound partition (V1_0, V1_1, V1_2 devices are available, in decreasing // order of performance; model is distributed across all three devices). 
const auto devicesB = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U}, {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U}, - {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U}}); + {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U}, + {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}}); ExecutionPlan planB; ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT, {}, &planB), ANEURALNETWORKS_NO_ERROR); ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND); const auto& stepsB = planB.forTest_compoundGetSteps(); - ASSERT_EQ(stepsB.size(), size_t(3)); + ASSERT_EQ(stepsB.size(), size_t(4)); { // Build a model to compare against the step model from stepsB[0]. PartitioningModel modelB0; @@ -1465,25 +1514,44 @@ TEST_F(PartitioningTest, SliceModel) { // Build a model to compare against the step model from stepsB[2]. PartitioningModel modelB2; uint32_t b2Opnd0 = modelB2.addFloatOperand(); - uint32_t b2Opnd1 = modelB2.addFloatOperand(); - uint32_t b2Opnd2 = modelB2.addOperation2To1V1_2(0, b2Opnd0, b2Opnd1); + uint32_t b2Opnd1 = modelB2.addOperation1To1V1_3(0, b2Opnd0); // Note: In the partitioning algorithm, temps that are // step model inputs precede model outputs that are step model - // inputs. In the original model "model", opnd3 is a temp and - // opnd2 is a model output; so in the step model "modelB2", the - // corresponding inputs b2Opnd1 and b2Opnd0 must appear in - // that order. - modelB2.identifyInputsAndOutputs({b2Opnd1, b2Opnd0}, {b2Opnd2}); + // inputs. 
+ modelB2.identifyInputsAndOutputs({b2Opnd0}, {b2Opnd1}); modelB2.finish(); ASSERT_TRUE(modelB2.isValid()); ASSERT_NO_FATAL_FAILURE( - compare(stepsB[2], &modelB2, devicesB[2], RemapVectorType{}, // modelInputs - RemapVectorType{{opnd5, b2Opnd2}}, // modelOutputs - RemapVectorType{{opnd3, b2Opnd1}}, // tempsAsStepModelInputs + compare(stepsB[2], &modelB2, devicesB[3], RemapVectorType{}, // modelInputs + RemapVectorType{{opnd6, b2Opnd1}}, // modelOutputs + RemapVectorType{}, // tempsAsStepModelInputs StepModelOutputSetType{}, // tempsAsStepModelOutputs RemapVectorType{{opnd2, b2Opnd0}})); // outputsAsStepModelInputs } + { + // Build a model to compare against the step model from stepsB[3]. + PartitioningModel modelB3; + uint32_t b3Opnd0 = modelB3.addFloatOperand(); + uint32_t b3Opnd1 = modelB3.addFloatOperand(); + uint32_t b3Opnd2 = modelB3.addOperation2To1V1_2(0, b3Opnd0, b3Opnd1); + // Note: In the partitioning algorithm, temps that are + // step model inputs precede model outputs that are step model + // inputs. In the original model "model", opnd3 is a temp and + // opnd2 is a model output; so in the step model "modelB3", the + // corresponding inputs b3Opnd1 and b3Opnd0 must appear in + // that order. + modelB3.identifyInputsAndOutputs({b3Opnd1, b3Opnd0}, {b3Opnd2}); + modelB3.finish(); + ASSERT_TRUE(modelB3.isValid()); + + ASSERT_NO_FATAL_FAILURE( + compare(stepsB[3], &modelB3, devicesB[2], RemapVectorType{}, // modelInputs + RemapVectorType{{opnd5, b3Opnd2}}, // modelOutputs + RemapVectorType{{opnd3, b3Opnd1}}, // tempsAsStepModelInputs + StepModelOutputSetType{}, // tempsAsStepModelOutputs + RemapVectorType{{opnd2, b3Opnd0}})); // outputsAsStepModelInputs + } // TODO: Make sure this still works when we have multiple devices // of same version available for slicing. An easy (?) 
choice would @@ -1494,25 +1562,25 @@ TEST_F(PartitioningTest, SliceModel) { TEST_F(PartitioningTest, SliceModelToEmpty) { PartitioningModel model; uint32_t opnd0 = model.addFloatOperand(); - uint32_t opnd1 = model.addFloatOperand(); - uint32_t opnd2 = model.addOperation2To1V1_2(0, opnd0, opnd1); - model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2}); + uint32_t opnd1 = model.addOperation1To1V1_3(0, opnd0); + model.identifyInputsAndOutputs({opnd0}, {opnd1}); model.finish(); ASSERT_TRUE(model.isValid()); - // Only the V1_2 device can handle any operations in the model. + // Only the V1_3 device can handle any operations in the model. // No need to compare the original model to the model from the plan -- we // didn't actually do any partitioning. const auto devices = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U}, {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U}, - {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U}}); + {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U}, + {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}}); ExecutionPlan plan; ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT, {}, &plan), ANEURALNETWORKS_NO_ERROR); ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE); ASSERT_NE(plan.forTest_simpleGetDevice().get(), nullptr); - ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "V1_2"); + ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "V1_3"); } TEST_F(PartitioningTest, Cpu) { |