diff options
author | Xusong Wang <xusongw@google.com> | 2020-03-05 09:44:20 -0800 |
---|---|---|
committer | Xusong Wang <xusongw@google.com> | 2020-04-07 09:53:20 -0700 |
commit | a868d3a0c4b703216e93477fa32e477394093a0b (patch) | |
tree | 0db99b0095c2e8c35d8c0e7f21f0ee89e8c25960 /nn/runtime/test/fuzzing | |
parent | cd321da9fd9c50ff3bea39857945bcf30809f262 (diff) | |
download | ml-a868d3a0c4b703216e93477fa32e477394093a0b.tar.gz |
Compute RGG golden results from FP model.
Before this CL, the golden results in RGG tests were computed from
nnapi-reference with the actual data type. This CL changes the logic to
compute the golden results from an equivalent float32 model, when
possible, to avoid quantization bias.
Currently only the golden results of single-operation models are
computed from fp32 models. This is because there is no multi-operation
quant model in RGG tests yet.
Fixes: 150805665
Test: NNT_static
Change-Id: I4e268d3aadeebdcdae1d7010a4d0564f45b11b82
Diffstat (limited to 'nn/runtime/test/fuzzing')
-rw-r--r-- | nn/runtime/test/fuzzing/TestRandomGraph.cpp | 90 |
1 files changed, 59 insertions, 31 deletions
diff --git a/nn/runtime/test/fuzzing/TestRandomGraph.cpp b/nn/runtime/test/fuzzing/TestRandomGraph.cpp index 7aece223b..99d2aff80 100644 --- a/nn/runtime/test/fuzzing/TestRandomGraph.cpp +++ b/nn/runtime/test/fuzzing/TestRandomGraph.cpp @@ -223,13 +223,54 @@ class RandomGraphTest : public ::testing::TestWithParam<uint32_t> { return false; } + // Compute the golden output results of the test model on nnapi-reference. If possible, the + // golden results will be computed from an equivalent float32 model to avoid bias + // from the quantized CPU implementation. + void computeGoldenResults() { + SCOPED_TRACE("computeGoldenResults"); + + // Convert the test model to an equivalent float32 model if possible. + auto fpModel = convertToFloat32Model(mTestModel); + const TestModel& goldenModel = fpModel.has_value() ? fpModel.value() : mTestModel; + + // Create model. + generated_tests::GeneratedModel model; + generated_tests::createModel(goldenModel, &model); + ASSERT_TRUE(model.isValid()); + ASSERT_EQ(model.finish(), Result::NO_ERROR); + + // Create compilation for nnapi-reference. + ASSERT_TRUE(mDevices.find(kRefDeviceName) != mDevices.end()); + const auto refDevice = mDevices[kRefDeviceName]; + test_wrapper::Compilation compilation; + ASSERT_EQ(compilation.createForDevice(&model, refDevice), Result::NO_ERROR); + ASSERT_EQ(compilation.finish(), Result::NO_ERROR); + + // Create request. + test_wrapper::Execution execution(&compilation); + std::vector<TestBuffer> outputs; + generated_tests::createRequest(goldenModel, &execution, &outputs); + + // Compute result. + ASSERT_EQ(execution.compute(), Result::NO_ERROR); + + if (fpModel.has_value()) { + // Quantize the execution results as golden values. 
+ setExpectedOutputsFromFloat32Results(outputs, &mTestModel); + } else { + for (uint32_t i = 0; i < outputs.size(); i++) { + auto outputIndex = mTestModel.main.outputIndexes[i]; + mTestModel.main.operands[outputIndex].data = outputs[i]; + } + } + } + // Compile and execute the generated graph on a device selected by name. void computeAndVerifyResultsForDevice(const test_wrapper::Model* model, uint32_t numOps, const std::string& name) { SCOPED_TRACE("Device: " + name); ASSERT_TRUE(mDevices.find(name) != mDevices.end()); const auto device = mDevices[name]; - bool isRef = name.compare(kRefDeviceName) == 0; // Check if the device fully supports the graph. constexpr int kMaxNumberOperations = 1000; @@ -239,8 +280,6 @@ class RandomGraphTest : public ::testing::TestWithParam<uint32_t> { 1, supported), ANEURALNETWORKS_NO_ERROR); if (!std::all_of(supported, supported + numOps, [](bool v) { return v; })) { - // The reference device should always support all operations. - ASSERT_FALSE(isRef); std::cout << "[ ] SKIP: " << name << " does not support the graph.\n"; return; } @@ -251,7 +290,7 @@ class RandomGraphTest : public ::testing::TestWithParam<uint32_t> { int64_t featureLevel; ASSERT_EQ(ANeuralNetworksDevice_getFeatureLevel(device, &featureLevel), ANEURALNETWORKS_NO_ERROR); - if (!isRef && shouldSkipTest(featureLevel)) return; + if (shouldSkipTest(featureLevel)) return; // Create compilation for device. test_wrapper::Compilation compilation; @@ -261,7 +300,6 @@ class RandomGraphTest : public ::testing::TestWithParam<uint32_t> { // is supported, but model is too big (too many operations and/or too-large constants) for // device. if (compileReturn == Result::OP_FAILED) { - ASSERT_FALSE(isRef); std::cout << "[ ] SKIP: " << name << " failed at compilation step.\n"; return; } @@ -278,24 +316,16 @@ class RandomGraphTest : public ::testing::TestWithParam<uint32_t> { // still fail, e.g. 
there may be operand shapes that are unknown until execution time, and // at execution time turn out to be too big. if (executeReturn == Result::OP_FAILED) { - ASSERT_FALSE(isRef); std::cout << "[ ] SKIP: " << name << " failed at execution step.\n"; return; } ASSERT_EQ(executeReturn, Result::NO_ERROR); - // Record the execution results as golden values. - if (isRef) { - for (uint32_t i = 0; i < outputs.size(); i++) { - auto outputIndex = mTestModel.main.outputIndexes[i]; - mTestModel.main.operands[outputIndex].data = outputs[i]; - } - } - - if (featureLevel >= __ANDROID_API_Q__ && !isRef) { + if (featureLevel >= __ANDROID_API_Q__) { checkResults(mTestModel, outputs, mCriteria); } + // Dump test results on failure for debugging. if (::testing::Test::HasFailure() || mDumpSpec) { dumpTestResults(name, outputs); } @@ -303,10 +333,12 @@ class RandomGraphTest : public ::testing::TestWithParam<uint32_t> { // Compile and execute the generated graph normally (i.e., allow runtime to // distribute across devices). - void computeAndVerifyResults(const test_wrapper::Model* model, bool shouldCheckResults) { + void computeAndVerifyResults(const std::string& name, const test_wrapper::Model* model, + bool shouldCheckResults) { // Because we're not using the introspection/control API, the CpuDevice // is available as a fallback, and hence we assume that compilation and // execution will succeed. + SCOPED_TRACE(name); // Create compilation. test_wrapper::Compilation compilation(model); @@ -322,6 +354,11 @@ class RandomGraphTest : public ::testing::TestWithParam<uint32_t> { if (shouldCheckResults) { checkResults(mTestModel, outputs, mCriteria); } + + // Dump test results on failure for debugging. + if (::testing::Test::HasFailure() || mDumpSpec) { + dumpTestResults(name, outputs); + } } // Main test entrance. 
@@ -338,22 +375,19 @@ class RandomGraphTest : public ::testing::TestWithParam<uint32_t> { ASSERT_TRUE(model.isValid()); ASSERT_EQ(model.finish(), Result::NO_ERROR); - // Compute reference result. - computeAndVerifyResultsForDevice(&model, numOperations, kRefDeviceName); + // Compute reference results. + computeGoldenResults(); // Compute on each available device. for (auto& pair : mDevices) { - // Skip the nnapi reference device. - if (pair.first.compare(kRefDeviceName) == 0) continue; computeAndVerifyResultsForDevice(&model, numOperations, pair.first); } if (numOperations > 1) { if (!shouldSkipTest(mStandardDevicesFeatureLevel)) { - // Compute normally (i.e., allow runtime to distribute across - // devices). - SCOPED_TRACE("Compute normally"); - computeAndVerifyResults(&model, mStandardDevicesFeatureLevel >= __ANDROID_API_Q__); + // Compute normally (i.e., allow runtime to distribute across devices). + computeAndVerifyResults("Compute normally", &model, + mStandardDevicesFeatureLevel >= __ANDROID_API_Q__); } #ifndef NNTEST_CTS @@ -364,9 +398,8 @@ class RandomGraphTest : public ::testing::TestWithParam<uint32_t> { // though some are of feature level < __ANDROID_API_Q__: In this // case, we don't take feature level as an indication of // reliability, as we do with real devices. 
- SCOPED_TRACE("Compute across synthetic devices"); DeviceManager::get()->forTest_setDevices(mSyntheticDevices); - computeAndVerifyResults(&model, true); + computeAndVerifyResults("Compute across synthetic devices", &model, true); DeviceManager::get()->forTest_setDevices(mStandardDevices); } #endif @@ -778,13 +811,8 @@ TEST_RANDOM_GRAPH_WITH_DATA_TYPE_AND_RANK(TENSOR_BOOL8, 3); TEST_RANDOM_GRAPH_WITH_DATA_TYPE_AND_RANK(TENSOR_BOOL8, 2); TEST_RANDOM_GRAPH_WITH_DATA_TYPE_AND_RANK(TENSOR_BOOL8, 1); -#ifdef NNTEST_CTS INSTANTIATE_TEST_CASE_P(TestRandomGraph, SingleOperationTest, ::testing::Range(0u, 50u)); INSTANTIATE_TEST_CASE_P(TestRandomGraph, RandomGraphTest, ::testing::Range(0u, 50u)); -#else -INSTANTIATE_TEST_CASE_P(TestRandomGraph, SingleOperationTest, ::testing::Range(0u, 100u)); -INSTANTIATE_TEST_CASE_P(TestRandomGraph, RandomGraphTest, ::testing::Range(0u, 100u)); -#endif } // namespace fuzzing_test } // namespace nn |