summary | refs | log | tree | commit | diff
path: root/nn
diff options
context:
space:
mode:
author Mika Raento <mikie@google.com> 2018-06-04 16:43:25 +0100
committer Mika Raento <mikie@google.com> 2018-06-04 17:37:14 +0100
commit 877baf0d0b19e97bfc9a55c90c2d61df7ffb6025 (patch)
tree f16e7759b80b9a3e2a2b821e999a6a55f795b9cc /nn
parent a7ca183f502c1de952e5783f1144380970d86378 (diff)
download ml-877baf0d0b19e97bfc9a55c90c2d61df7ffb6025.tar.gz
Openmp blocktime 1->20ms to keep cores enabled
With 1ms blocktime the benchmark app shows 3x performance degradation on Pixel 2 in float benchmarks (against no blocktime setting). In testing, 10ms was enough to give results comparable to no setting. 20ms was chosen so as to have a 2x margin of safety while not keeping the threads busy-waiting unnecessarily long. Bug: 109645291 Test: NeuralNetworksTest_static/*Openmp* Test: Run benchmark app multiple times Merged-In: Id2a909f6aefdbe29dd746019c2c9541e31d8c754 (cherry picked from commit fed420cab2704afb2beb69577dbe75392954a57e) Change-Id: Ie138b11c6713c10489c3aa7e946ac2bf47a3e87b
Diffstat (limited to 'nn')
-rw-r--r-- nn/common/CpuExecutor.cpp 2
-rw-r--r-- nn/common/include/CpuExecutor.h 6
-rw-r--r-- nn/runtime/test/TestOpenmpSettings.cpp 14
3 files changed, 12 insertions, 10 deletions
diff --git a/nn/common/CpuExecutor.cpp b/nn/common/CpuExecutor.cpp
index 0c6219308..8f8706776 100644
--- a/nn/common/CpuExecutor.cpp
+++ b/nn/common/CpuExecutor.cpp
@@ -1536,7 +1536,7 @@ int CpuExecutor::executeOperation(const Operation& operation) {
ScopedOpenmpSettings::ScopedOpenmpSettings() {
mBlocktimeInitial = kmp_get_blocktime();
- kmp_set_blocktime(1); // ms
+ kmp_set_blocktime(20); // ms, see b/109645291
#if NNAPI_LIMIT_CPU_THREADS
// Code not yet enabled. Choosing the number of threads to be based on
diff --git a/nn/common/include/CpuExecutor.h b/nn/common/include/CpuExecutor.h
index 64a46b65f..78b8910ea 100644
--- a/nn/common/include/CpuExecutor.h
+++ b/nn/common/include/CpuExecutor.h
@@ -142,9 +142,9 @@ private:
//
// Currently sets a low blocktime: the time OpenMP threads busy-wait for more
// work before going to sleep. See b/79159165, https://reviews.llvm.org/D18577.
-// The default is 200ms, we set to 1ms here. This should allow for the threads
-// to not sleep before the next operation, but release CPU to other work
-// quickly.
+// The default is 200ms, we set to 20ms here, see b/109645291. This keeps the
+// cores enabled throughout inference computation without too much extra power
+// consumption afterwards.
//
// The OpenMP settings are thread-local (applying only to worker threads formed
// from that thread), see https://software.intel.com/en-us/node/522688 and
diff --git a/nn/runtime/test/TestOpenmpSettings.cpp b/nn/runtime/test/TestOpenmpSettings.cpp
index 549473b46..59a794224 100644
--- a/nn/runtime/test/TestOpenmpSettings.cpp
+++ b/nn/runtime/test/TestOpenmpSettings.cpp
@@ -38,26 +38,28 @@ protected:
ASSERT_EQ(blocktimeRestored, kOpenmpDefaultBlockTime);
}
static const int kOpenmpDefaultBlockTime;
+ static const int kPreferredBlockTime;
};
const int OpenmpSettingsTest::kOpenmpDefaultBlockTime = 200;
+const int OpenmpSettingsTest::kPreferredBlockTime = 20;
using ::android::nn::ScopedOpenmpSettings;
-TEST_F(OpenmpSettingsTest, Test1) {
+TEST_F(OpenmpSettingsTest, TestkPreferredBlockTime) {
ScopedOpenmpSettings s;
const int blocktimeSet = kmp_get_blocktime();
- ASSERT_EQ(blocktimeSet, 1);
+ ASSERT_EQ(blocktimeSet, kPreferredBlockTime);
}
TEST_F(OpenmpSettingsTest, Test2) {
ScopedOpenmpSettings s1;
const int blocktimeSet1 = kmp_get_blocktime();
- ASSERT_EQ(blocktimeSet1, 1);
+ ASSERT_EQ(blocktimeSet1, kPreferredBlockTime);
ScopedOpenmpSettings s2;
const int blocktimeSet2 = kmp_get_blocktime();
- ASSERT_EQ(blocktimeSet2, 1);
+ ASSERT_EQ(blocktimeSet2, kPreferredBlockTime);
}
TEST_F(OpenmpSettingsTest, TestThreaded) {
@@ -74,12 +76,12 @@ TEST_F(OpenmpSettingsTest, TestThreaded) {
ScopedOpenmpSettings s;
const int blocktimeSet2 = kmp_get_blocktime();
- ASSERT_EQ(blocktimeSet2, 1);
+ ASSERT_EQ(blocktimeSet2, kPreferredBlockTime);
usleep(sleepFor);
const int blocktimeSet3 = kmp_get_blocktime();
- ASSERT_EQ(blocktimeSet3, 1);
+ ASSERT_EQ(blocktimeSet3, kPreferredBlockTime);
}));
}
std::for_each(threads.begin(), threads.end(), [](std::thread& t) {