diff options
author | Mika Raento <mikie@google.com> | 2018-06-04 16:43:25 +0100 |
---|---|---|
committer | Mika Raento <mikie@google.com> | 2018-06-04 17:37:14 +0100 |
commit | 877baf0d0b19e97bfc9a55c90c2d61df7ffb6025 (patch) | |
tree | f16e7759b80b9a3e2a2b821e999a6a55f795b9cc /nn | |
parent | a7ca183f502c1de952e5783f1144380970d86378 (diff) | |
download | ml-877baf0d0b19e97bfc9a55c90c2d61df7ffb6025.tar.gz |
OpenMP blocktime 1->20ms to keep cores enabled
With a 1ms blocktime, the benchmark app shows a 3x performance degradation
on Pixel 2 in float benchmarks (compared to no blocktime setting).
In testing, 10ms was enough to give results comparable to no setting.
20ms was chosen to give a 2x margin of safety while not keeping the
threads busy-waiting unnecessarily long.
Bug: 109645291
Test: NeuralNetworksTest_static/*Openmp*
Test: Run benchmark app multiple times
Merged-In: Id2a909f6aefdbe29dd746019c2c9541e31d8c754
(cherry picked from commit fed420cab2704afb2beb69577dbe75392954a57e)
Change-Id: Ie138b11c6713c10489c3aa7e946ac2bf47a3e87b
Diffstat (limited to 'nn')
-rw-r--r-- | nn/common/CpuExecutor.cpp | 2 | ||||
-rw-r--r-- | nn/common/include/CpuExecutor.h | 6 | ||||
-rw-r--r-- | nn/runtime/test/TestOpenmpSettings.cpp | 14 |
3 files changed, 12 insertions, 10 deletions
diff --git a/nn/common/CpuExecutor.cpp b/nn/common/CpuExecutor.cpp index 0c6219308..8f8706776 100644 --- a/nn/common/CpuExecutor.cpp +++ b/nn/common/CpuExecutor.cpp @@ -1536,7 +1536,7 @@ int CpuExecutor::executeOperation(const Operation& operation) { ScopedOpenmpSettings::ScopedOpenmpSettings() { mBlocktimeInitial = kmp_get_blocktime(); - kmp_set_blocktime(1); // ms + kmp_set_blocktime(20); // ms, see b/109645291 #if NNAPI_LIMIT_CPU_THREADS // Code not yet enabled. Choosing the number of threads to be based on diff --git a/nn/common/include/CpuExecutor.h b/nn/common/include/CpuExecutor.h index 64a46b65f..78b8910ea 100644 --- a/nn/common/include/CpuExecutor.h +++ b/nn/common/include/CpuExecutor.h @@ -142,9 +142,9 @@ private: // // Currently sets a low blocktime: the time OpenMP threads busy-wait for more // work before going to sleep. See b/79159165, https://reviews.llvm.org/D18577. -// The default is 200ms, we set to 1ms here. This should allow for the threads -// to not sleep before the next operation, but release CPU to other work -// quickly. +// The default is 200ms, we set to 20ms here, see b/109645291. This keeps the +// cores enabled throughout inference computation without too much extra power +// consumption afterwards. 
// // The OpenMP settings are thread-local (applying only to worker threads formed // from that thread), see https://software.intel.com/en-us/node/522688 and diff --git a/nn/runtime/test/TestOpenmpSettings.cpp b/nn/runtime/test/TestOpenmpSettings.cpp index 549473b46..59a794224 100644 --- a/nn/runtime/test/TestOpenmpSettings.cpp +++ b/nn/runtime/test/TestOpenmpSettings.cpp @@ -38,26 +38,28 @@ protected: ASSERT_EQ(blocktimeRestored, kOpenmpDefaultBlockTime); } static const int kOpenmpDefaultBlockTime; + static const int kPreferredBlockTime; }; const int OpenmpSettingsTest::kOpenmpDefaultBlockTime = 200; +const int OpenmpSettingsTest::kPreferredBlockTime = 20; using ::android::nn::ScopedOpenmpSettings; -TEST_F(OpenmpSettingsTest, Test1) { +TEST_F(OpenmpSettingsTest, TestkPreferredBlockTime) { ScopedOpenmpSettings s; const int blocktimeSet = kmp_get_blocktime(); - ASSERT_EQ(blocktimeSet, 1); + ASSERT_EQ(blocktimeSet, kPreferredBlockTime); } TEST_F(OpenmpSettingsTest, Test2) { ScopedOpenmpSettings s1; const int blocktimeSet1 = kmp_get_blocktime(); - ASSERT_EQ(blocktimeSet1, 1); + ASSERT_EQ(blocktimeSet1, kPreferredBlockTime); ScopedOpenmpSettings s2; const int blocktimeSet2 = kmp_get_blocktime(); - ASSERT_EQ(blocktimeSet2, 1); + ASSERT_EQ(blocktimeSet2, kPreferredBlockTime); } TEST_F(OpenmpSettingsTest, TestThreaded) { @@ -74,12 +76,12 @@ TEST_F(OpenmpSettingsTest, TestThreaded) { ScopedOpenmpSettings s; const int blocktimeSet2 = kmp_get_blocktime(); - ASSERT_EQ(blocktimeSet2, 1); + ASSERT_EQ(blocktimeSet2, kPreferredBlockTime); usleep(sleepFor); const int blocktimeSet3 = kmp_get_blocktime(); - ASSERT_EQ(blocktimeSet3, 1); + ASSERT_EQ(blocktimeSet3, kPreferredBlockTime); })); } std::for_each(threads.begin(), threads.end(), [](std::thread& t) { |