aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Gross <dgross@google.com>2018-06-28 15:07:59 -0700
committerDavid Gross <dgross@google.com>2018-06-29 17:44:26 -0700
commita183646e0b227bd59eb0fdc64b814d9c3bb2260d (patch)
tree1a1a2ac4ec09a442d27bd7725347ae53c857e911
parent1d5d34618a9c4d8340eb4f6d597fa84563371d46 (diff)
downloadeigen-a183646e0b227bd59eb0fdc64b814d9c3bb2260d.tar.gz
Fix for HDRNet producing nondeterministic results.
Eigen reads and writes volatile scalars to allow thread A to communicate to thread B that data written by thread A is available for thread B to read. However, volatile on Android doesn't guarantee ordered accesses, so it's not certain that after thread B sees the volatile write from thread A that data written by thread A is actually visible to thread B. The solution is to replace those volatile scalars with std::atomic<>. There might be an alternative with better performance (such as imposing looser ordering constraints than the default std::atomic<> ordering constraints). Bug: 109953668 Test: NeuralNetworksTest_static Test: adb shell am instrument -e class \ "com.example.android.nn.benchmark.NNTest#testNNAPI11Seconds[hdrnet_float]" \ -w com.example.android.nn.benchmark/android.support.test.runner.AndroidJUnitRunner # test device (including neural networks runtime) is aosp_walleye-userdebug # bench is from goog/master # also ran with mobilenet_float in place of hdrnet_float # could not reproduce problem in this environment; reproduced on # an internal branch on unreleased hardware Change-Id: Idea6e4b0a3db168dce5915c1beacb643267f3ec6
-rw-r--r--Eigen/src/Core/products/GeneralMatrixMatrix.h1
-rw-r--r--Eigen/src/Core/products/Parallelizer.h6
2 files changed, 4 insertions, 3 deletions
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 6440e1d09..41cfb0e03 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -146,7 +146,6 @@ static void run(Index rows, Index cols, Index depth,
// Release all the sub blocks A'_i of A' for the current thread,
// i.e., we simply decrement the number of users by 1
for(Index i=0; i<threads; ++i)
- #pragma omp atomic
info[i].users -= 1;
}
}
diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h
index c2f084c82..c0ddc0c06 100644
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -10,6 +10,8 @@
#ifndef EIGEN_PARALLELIZER_H
#define EIGEN_PARALLELIZER_H
+#include <atomic>
+
namespace Eigen {
namespace internal {
@@ -75,8 +77,8 @@ template<typename Index> struct GemmParallelInfo
{
GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
- Index volatile sync;
- int volatile users;
+ std::atomic<Index> sync;
+ std::atomic<int> users;
Index lhs_start;
Index lhs_length;