diff options
author | David Gross <dgross@google.com> | 2018-06-28 15:07:59 -0700 |
---|---|---|
committer | David Gross <dgross@google.com> | 2018-06-29 17:44:26 -0700 |
commit | a183646e0b227bd59eb0fdc64b814d9c3bb2260d (patch) | |
tree | 1a1a2ac4ec09a442d27bd7725347ae53c857e911 | |
parent | 1d5d34618a9c4d8340eb4f6d597fa84563371d46 (diff) | |
download | eigen-a183646e0b227bd59eb0fdc64b814d9c3bb2260d.tar.gz |
Fix for HDRNet producing nondeterministic results.
Eigen reads and writes volatile scalars to allow thread A to communicate
to thread B that data written by thread A is available for thread B to
read. However, volatile on Android doesn't guarantee ordered accesses,
so it's not certain that after thread B sees the volatile write from
thread A that data written by thread A is actually visible to thread B.
The solution is to replace those volatile scalars with std::atomic<>.
There might be an alternative with better performance (such as imposing
looser ordering constraints than the default std::atomic<> ordering
constraints).
Bug: 109953668
Test: NeuralNetworksTest_static
Test: adb shell am instrument -e class \
"com.example.android.nn.benchmark.NNTest#testNNAPI11Seconds[hdrnet_float]" \
-w com.example.android.nn.benchmark/android.support.test.runner.AndroidJUnitRunner
# test device (including neural networks runtime) is aosp_walleye-userdebug
# bench is from goog/master
# also ran with mobilenet_float in place of hdrnet_float
# could not reproduce problem in this environment; reproduced on
# an internal branch on unreleased hardware
Change-Id: Idea6e4b0a3db168dce5915c1beacb643267f3ec6
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixMatrix.h | 1 | ||||
-rw-r--r-- | Eigen/src/Core/products/Parallelizer.h | 6 |
2 files changed, 4 insertions, 3 deletions
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index 6440e1d09..41cfb0e03 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -146,7 +146,6 @@ static void run(Index rows, Index cols, Index depth, // Release all the sub blocks A'_i of A' for the current thread, // i.e., we simply decrement the number of users by 1 for(Index i=0; i<threads; ++i) - #pragma omp atomic info[i].users -= 1; } } diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h index c2f084c82..c0ddc0c06 100644 --- a/Eigen/src/Core/products/Parallelizer.h +++ b/Eigen/src/Core/products/Parallelizer.h @@ -10,6 +10,8 @@ #ifndef EIGEN_PARALLELIZER_H #define EIGEN_PARALLELIZER_H +#include <atomic> + namespace Eigen { namespace internal { @@ -75,8 +77,8 @@ template<typename Index> struct GemmParallelInfo { GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {} - Index volatile sync; - int volatile users; + std::atomic<Index> sync; + std::atomic<int> users; Index lhs_start; Index lhs_length; |