aboutsummaryrefslogtreecommitdiff
path: root/Eigen/src/Core/products/Parallelizer.h
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core/products/Parallelizer.h')
-rw-r--r--Eigen/src/Core/products/Parallelizer.h25
1 files changed, 20 insertions, 5 deletions
diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h
index c0ddc0c06..8f91879e4 100644
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -10,7 +10,9 @@
#ifndef EIGEN_PARALLELIZER_H
#define EIGEN_PARALLELIZER_H
+#if EIGEN_HAS_CXX11_ATOMIC
#include <atomic>
+#endif
namespace Eigen {
@@ -19,7 +21,8 @@ namespace internal {
/** \internal */
inline void manage_multi_threading(Action action, int* v)
{
- static EIGEN_UNUSED int m_maxThreads = -1;
+ static int m_maxThreads = -1;
+ EIGEN_UNUSED_VARIABLE(m_maxThreads)
if(action==SetAction)
{
@@ -77,8 +80,17 @@ template<typename Index> struct GemmParallelInfo
{
GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
+ // volatile is not enough on all architectures (see bug 1572)
+ // to guarantee that when thread A says to thread B that it is
+ // done with packing a block, then all writes have been really
+ // carried out... C++11 memory model+atomic guarantees this.
+#if EIGEN_HAS_CXX11_ATOMIC
std::atomic<Index> sync;
std::atomic<int> users;
+#else
+ Index volatile sync;
+ int volatile users;
+#endif
Index lhs_start;
Index lhs_length;
@@ -89,11 +101,14 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth,
{
// TODO when EIGEN_USE_BLAS is defined,
// we should still enable OMP for other scalar types
-#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
+ // Without C++11, we have to disable GEMM's parallelization on
+ // non x86 architectures because there volatile is not enough for our purpose.
+ // See bug 1572.
+#if (! defined(EIGEN_HAS_OPENMP)) || defined(EIGEN_USE_BLAS) || ((!EIGEN_HAS_CXX11_ATOMIC) && !(EIGEN_ARCH_i386_OR_x86_64))
// FIXME the transpose variable is only needed to properly split
// the matrix product when multithreading is enabled. This is a temporary
// fix to support row-major destination matrices. This whole
- // parallelizer mechanism has to be redisigned anyway.
+ // parallelizer mechanism has to be redesigned anyway.
EIGEN_UNUSED_VARIABLE(depth);
EIGEN_UNUSED_VARIABLE(transpose);
func(0,rows, 0,cols);
@@ -114,12 +129,12 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth,
double work = static_cast<double>(rows) * static_cast<double>(cols) *
static_cast<double>(depth);
double kMinTaskSize = 50000; // FIXME improve this heuristic.
- pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
+ pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, static_cast<Index>( work / kMinTaskSize ) ));
// compute the number of threads we are going to use
Index threads = std::min<Index>(nbThreads(), pb_max_threads);
- // if multi-threading is explicitely disabled, not useful, or if we already are in a parallel session,
+ // if multi-threading is explicitly disabled, not useful, or if we already are in a parallel session,
// then abort multi-threading
// FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
if((!Condition) || (threads==1) || (omp_get_num_threads()>1))