aboutsummaryrefslogtreecommitdiff
path: root/Eigen/src/SparseCore/SparseDenseProduct.h
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/SparseCore/SparseDenseProduct.h')
-rw-r--r--Eigen/src/SparseCore/SparseDenseProduct.h38
1 files changed, 30 insertions, 8 deletions
diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h
index 0547db596..f005a18a1 100644
--- a/Eigen/src/SparseCore/SparseDenseProduct.h
+++ b/Eigen/src/SparseCore/SparseDenseProduct.h
@@ -88,10 +88,11 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, A
typedef typename internal::remove_all<SparseLhsType>::type Lhs;
typedef typename internal::remove_all<DenseRhsType>::type Rhs;
typedef typename internal::remove_all<DenseResType>::type Res;
- typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
+ typedef evaluator<Lhs> LhsEval;
+ typedef typename LhsEval::InnerIterator LhsInnerIterator;
static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)
{
- evaluator<Lhs> lhsEval(lhs);
+ LhsEval lhsEval(lhs);
for(Index c=0; c<rhs.cols(); ++c)
{
for(Index j=0; j<lhs.outerSize(); ++j)
@@ -111,17 +112,38 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
typedef typename internal::remove_all<SparseLhsType>::type Lhs;
typedef typename internal::remove_all<DenseRhsType>::type Rhs;
typedef typename internal::remove_all<DenseResType>::type Res;
- typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
+ typedef evaluator<Lhs> LhsEval;
+ typedef typename LhsEval::InnerIterator LhsInnerIterator;
static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
{
- evaluator<Lhs> lhsEval(lhs);
- for(Index j=0; j<lhs.outerSize(); ++j)
+ Index n = lhs.rows();
+ LhsEval lhsEval(lhs);
+
+#ifdef EIGEN_HAS_OPENMP
+ Eigen::initParallel();
+ Index threads = Eigen::nbThreads();
+ // This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
+ // It basically represents the minimal amount of work to be done to be worth it.
+ if(threads>1 && lhsEval.nonZerosEstimate()*rhs.cols() > 20000)
{
- typename Res::RowXpr res_j(res.row(j));
- for(LhsInnerIterator it(lhsEval,j); it ;++it)
- res_j += (alpha*it.value()) * rhs.row(it.index());
+ #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads)
+ for(Index i=0; i<n; ++i)
+ processRow(lhsEval,rhs,res,alpha,i);
+ }
+ else
+#endif
+ {
+ for(Index i=0; i<n; ++i)
+ processRow(lhsEval, rhs, res, alpha, i);
}
}
+
+ static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, Res& res, const typename Res::Scalar& alpha, Index i)
+ {
+ typename Res::RowXpr res_i(res.row(i));
+ for(LhsInnerIterator it(lhsEval,i); it ;++it)
+ res_i += (alpha*it.value()) * rhs.row(it.index());
+ }
};
template<typename SparseLhsType, typename DenseRhsType, typename DenseResType>