diff options
author | Yi Kong <yikong@google.com> | 2022-02-25 16:32:14 +0800 |
---|---|---|
committer | Yi Kong <yikong@google.com> | 2022-02-25 15:08:55 +0000 |
commit | 2aab794c004027d008d6b0b64165bf1961d5d2bb (patch) | |
tree | 83bb8f19c67bcafdb2ca4a98414af1b17392ec36 /Eigen/src/SparseCore/SparseDenseProduct.h | |
parent | ca5aa72016f062fd0712bcb86370478de332bca3 (diff) | |
download | eigen-2aab794c004027d008d6b0b64165bf1961d5d2bb.tar.gz |
Upgrade eigen to 3.4.0
Steps:
* Removed common files between Android copy and the matching upstream copy
* Obtained latest upstream tarball (see README.version)
* Extracted over the directory
Bug: 148287349
Test: presubmit
Change-Id: Iee2744719075fdf000b315e973645923da766111
Diffstat (limited to 'Eigen/src/SparseCore/SparseDenseProduct.h')
-rw-r--r-- | Eigen/src/SparseCore/SparseDenseProduct.h | 38 |
1 files changed, 30 insertions, 8 deletions
diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h index 0547db596..f005a18a1 100644 --- a/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/Eigen/src/SparseCore/SparseDenseProduct.h @@ -88,10 +88,11 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, A typedef typename internal::remove_all<SparseLhsType>::type Lhs; typedef typename internal::remove_all<DenseRhsType>::type Rhs; typedef typename internal::remove_all<DenseResType>::type Res; - typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator; + typedef evaluator<Lhs> LhsEval; + typedef typename LhsEval::InnerIterator LhsInnerIterator; static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha) { - evaluator<Lhs> lhsEval(lhs); + LhsEval lhsEval(lhs); for(Index c=0; c<rhs.cols(); ++c) { for(Index j=0; j<lhs.outerSize(); ++j) @@ -111,17 +112,38 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t typedef typename internal::remove_all<SparseLhsType>::type Lhs; typedef typename internal::remove_all<DenseRhsType>::type Rhs; typedef typename internal::remove_all<DenseResType>::type Res; - typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator; + typedef evaluator<Lhs> LhsEval; + typedef typename LhsEval::InnerIterator LhsInnerIterator; static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha) { - evaluator<Lhs> lhsEval(lhs); - for(Index j=0; j<lhs.outerSize(); ++j) + Index n = lhs.rows(); + LhsEval lhsEval(lhs); + +#ifdef EIGEN_HAS_OPENMP + Eigen::initParallel(); + Index threads = Eigen::nbThreads(); + // This 20000 threshold has been found experimentally on 2D and 3D Poisson problems. + // It basically represents the minimal amount of work to be done to be worth it. + if(threads>1 && lhsEval.nonZerosEstimate()*rhs.cols() > 20000) { - typename Res::RowXpr res_j(res.row(j)); - for(LhsInnerIterator it(lhsEval,j); it ;++it) - res_j += (alpha*it.value()) * rhs.row(it.index()); + #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads) + for(Index i=0; i<n; ++i) + processRow(lhsEval,rhs,res,alpha,i); + } + else +#endif + { + for(Index i=0; i<n; ++i) + processRow(lhsEval, rhs, res, alpha, i); } } + + static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, Res& res, const typename Res::Scalar& alpha, Index i) + { + typename Res::RowXpr res_i(res.row(i)); + for(LhsInnerIterator it(lhsEval,i); it ;++it) + res_i += (alpha*it.value()) * rhs.row(it.index()); + } }; template<typename SparseLhsType, typename DenseRhsType, typename DenseResType> |