Diffstat (limited to 'Eigen/src/SparseCore/ConservativeSparseSparseProduct.h')
 Eigen/src/SparseCore/ConservativeSparseSparseProduct.h | 216
 1 file changed, 158 insertions(+), 58 deletions(-)
diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
index 5c320e2d2..492eb0a29 100644
--- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
+++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -15,27 +15,31 @@ namespace Eigen {
namespace internal {
template<typename Lhs, typename Rhs, typename ResultType>
-static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false)
{
typedef typename remove_all<Lhs>::type::Scalar Scalar;
- typedef typename remove_all<Lhs>::type::Index Index;
// make sure to call innerSize/outerSize since we fake the storage order.
Index rows = lhs.innerSize();
Index cols = rhs.outerSize();
eigen_assert(lhs.outerSize() == rhs.innerSize());
-
- std::vector<bool> mask(rows,false);
- Matrix<Scalar,Dynamic,1> values(rows);
- Matrix<Index,Dynamic,1> indices(rows);
-
+
+ ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0);
+ ei_declare_aligned_stack_constructed_variable(Scalar, values, rows, 0);
+ ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0);
+
+ std::memset(mask,0,sizeof(bool)*rows);
+
+ evaluator<Lhs> lhsEval(lhs);
+ evaluator<Rhs> rhsEval(rhs);
+
// estimate the number of non zero entries
// given a rhs column containing Y non zeros, we assume that the respective Y columns
  // of the lhs differ on average by one non zero, thus the number of non zeros for
  // the product of a rhs column with the lhs is X+Y where X is the average number of non zeros
// per column of the lhs.
// Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
- Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros();
+ Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate();
res.setZero();
res.reserve(Index(estimated_nnz_prod));
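
The reservation above relies on the estimate nnz(lhs*rhs) ≈ nnz(lhs) + nnz(rhs) described in the comment. A minimal sketch of that estimate as a free function (the helper name `estimatedProductNonZeros` is made up for illustration and is not an Eigen API):

```cpp
#include <Eigen/SparseCore>

// Hypothetical helper (not part of Eigen): the same rough estimate that the
// implementation above passes to res.reserve().
template<typename Scalar>
Eigen::Index estimatedProductNonZeros(const Eigen::SparseMatrix<Scalar>& lhs,
                                      const Eigen::SparseMatrix<Scalar>& rhs)
{
  // Each result column is assumed to cost about X+Y non zeros: Y from the
  // corresponding rhs column, plus X, the average non zeros per lhs column.
  // Summed over all columns this gives nnz(lhs) + nnz(rhs).
  return lhs.nonZeros() + rhs.nonZeros();
}
```

Reserving this amount up front avoids most reallocations whenever the product is not much denser than its operands.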
@@ -45,11 +49,11 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
res.startVec(j);
Index nnz = 0;
- for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
+ for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
{
Scalar y = rhsIt.value();
Index k = rhsIt.index();
- for (typename Lhs::InnerIterator lhsIt(lhs, k); lhsIt; ++lhsIt)
+ for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
{
Index i = lhsIt.index();
Scalar x = lhsIt.value();
@@ -64,53 +68,51 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
values[i] += x * y;
}
}
-
- // unordered insertion
- for(Index k=0; k<nnz; ++k)
- {
- Index i = indices[k];
- res.insertBackByOuterInnerUnordered(j,i) = values[i];
- mask[i] = false;
- }
-
-#if 0
- // alternative ordered insertion code:
-
- Index t200 = rows/(log2(200)*1.39);
- Index t = (rows*100)/139;
-
- // FIXME reserve nnz non zeros
- // FIXME implement fast sort algorithms for very small nnz
- // if the result is sparse enough => use a quick sort
- // otherwise => loop through the entire vector
- // In order to avoid to perform an expensive log2 when the
- // result is clearly very sparse we use a linear bound up to 200.
- //if((nnz<200 && nnz<t200) || nnz * log2(nnz) < t)
- //res.startVec(j);
- if(true)
+ if(!sortedInsertion)
{
- if(nnz>1) std::sort(indices.data(),indices.data()+nnz);
+ // unordered insertion
for(Index k=0; k<nnz; ++k)
{
Index i = indices[k];
- res.insertBackByOuterInner(j,i) = values[i];
+ res.insertBackByOuterInnerUnordered(j,i) = values[i];
mask[i] = false;
}
}
else
{
- // dense path
- for(Index i=0; i<rows; ++i)
+ // alternative ordered insertion code:
+      const Index t200 = rows/11; // 11 ~= log2(200)*1.39
+ const Index t = (rows*100)/139;
+
+ // FIXME reserve nnz non zeros
+ // FIXME implement faster sorting algorithms for very small nnz
+ // if the result is sparse enough => use a quick sort
+ // otherwise => loop through the entire vector
+      // To avoid performing an expensive log2 when the result is clearly
+      // very sparse, we use a linear bound up to 200.
+ if((nnz<200 && nnz<t200) || nnz * numext::log2(int(nnz)) < t)
{
- if(mask[i])
+ if(nnz>1) std::sort(indices,indices+nnz);
+ for(Index k=0; k<nnz; ++k)
{
- mask[i] = false;
+ Index i = indices[k];
res.insertBackByOuterInner(j,i) = values[i];
+ mask[i] = false;
+ }
+ }
+ else
+ {
+ // dense path
+ for(Index i=0; i<rows; ++i)
+ {
+ if(mask[i])
+ {
+ mask[i] = false;
+ res.insertBackByOuterInner(j,i) = values[i];
+ }
}
}
}
-#endif
-
}
res.finalize();
}
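
For readers who want the algorithm without the diff markers, here is a simplified, self-contained sketch of the per-column masked accumulation performed above (Gustavson-style). It uses std::vector buffers and SparseMatrix::insert instead of Eigen's aligned stack buffers and insertBackByOuterInner, and it always sorts the touched indices, so it illustrates the technique rather than reproducing the implementation; the branch above additionally compares nnz*log2(nnz) against a bound proportional to the number of rows to pick the cheaper of the sorted and dense insertion strategies.

```cpp
#include <algorithm>
#include <vector>
#include <Eigen/SparseCore>

// Simplified sketch (not Eigen's internal code): C = A*B, one column at a time,
// using a mask/values/indices scatter-gather accumulator.
template<typename Scalar>
Eigen::SparseMatrix<Scalar> gustavsonProduct(const Eigen::SparseMatrix<Scalar>& A,
                                             const Eigen::SparseMatrix<Scalar>& B)
{
  typedef Eigen::Index Index;
  typedef typename Eigen::SparseMatrix<Scalar>::InnerIterator InnerIt;
  const Index rows = A.rows(), cols = B.cols();

  Eigen::SparseMatrix<Scalar> C(rows, cols);
  C.reserve(A.nonZeros() + B.nonZeros());       // same rough estimate as above

  std::vector<bool>   mask(rows, false);        // rows already touched in column j
  std::vector<Scalar> values(rows, Scalar(0));  // accumulated value per touched row
  std::vector<Index>  indices(rows);            // touched rows, in discovery order

  for (Index j = 0; j < cols; ++j)
  {
    Index nnz = 0;
    for (InnerIt bIt(B, j); bIt; ++bIt)                  // for each B(k,j) != 0
      for (InnerIt aIt(A, bIt.index()); aIt; ++aIt)      // for each A(i,k) != 0
      {
        const Index i = aIt.index();
        if (!mask[i]) { mask[i] = true; values[i] = Scalar(0); indices[nnz++] = i; }
        values[i] += aIt.value() * bIt.value();
      }

    // Ordered insertion: cheap when nnz*log2(nnz) is small relative to rows.
    std::sort(indices.begin(), indices.begin() + nnz);
    for (Index k = 0; k < nnz; ++k)
    {
      const Index i = indices[k];
      C.insert(i, j) = values[i];
      mask[i] = false;
    }
  }
  C.makeCompressed();
  return C;
}
```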
@@ -134,13 +136,28 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,C
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
- typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
- typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
- ColMajorMatrix resCol(lhs.rows(),rhs.cols());
- internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);
- // sort the non zeros:
- RowMajorMatrix resRow(resCol);
- res = resRow;
+ typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
+ typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrixAux;
+ typedef typename sparse_eval<ColMajorMatrixAux,ResultType::RowsAtCompileTime,ResultType::ColsAtCompileTime,ColMajorMatrixAux::Flags>::type ColMajorMatrix;
+
+ // If the result is tall and thin (in the extreme case a column vector)
+    // then it is faster to sort the coefficients in place instead of transposing twice.
+ // FIXME, the following heuristic is probably not very good.
+ if(lhs.rows()>rhs.cols())
+ {
+ ColMajorMatrix resCol(lhs.rows(),rhs.cols());
+ // perform sorted insertion
+ internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol, true);
+ res = resCol.markAsRValue();
+ }
+ else
+ {
+ ColMajorMatrixAux resCol(lhs.rows(),rhs.cols());
+      // resort to transposing to sort the entries
+ internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrixAux>(lhs, rhs, resCol, false);
+ RowMajorMatrix resRow(resCol);
+ res = resRow.markAsRValue();
+ }
}
};
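
From the user's perspective this selector is what runs behind an ordinary sparse-sparse product into a column-major destination; a minimal usage example (standard public Eigen API, nothing here is specific to this commit):

```cpp
#include <iostream>
#include <Eigen/SparseCore>

int main()
{
  Eigen::SparseMatrix<double> A(1000, 10), B(10, 4);   // tall-and-thin lhs
  A.insert(0, 0)   = 1.0;
  A.insert(999, 9) = 2.0;
  B.insert(0, 0)   = 3.0;
  B.insert(9, 3)   = 4.0;

  // Conservative product: the full structural pattern is kept, including any
  // coefficients that happen to cancel to zero. With lhs.rows() > rhs.cols(),
  // the heuristic above takes the sorted-insertion branch rather than the
  // transpose-twice branch.
  Eigen::SparseMatrix<double> C = A * B;
  std::cout << C.rows() << "x" << C.cols() << ", " << C.nonZeros() << " non zeros\n";
  return 0;
}
```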
@@ -149,7 +166,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
- typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
+ typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
RowMajorMatrix rhsRow = rhs;
RowMajorMatrix resRow(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<RowMajorMatrix,Lhs,RowMajorMatrix>(rhsRow, lhs, resRow);
@@ -162,7 +179,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
- typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
+ typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
RowMajorMatrix lhsRow = lhs;
RowMajorMatrix resRow(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorMatrix,RowMajorMatrix>(rhs, lhsRow, resRow);
@@ -175,7 +192,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,R
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
- typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
+ typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
RowMajorMatrix resRow(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
res = resRow;
@@ -190,7 +207,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,C
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
- typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
+ typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
ColMajorMatrix resCol(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);
res = resCol;
@@ -202,7 +219,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
- typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
+ typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
ColMajorMatrix lhsCol = lhs;
ColMajorMatrix resCol(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<ColMajorMatrix,Rhs,ColMajorMatrix>(lhsCol, rhs, resCol);
@@ -215,7 +232,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
- typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
+ typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
ColMajorMatrix rhsCol = rhs;
ColMajorMatrix resCol(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorMatrix,ColMajorMatrix>(lhs, rhsCol, resCol);
@@ -228,8 +245,8 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,R
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
- typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
- typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
+ typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
+ typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
RowMajorMatrix resRow(lhs.rows(),rhs.cols());
internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
// sort the non zeros:
@@ -240,6 +257,89 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,R
} // end namespace internal
+
+namespace internal {
+
+template<typename Lhs, typename Rhs, typename ResultType>
+static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+{
+ typedef typename remove_all<Lhs>::type::Scalar Scalar;
+ Index cols = rhs.outerSize();
+ eigen_assert(lhs.outerSize() == rhs.innerSize());
+
+ evaluator<Lhs> lhsEval(lhs);
+ evaluator<Rhs> rhsEval(rhs);
+
+ for (Index j=0; j<cols; ++j)
+ {
+ for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
+ {
+ Scalar y = rhsIt.value();
+ Index k = rhsIt.index();
+ for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
+ {
+ Index i = lhsIt.index();
+ Scalar x = lhsIt.value();
+ res.coeffRef(i,j) += x * y;
+ }
+ }
+ }
+}
+
+
+} // end namespace internal
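
A simplified standalone sketch of what sparse_sparse_to_dense_product_impl does, assuming column-major operands and a plain dense destination (an illustration, not the internal code):

```cpp
#include <Eigen/Dense>
#include <Eigen/SparseCore>

// Sketch: scatter the product of two column-major sparse matrices directly
// into a dense, zero-initialized result, mirroring the loop structure above.
template<typename Scalar>
void sparseTimesSparseToDense(const Eigen::SparseMatrix<Scalar>& A,
                              const Eigen::SparseMatrix<Scalar>& B,
                              Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>& D)
{
  typedef typename Eigen::SparseMatrix<Scalar>::InnerIterator InnerIt;
  eigen_assert(A.cols() == B.rows());
  D.setZero(A.rows(), B.cols());
  for (Eigen::Index j = 0; j < B.outerSize(); ++j)
    for (InnerIt bIt(B, j); bIt; ++bIt)               // B(k,j)
      for (InnerIt aIt(A, bIt.index()); aIt; ++aIt)   // A(i,k)
        D(aIt.index(), j) += aIt.value() * bIt.value();
}
```

Since every destination coefficient is addressable in O(1), no mask or index buffer is needed here, unlike the sparse-result path earlier in this file.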
+
+namespace internal {
+
+template<typename Lhs, typename Rhs, typename ResultType,
+ int LhsStorageOrder = (traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
+ int RhsStorageOrder = (traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor>
+struct sparse_sparse_to_dense_product_selector;
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor>
+{
+ static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+ {
+ internal::sparse_sparse_to_dense_product_impl<Lhs,Rhs,ResultType>(lhs, rhs, res);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor>
+{
+ static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+ {
+ typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
+ ColMajorMatrix lhsCol(lhs);
+ internal::sparse_sparse_to_dense_product_impl<ColMajorMatrix,Rhs,ResultType>(lhsCol, rhs, res);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor>
+{
+ static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+ {
+ typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
+ ColMajorMatrix rhsCol(rhs);
+ internal::sparse_sparse_to_dense_product_impl<Lhs,ColMajorMatrix,ResultType>(lhs, rhsCol, res);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor>
+{
+ static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+ {
+ Transpose<ResultType> trRes(res);
+ internal::sparse_sparse_to_dense_product_impl<Rhs,Lhs,Transpose<ResultType> >(rhs, lhs, trRes);
+ }
+};
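
The RowMajor,RowMajor case relies on the identity (A*B)^T = B^T * A^T: iterating the row-major operands by their outer dimension is the same as iterating the column-major storage of their transposes, so the product can be accumulated straight into a transposed view of the destination without converting either operand. A small check of that identity, using dense conversions purely for illustration (not how the dispatch above is implemented):

```cpp
#include <Eigen/Dense>
#include <Eigen/SparseCore>

int main()
{
  Eigen::SparseMatrix<double, Eigen::RowMajor> A(3, 4), B(4, 2);
  A.insert(0, 1) = 2.0;  A.insert(2, 3) = -1.0;
  B.insert(1, 0) = 5.0;  B.insert(3, 1) = 4.0;

  // Direct product of the dense conversions.
  Eigen::MatrixXd direct = Eigen::MatrixXd(A) * Eigen::MatrixXd(B);

  // Same result obtained by writing B^T * A^T through a transposed view of
  // the destination, which is the trick used by the selector above.
  Eigen::MatrixXd viaTranspose(3, 2);
  viaTranspose.transpose() = Eigen::MatrixXd(B).transpose()
                           * Eigen::MatrixXd(A).transpose();

  return (direct - viaTranspose).norm() < 1e-12 ? 0 : 1;
}
```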
+
+
+} // end namespace internal
+
} // end namespace Eigen
#endif // EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H