// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H #define EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H // IWYU pragma: private #include "./InternalHeaderCheck.h" namespace Eigen { namespace internal { template static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false) { typedef typename remove_all_t::Scalar LhsScalar; typedef typename remove_all_t::Scalar RhsScalar; typedef typename remove_all_t::Scalar ResScalar; // make sure to call innerSize/outerSize since we fake the storage order. Index rows = lhs.innerSize(); Index cols = rhs.outerSize(); eigen_assert(lhs.outerSize() == rhs.innerSize()); ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0); ei_declare_aligned_stack_constructed_variable(ResScalar, values, rows, 0); ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0); std::memset(mask, 0, sizeof(bool) * rows); evaluator lhsEval(lhs); evaluator rhsEval(rhs); // estimate the number of non zero entries // given a rhs column containing Y non zeros, we assume that the respective Y columns // of the lhs differs in average of one non zeros, thus the number of non zeros for // the product of a rhs column with the lhs is X+Y where X is the average number of non zero // per column of the lhs. // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs) Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate(); res.setZero(); res.reserve(Index(estimated_nnz_prod)); // we compute each column of the result, one after the other for (Index j = 0; j < cols; ++j) { res.startVec(j); Index nnz = 0; for (typename evaluator::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) { RhsScalar y = rhsIt.value(); Index k = rhsIt.index(); for (typename evaluator::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt) { Index i = lhsIt.index(); LhsScalar x = lhsIt.value(); if (!mask[i]) { mask[i] = true; values[i] = x * y; indices[nnz] = i; ++nnz; } else values[i] += x * y; } } if (!sortedInsertion) { // unordered insertion for (Index k = 0; k < nnz; ++k) { Index i = indices[k]; res.insertBackByOuterInnerUnordered(j, i) = values[i]; mask[i] = false; } } else { // alternative ordered insertion code: const Index t200 = rows / 11; // 11 == (log2(200)*1.39) const Index t = (rows * 100) / 139; // FIXME reserve nnz non zeros // FIXME implement faster sorting algorithms for very small nnz // if the result is sparse enough => use a quick sort // otherwise => loop through the entire vector // In order to avoid to perform an expensive log2 when the // result is clearly very sparse we use a linear bound up to 200. if ((nnz < 200 && nnz < t200) || nnz * numext::log2(int(nnz)) < t) { if (nnz > 1) std::sort(indices, indices + nnz); for (Index k = 0; k < nnz; ++k) { Index i = indices[k]; res.insertBackByOuterInner(j, i) = values[i]; mask[i] = false; } } else { // dense path for (Index i = 0; i < rows; ++i) { if (mask[i]) { mask[i] = false; res.insertBackByOuterInner(j, i) = values[i]; } } } } } res.finalize(); } } // end namespace internal namespace internal { // Helper template to generate new sparse matrix types template using WithStorageOrder = SparseMatrix; template ::Flags & RowMajorBit) ? RowMajor : ColMajor, int RhsStorageOrder = (traits::Flags & RowMajorBit) ? RowMajor : ColMajor, int ResStorageOrder = (traits::Flags & RowMajorBit) ? RowMajor : ColMajor> struct conservative_sparse_sparse_product_selector; template struct conservative_sparse_sparse_product_selector { typedef remove_all_t LhsCleaned; typedef typename LhsCleaned::Scalar Scalar; static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { using RowMajorMatrix = WithStorageOrder; using ColMajorMatrixAux = WithStorageOrder; // If the result is tall and thin (in the extreme case a column vector) // then it is faster to sort the coefficients inplace instead of transposing twice. // FIXME, the following heuristic is probably not very good. if (lhs.rows() > rhs.cols()) { using ColMajorMatrix = typename sparse_eval::type; ColMajorMatrix resCol(lhs.rows(), rhs.cols()); // perform sorted insertion internal::conservative_sparse_sparse_product_impl(lhs, rhs, resCol, true); res = resCol.markAsRValue(); } else { ColMajorMatrixAux resCol(lhs.rows(), rhs.cols()); // resort to transpose to sort the entries internal::conservative_sparse_sparse_product_impl(lhs, rhs, resCol, false); RowMajorMatrix resRow(resCol); res = resRow.markAsRValue(); } } }; template struct conservative_sparse_sparse_product_selector { static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { using RowMajorRhs = WithStorageOrder; using RowMajorRes = WithStorageOrder; RowMajorRhs rhsRow = rhs; RowMajorRes resRow(lhs.rows(), rhs.cols()); internal::conservative_sparse_sparse_product_impl(rhsRow, lhs, resRow); res = resRow; } }; template struct conservative_sparse_sparse_product_selector { static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { using RowMajorLhs = WithStorageOrder; using RowMajorRes = WithStorageOrder; RowMajorLhs lhsRow = lhs; RowMajorRes resRow(lhs.rows(), rhs.cols()); internal::conservative_sparse_sparse_product_impl(rhs, lhsRow, resRow); res = resRow; } }; template struct conservative_sparse_sparse_product_selector { static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { using RowMajorRes = WithStorageOrder; RowMajorRes resRow(lhs.rows(), rhs.cols()); internal::conservative_sparse_sparse_product_impl(rhs, lhs, resRow); res = resRow; } }; template struct conservative_sparse_sparse_product_selector { typedef typename traits>::Scalar Scalar; static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { using ColMajorRes = WithStorageOrder; ColMajorRes resCol(lhs.rows(), rhs.cols()); internal::conservative_sparse_sparse_product_impl(lhs, rhs, resCol); res = resCol; } }; template struct conservative_sparse_sparse_product_selector { static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { using ColMajorLhs = WithStorageOrder; using ColMajorRes = WithStorageOrder; ColMajorLhs lhsCol = lhs; ColMajorRes resCol(lhs.rows(), rhs.cols()); internal::conservative_sparse_sparse_product_impl(lhsCol, rhs, resCol); res = resCol; } }; template struct conservative_sparse_sparse_product_selector { static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { using ColMajorRhs = WithStorageOrder; using ColMajorRes = WithStorageOrder; ColMajorRhs rhsCol = rhs; ColMajorRes resCol(lhs.rows(), rhs.cols()); internal::conservative_sparse_sparse_product_impl(lhs, rhsCol, resCol); res = resCol; } }; template struct conservative_sparse_sparse_product_selector { static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { using ColMajorRes = WithStorageOrder; using RowMajorRes = WithStorageOrder; RowMajorRes resRow(lhs.rows(), rhs.cols()); internal::conservative_sparse_sparse_product_impl(rhs, lhs, resRow); // sort the non zeros: ColMajorRes resCol(resRow); res = resCol; } }; } // end namespace internal namespace internal { template static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res) { typedef typename remove_all_t::Scalar LhsScalar; typedef typename remove_all_t::Scalar RhsScalar; Index cols = rhs.outerSize(); eigen_assert(lhs.outerSize() == rhs.innerSize()); evaluator lhsEval(lhs); evaluator rhsEval(rhs); for (Index j = 0; j < cols; ++j) { for (typename evaluator::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) { RhsScalar y = rhsIt.value(); Index k = rhsIt.index(); for (typename evaluator::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt) { Index i = lhsIt.index(); LhsScalar x = lhsIt.value(); res.coeffRef(i, j) += x * y; } } } } } // end namespace internal namespace internal { template ::Flags & RowMajorBit) ? RowMajor : ColMajor, int RhsStorageOrder = (traits::Flags & RowMajorBit) ? RowMajor : ColMajor> struct sparse_sparse_to_dense_product_selector; template struct sparse_sparse_to_dense_product_selector { static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { internal::sparse_sparse_to_dense_product_impl(lhs, rhs, res); } }; template struct sparse_sparse_to_dense_product_selector { static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { using ColMajorLhs = WithStorageOrder; ColMajorLhs lhsCol(lhs); internal::sparse_sparse_to_dense_product_impl(lhsCol, rhs, res); } }; template struct sparse_sparse_to_dense_product_selector { static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { using ColMajorRhs = WithStorageOrder; ColMajorRhs rhsCol(rhs); internal::sparse_sparse_to_dense_product_impl(lhs, rhsCol, res); } }; template struct sparse_sparse_to_dense_product_selector { static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { Transpose trRes(res); internal::sparse_sparse_to_dense_product_impl>(rhs, lhs, trRes); } }; } // end namespace internal } // end namespace Eigen #endif // EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H