From c98881e1306c94c53ad3de77f9e9036d98dbcf2a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sun, 23 Feb 2014 22:51:13 +0100 Subject: [PATCH] By-pass ProductBase for triangular and selfadjoint products and get rid of ProductBase --- Eigen/src/Core/GeneralProduct.h | 1 - Eigen/src/Core/ProductBase.h | 4 +- Eigen/src/Core/ProductEvaluators.h | 20 +-- Eigen/src/Core/SelfAdjointView.h | 2 + .../Core/products/SelfadjointMatrixMatrix.h | 52 ++++++++ .../Core/products/SelfadjointMatrixVector.h | 104 ++++++++++++++++ .../Core/products/TriangularMatrixMatrix.h | 53 +++++++- .../Core/products/TriangularMatrixVector.h | 117 +++++++++++------- 8 files changed, 299 insertions(+), 54 deletions(-) diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index 57d5d3c38..06aa05ee6 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -688,7 +688,6 @@ template<> struct gemv_selector typedef typename Lhs::Scalar LhsScalar; typedef typename Rhs::Scalar RhsScalar; typedef typename Dest::Scalar ResScalar; - typedef typename Dest::RealScalar RealScalar; typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; diff --git a/Eigen/src/Core/ProductBase.h b/Eigen/src/Core/ProductBase.h index f7cef9a9e..3b2246fd8 100644 --- a/Eigen/src/Core/ProductBase.h +++ b/Eigen/src/Core/ProductBase.h @@ -11,6 +11,8 @@ #define EIGEN_PRODUCTBASE_H namespace Eigen { + +#ifndef EIGEN_TEST_EVALUATORS /** \class ProductBase * \ingroup Core_Module @@ -174,8 +176,6 @@ class ProductBase : public MatrixBase mutable PlainObject m_result; }; -#ifndef EIGEN_TEST_EVALUATORS - // here we need to overload the nested rule for products // such that the nested type is a const reference to a plain matrix namespace internal { diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 5f9cf69a2..186ae4a34 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -532,6 +532,10 @@ struct etor_product_packet_impl /*************************************************************************** * Triangular products ***************************************************************************/ +template +struct triangular_product_impl; template struct generic_product_impl @@ -542,8 +546,8 @@ struct generic_product_impl template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - // TODO bypass TriangularProduct class - TriangularProduct(lhs.nestedExpression(),rhs).scaleAndAddTo(dst, alpha); + triangular_product_impl + ::run(dst, lhs.nestedExpression(), rhs, alpha); } }; @@ -576,8 +580,7 @@ struct generic_product_impl template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - // TODO bypass TriangularProduct class - TriangularProduct(lhs,rhs.nestedExpression()).scaleAndAddTo(dst, alpha); + triangular_product_impl::run(dst, lhs, rhs.nestedExpression(), alpha); } }; @@ -605,6 +608,9 @@ protected: /*************************************************************************** * SelfAdjoint products ***************************************************************************/ +template +struct selfadjoint_product_impl; template struct generic_product_impl @@ -615,8 +621,7 @@ struct generic_product_impl template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - // TODO bypass SelfadjointProductMatrix class - SelfadjointProductMatrix(lhs.nestedExpression(),rhs).scaleAndAddTo(dst, alpha); + selfadjoint_product_impl::run(dst, lhs.nestedExpression(), rhs, alpha); } }; @@ -649,8 +654,7 @@ struct generic_product_impl template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - // TODO bypass SelfadjointProductMatrix class - SelfadjointProductMatrix(lhs,rhs.nestedExpression()).scaleAndAddTo(dst, alpha); + selfadjoint_product_impl::run(dst, lhs, rhs.nestedExpression(), alpha); } }; diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h index 2cc1815fd..f7f512cf4 100644 --- a/Eigen/src/Core/SelfAdjointView.h +++ b/Eigen/src/Core/SelfAdjointView.h @@ -45,9 +45,11 @@ struct traits > : traits }; } +#ifndef EIGEN_TEST_EVALUATORS template struct SelfadjointProductMatrix; +#endif // FIXME could also be called SelfAdjointWrapper to be consistent with DiagonalWrapper ?? template class SelfAdjointView diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h index 99cf9e0ae..f252aef85 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h @@ -381,6 +381,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix struct traits > @@ -430,6 +431,57 @@ struct SelfadjointProductMatrix ); } }; +#else // EIGEN_TEST_EVALUATORS +namespace internal { + +template +struct selfadjoint_product_impl +{ + typedef typename Product::Scalar Scalar; + typedef typename Product::Index Index; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + + enum { + LhsIsUpper = (LhsMode&(Upper|Lower))==Upper, + LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint, + RhsIsUpper = (RhsMode&(Upper|Lower))==Upper, + RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint + }; + + template + static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha) + { + eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols()); + + typename internal::add_const_on_value_type::type lhs = LhsBlasTraits::extract(a_lhs); + typename internal::add_const_on_value_type::type rhs = RhsBlasTraits::extract(a_rhs); + + Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) + * RhsBlasTraits::extractScalarFactor(a_rhs); + + internal::product_selfadjoint_matrix::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, + NumTraits::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), + EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint, + NumTraits::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)), + internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor> + ::run( + lhs.rows(), rhs.cols(), // sizes + &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info + &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info + &dst.coeffRef(0,0), dst.outerStride(), // result info + actualAlpha // alpha + ); + } +}; + +} // end namespace internal + +#endif } // end namespace Eigen diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index f698f67f9..ddc07d535 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -168,6 +168,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product struct traits > @@ -276,6 +277,109 @@ struct SelfadjointProductMatrix } }; +#else // EIGEN_TEST_EVALUATORS + +namespace internal { + +template +struct selfadjoint_product_impl +{ + typedef typename Product::Scalar Scalar; + typedef typename Product::Index Index; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef typename internal::remove_all::type ActualLhsTypeCleaned; + + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all::type ActualRhsTypeCleaned; + + enum { LhsUpLo = LhsMode&(Upper|Lower) }; + + template + static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha) + { + typedef typename Dest::Scalar ResScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef Map, Aligned> MappedDest; + + eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols()); + + typename internal::add_const_on_value_type::type lhs = LhsBlasTraits::extract(a_lhs); + typename internal::add_const_on_value_type::type rhs = RhsBlasTraits::extract(a_rhs); + + Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) + * RhsBlasTraits::extractScalarFactor(a_rhs); + + enum { + EvalToDest = (Dest::InnerStrideAtCompileTime==1), + UseRhs = (ActualRhsTypeCleaned::InnerStrideAtCompileTime==1) + }; + + internal::gemv_static_vector_if static_dest; + internal::gemv_static_vector_if static_rhs; + + ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), + EvalToDest ? dest.data() : static_dest.data()); + + ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(), + UseRhs ? const_cast(rhs.data()) : static_rhs.data()); + + if(!EvalToDest) + { + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + int size = dest.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + MappedDest(actualDestPtr, dest.size()) = dest; + } + + if(!UseRhs) + { + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + int size = rhs.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + Map(actualRhsPtr, rhs.size()) = rhs; + } + + + internal::selfadjoint_matrix_vector_product::Flags&RowMajorBit) ? RowMajor : ColMajor, + int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run + ( + lhs.rows(), // size + &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info + actualRhsPtr, 1, // rhs info + actualDestPtr, // result info + actualAlpha // scale factor + ); + + if(!EvalToDest) + dest = MappedDest(actualDestPtr, dest.size()); + } +}; + +template +struct selfadjoint_product_impl +{ + typedef typename Product::Scalar Scalar; + enum { RhsUpLo = RhsMode&(Upper|Lower) }; + + template + static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha) + { + // let's simply transpose the product + Transpose destT(dest); + selfadjoint_product_impl, int(RhsUpLo)==Upper ? Lower : Upper, false, + Transpose, 0, true>::run(destT, a_rhs.transpose(), a_lhs.transpose(), alpha); + } +}; + +} // end namespace internal + +#endif // EIGEN_TEST_EVALUATORS + } // end namespace Eigen #endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index 8110507b5..e654b45b1 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -372,7 +372,6 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix struct traits > : traits, Lhs, Rhs> > @@ -380,6 +379,7 @@ struct traits > } // end namespace internal +#ifndef EIGEN_TEST_EVALUATORS template struct TriangularProduct : public ProductBase, Lhs, Rhs > @@ -421,6 +421,57 @@ struct TriangularProduct ); } }; +#else // EIGEN_TEST_EVALUATORS +namespace internal { +template +struct triangular_product_impl +{ + template static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha) + { + typedef typename Dest::Index Index; + typedef typename Dest::Scalar Scalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef typename internal::remove_all::type ActualLhsTypeCleaned; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all::type ActualRhsTypeCleaned; + + typename internal::add_const_on_value_type::type lhs = LhsBlasTraits::extract(a_lhs); + typename internal::add_const_on_value_type::type rhs = RhsBlasTraits::extract(a_rhs); + + Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) + * RhsBlasTraits::extractScalarFactor(a_rhs); + + typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar, + Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType; + + enum { IsLower = (Mode&Lower) == Lower }; + Index stripedRows = ((!LhsIsTriangular) || (IsLower)) ? lhs.rows() : (std::min)(lhs.rows(),lhs.cols()); + Index stripedCols = ((LhsIsTriangular) || (!IsLower)) ? rhs.cols() : (std::min)(rhs.cols(),rhs.rows()); + Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(),lhs.rows())) + : ((IsLower) ? rhs.rows() : (std::min)(rhs.rows(),rhs.cols())); + + BlockingType blocking(stripedRows, stripedCols, stripedDepth); + + internal::product_triangular_matrix_matrix::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, + (internal::traits::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, + (internal::traits::Flags&RowMajorBit) ? RowMajor : ColMajor> + ::run( + stripedRows, stripedCols, stripedDepth, // sizes + &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info + &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info + &dst.coeffRef(0,0), dst.outerStride(), // result info + actualAlpha, blocking + ); + } +}; + +} // end namespace internal +#endif // EIGEN_TEST_EVALUATORS } // end namespace Eigen diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index 6117d5a82..eed7f4258 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -168,11 +168,12 @@ struct traits > {}; -template +template struct trmv_selector; } // end namespace internal +#ifndef EIGEN_TEST_EVALUATORS template struct TriangularProduct : public ProductBase, Lhs, Rhs > @@ -185,7 +186,7 @@ struct TriangularProduct { eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); - internal::trmv_selector<(int(internal::traits::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dst, alpha); + internal::trmv_selector::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(m_lhs, m_rhs, dst, alpha); } }; @@ -201,39 +202,71 @@ struct TriangularProduct { eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); - typedef TriangularProduct<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),true,Transpose,false,Transpose,true> TriangularProductTranspose; Transpose dstT(dst); - internal::trmv_selector<(int(internal::traits::Flags)&RowMajorBit) ? ColMajor : RowMajor>::run( - TriangularProductTranspose(m_rhs.transpose(),m_lhs.transpose()), dstT, alpha); + internal::trmv_selector<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower), + (int(internal::traits::Flags)&RowMajorBit) ? ColMajor : RowMajor> + ::run(m_rhs.transpose(),m_lhs.transpose(), dstT, alpha); } }; +#else // EIGEN_TEST_EVALUATORS +namespace internal { + +template +struct triangular_product_impl +{ + template static void run(Dest& dst, const Lhs &lhs, const Rhs &rhs, const typename Dest::Scalar& alpha) + { + eigen_assert(dst.rows()==lhs.rows() && dst.cols()==rhs.cols()); + + internal::trmv_selector::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(lhs, rhs, dst, alpha); + } +}; + +template +struct triangular_product_impl +{ + template static void run(Dest& dst, const Lhs &lhs, const Rhs &rhs, const typename Dest::Scalar& alpha) + { + eigen_assert(dst.rows()==lhs.rows() && dst.cols()==rhs.cols()); + + Transpose dstT(dst); + internal::trmv_selector<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower), + (int(internal::traits::Flags)&RowMajorBit) ? ColMajor : RowMajor> + ::run(rhs.transpose(),lhs.transpose(), dstT, alpha); + } +}; + +} // end namespace internal +#endif // EIGEN_TEST_EVALUATORS + namespace internal { // TODO: find a way to factorize this piece of code with gemv_selector since the logic is exactly the same. -template<> struct trmv_selector +template struct trmv_selector { - template - static void run(const TriangularProduct& prod, Dest& dest, const typename TriangularProduct::Scalar& alpha) + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef TriangularProduct ProductType; - typedef typename ProductType::Index Index; - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::RealScalar RealScalar; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; + typedef typename Dest::Index Index; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + typedef typename Dest::RealScalar RealScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef Map, Aligned> MappedDest; - typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(prod.lhs()); - typename internal::add_const_on_value_type::type actualRhs = RhsBlasTraits::extract(prod.rhs()); + typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(lhs); + typename internal::add_const_on_value_type::type actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); enum { // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 @@ -288,33 +321,33 @@ template<> struct trmv_selector } }; -template<> struct trmv_selector +template struct trmv_selector { - template - static void run(const TriangularProduct& prod, Dest& dest, const typename TriangularProduct::Scalar& alpha) + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef TriangularProduct ProductType; - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::Index Index; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::_ActualRhsType _ActualRhsType; - typedef typename ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; + typedef typename Dest::Index Index; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all::type ActualRhsTypeCleaned; - typename add_const::type actualLhs = LhsBlasTraits::extract(prod.lhs()); - typename add_const::type actualRhs = RhsBlasTraits::extract(prod.rhs()); + typename add_const::type actualLhs = LhsBlasTraits::extract(lhs); + typename add_const::type actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); enum { - DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1 + DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 }; - gemv_static_vector_if static_rhs; + gemv_static_vector_if static_rhs; ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(), DirectlyUseRhs ? const_cast(actualRhs.data()) : static_rhs.data()); @@ -325,7 +358,7 @@ template<> struct trmv_selector int size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif - Map(actualRhsPtr, actualRhs.size()) = actualRhs; + Map(actualRhsPtr, actualRhs.size()) = actualRhs; } internal::triangular_matrix_vector_product