From 2577ef90c027d8bad4dd632022fa65cec6313159 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 10 Nov 2010 18:58:55 +0100 Subject: [PATCH] generalize our internal rank K update routine to support more general A*B product while evaluating only one triangular part and make it available via, e.g.: R.triangularView() += s * A * B; --- Eigen/Core | 1 + Eigen/src/Core/TriangularMatrix.h | 50 +++- .../products/GeneralMatrixMatrixTriangular.h | 225 ++++++++++++++++++ Eigen/src/Core/products/SelfadjointProduct.h | 176 +------------- test/CMakeLists.txt | 1 + test/product_mmtr.cpp | 80 +++++++ 6 files changed, 360 insertions(+), 173 deletions(-) create mode 100644 Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h create mode 100644 test/product_mmtr.cpp diff --git a/Eigen/Core b/Eigen/Core index 5e81d10d1..48a5cb2c1 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -308,6 +308,7 @@ using std::size_t; #include "src/Core/products/GeneralBlockPanelKernel.h" #include "src/Core/products/GeneralMatrixVector.h" #include "src/Core/products/GeneralMatrixMatrix.h" +#include "src/Core/products/GeneralMatrixMatrixTriangular.h" #include "src/Core/products/SelfadjointMatrixVector.h" #include "src/Core/products/SelfadjointMatrixMatrix.h" #include "src/Core/products/SelfadjointProduct.h" diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index e361af643..ba0f2741e 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -189,9 +189,9 @@ template class TriangularView inline Index innerStride() const { return m_matrix.innerStride(); } /** \sa MatrixBase::operator+=() */ - template TriangularView& operator+=(const Other& other) { return *this = m_matrix + other; } + template TriangularView& operator+=(const DenseBase& other) { return *this = m_matrix + other.derived(); } /** \sa MatrixBase::operator-=() */ - template TriangularView& operator-=(const Other& other) { return *this = m_matrix - other; } + template TriangularView& operator-=(const DenseBase& other) { return *this = m_matrix - other.derived(); } /** \sa MatrixBase::operator*=() */ TriangularView& operator*=(const typename internal::traits::Scalar& other) { return *this = m_matrix * other; } /** \sa MatrixBase::operator/=() */ @@ -292,7 +292,6 @@ template class TriangularView (lhs.derived(),rhs.m_matrix); } - template typename internal::plain_matrix_type_column_major::type solve(const MatrixBase& other) const; @@ -341,8 +340,51 @@ template class TriangularView else return m_matrix.diagonal().prod(); } - + + // TODO simplify the following: + template + EIGEN_STRONG_INLINE TriangularView& operator=(const ProductBase& other) + { + setZero(); + return assignProduct(other,1); + } + + template + EIGEN_STRONG_INLINE TriangularView& operator+=(const ProductBase& other) + { + return assignProduct(other,1); + } + + template + EIGEN_STRONG_INLINE TriangularView& operator-=(const ProductBase& other) + { + return assignProduct(other,-1); + } + + + template + EIGEN_STRONG_INLINE TriangularView& operator=(const ScaledProduct& other) + { + setZero(); + return assignProduct(other,other.alpha()); + } + + template + EIGEN_STRONG_INLINE TriangularView& operator+=(const ScaledProduct& other) + { + return assignProduct(other,other.alpha()); + } + + template + EIGEN_STRONG_INLINE TriangularView& operator-=(const ScaledProduct& other) + { + return assignProduct(other,-other.alpha()); + } + protected: + + template + EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase& prod, const Scalar& alpha); const MatrixTypeNested m_matrix; }; diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h new file mode 100644 index 000000000..c29ea90fd --- /dev/null +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -0,0 +1,225 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H +#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H + +namespace internal { + +/********************************************************************** +* This file implements a general A * B product while +* evaluating only one triangular part of the product. +* This is more general version of self adjoint product (C += A A^T) +* as the level 3 SYRK Blas routine. +**********************************************************************/ + +// forward declarations (defined at the end of this file) +template +struct tribb_kernel; + +/* Optimized matrix-matrix product evaluating only one triangular half */ +template +struct general_matrix_matrix_triangular_product; + +// as usual if the result is row major => we transpose the product +template +struct general_matrix_matrix_triangular_product +{ + typedef typename scalar_product_traits::ReturnType ResScalar; + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, + const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha) + { + general_matrix_matrix_triangular_product + ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha); + } +}; + +template +struct general_matrix_matrix_triangular_product +{ + typedef typename scalar_product_traits::ReturnType ResScalar; + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, + const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha) + { + const_blas_data_mapper lhs(_lhs,lhsStride); + const_blas_data_mapper rhs(_rhs,rhsStride); + + typedef gebp_traits Traits; + + Index kc = depth; // cache block size along the K direction + Index mc = size; // cache block size along the M direction + Index nc = size; // cache block size along the N direction + computeProductBlockingSizes(kc, mc, nc); + // !!! mc must be a multiple of nr: + if(mc > Traits::nr) + mc = (mc/Traits::nr)*Traits::nr; + + LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, kc*mc); + std::size_t sizeW = kc*Traits::WorkSpaceFactor; + std::size_t sizeB = sizeW + kc*size; + RhsScalar* allocatedBlockB = ei_aligned_stack_new(RhsScalar, sizeB); + RhsScalar* blockB = allocatedBlockB + sizeW; + + gemm_pack_lhs pack_lhs; + gemm_pack_rhs pack_rhs; + gebp_kernel gebp; + tribb_kernel sybb; + + for(Index k2=0; k2 processed with gebp or skipped + // 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel + // 3 - after the diagonal => processed with gebp or skipped + if (UpLo==Lower) + gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, std::min(size,i2), alpha, + -1, -1, 0, 0, allocatedBlockB); + + sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB); + + if (UpLo==Upper) + { + Index j2 = i2+actual_mc; + gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, std::max(Index(0), size-j2), alpha, + -1, -1, 0, 0, allocatedBlockB); + } + } + } + ei_aligned_stack_delete(LhsScalar, blockA, kc*mc); + ei_aligned_stack_delete(RhsScalar, allocatedBlockB, sizeB); + } +}; + +// Optimized packed Block * packed Block product kernel evaluating only one given triangular part +// This kernel is built on top of the gebp kernel: +// - the current destination block is processed per panel of actual_mc x BlockSize +// where BlockSize is set to the minimal value allowing gebp to be as fast as possible +// - then, as usual, each panel is split into three parts along the diagonal, +// the sub blocks above and below the diagonal are processed as usual, +// while the triangular block overlapping the diagonal is evaluated into a +// small temporary buffer which is then accumulated into the result using a +// triangular traversal. +template +struct tribb_kernel +{ + typedef gebp_traits Traits; + typedef typename Traits::ResScalar ResScalar; + + enum { + BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr) + }; + void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, ResScalar alpha, RhsScalar* workspace) + { + gebp_kernel gebp_kernel; + Matrix buffer; + + // let's process the block per panel of actual_mc x BlockSize, + // again, each is split into three parts, etc. + for (Index j=0; j(BlockSize,size - j); + const RhsScalar* actual_b = blockB+j*depth; + + if(UpLo==Upper) + gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha, + -1, -1, 0, 0, workspace); + + // selfadjoint micro block + { + Index i = j; + buffer.setZero(); + // 1 - apply the kernel on the temporary buffer + gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha, + -1, -1, 0, 0, workspace); + // 2 - triangular accumulation + for(Index j1=0; j1 +template +TriangularView& TriangularView::assignProduct(const ProductBase& prod, const Scalar& alpha) +{ + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; + typedef typename internal::remove_all::type _ActualLhs; + const ActualLhs actualLhs = LhsBlasTraits::extract(prod.lhs()); + + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; + typedef typename internal::remove_all::type _ActualRhs; + const ActualRhs actualRhs = RhsBlasTraits::extract(prod.rhs()); + + typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); + + internal::general_matrix_matrix_triangular_product + ::run(m_matrix.cols(), actualLhs.cols(), + &actualLhs.coeff(0,0), actualLhs.outerStride(), &actualRhs.coeff(0,0), actualRhs.outerStride(), + const_cast(m_matrix.data()), m_matrix.outerStride(), actualAlpha); + + return *this; +} + +#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h index 16320fa17..c6a5287b5 100644 --- a/Eigen/src/Core/products/SelfadjointProduct.h +++ b/Eigen/src/Core/products/SelfadjointProduct.h @@ -25,175 +25,12 @@ #ifndef EIGEN_SELFADJOINT_PRODUCT_H #define EIGEN_SELFADJOINT_PRODUCT_H -namespace internal { - /********************************************************************** * This file implements a self adjoint product: C += A A^T updating only * half of the selfadjoint matrix C. * It corresponds to the level 3 SYRK Blas routine. **********************************************************************/ -// forward declarations (defined at the end of this file) -template -struct sybb_kernel; - -/* Optimized selfadjoint product (_SYRK) */ -template -struct selfadjoint_product; - -// as usual if the result is row major => we transpose the product -template -struct selfadjoint_product -{ - static EIGEN_STRONG_INLINE void run(Index size, Index depth, const Scalar* mat, Index matStride, Scalar* res, Index resStride, Scalar alpha) - { - selfadjoint_product - ::run(size, depth, mat, matStride, res, resStride, alpha); - } -}; - -template -struct selfadjoint_product -{ - - static EIGEN_DONT_INLINE void run( - Index size, Index depth, - const Scalar* _mat, Index matStride, - Scalar* res, Index resStride, - Scalar alpha) - { - const_blas_data_mapper mat(_mat,matStride); - -// if(AAT) -// alpha = conj(alpha); - - typedef gebp_traits Traits; - - Index kc = depth; // cache block size along the K direction - Index mc = size; // cache block size along the M direction - Index nc = size; // cache block size along the N direction - computeProductBlockingSizes(kc, mc, nc); - // !!! mc must be a multiple of nr: - if(mc>Traits::nr) - mc = (mc/Traits::nr)*Traits::nr; - - Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); - std::size_t sizeW = kc*Traits::WorkSpaceFactor; - std::size_t sizeB = sizeW + kc*size; - Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB); - Scalar* blockB = allocatedBlockB + sizeW; - - // note that the actual rhs is the transpose/adjoint of mat - enum { - ConjLhs = NumTraits::IsComplex && !AAT, - ConjRhs = NumTraits::IsComplex && AAT - }; - - gebp_kernel gebp_kernel; - gemm_pack_rhs pack_rhs; - gemm_pack_lhs pack_lhs; - sybb_kernel sybb; - - for(Index k2=0; k2 processed with gebp or skipped - // 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel - // 3 - after the diagonal => processed with gebp or skipped - if (UpLo==Lower) - gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, std::min(size,i2), alpha, - -1, -1, 0, 0, allocatedBlockB); - - sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB); - - if (UpLo==Upper) - { - Index j2 = i2+actual_mc; - gebp_kernel(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, std::max(Index(0), size-j2), alpha, - -1, -1, 0, 0, allocatedBlockB); - } - } - } - ei_aligned_stack_delete(Scalar, blockA, kc*mc); - ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB); - } -}; - -// Optimized SYmmetric packed Block * packed Block product kernel. -// This kernel is built on top of the gebp kernel: -// - the current selfadjoint block (res) is processed per panel of actual_mc x BlockSize -// where BlockSize is set to the minimal value allowing gebp to be as fast as possible -// - then, as usual, each panel is split into three parts along the diagonal, -// the sub blocks above and below the diagonal are processed as usual, -// while the selfadjoint block overlapping the diagonal is evaluated into a -// small temporary buffer which is then accumulated into the result using a -// triangular traversal. -template -struct sybb_kernel -{ - enum { - PacketSize = packet_traits::size, - BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr) - }; - void operator()(Scalar* res, Index resStride, const Scalar* blockA, const Scalar* blockB, Index size, Index depth, Scalar alpha, Scalar* workspace) - { - gebp_kernel gebp_kernel; - Matrix buffer; - - // let's process the block per panel of actual_mc x BlockSize, - // again, each is split into three parts, etc. - for (Index j=0; j(BlockSize,size - j); - const Scalar* actual_b = blockB+j*depth; - - if(UpLo==Upper) - gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha, - -1, -1, 0, 0, workspace); - - // selfadjoint micro block - { - Index i = j; - buffer.setZero(); - // 1 - apply the kernel on the temporary buffer - gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha, - -1, -1, 0, 0, workspace); - // 2 - triangular accumulation - for(Index j1=0; j1 @@ -209,12 +46,13 @@ SelfAdjointView& SelfAdjointView Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived()); enum { IsRowMajor = (internal::traits::Flags&RowMajorBit) ? 1 : 0 }; - - internal::selfadjoint_product - ::run(_expression().cols(), actualU.cols(), &actualU.coeff(0,0), actualU.outerStride(), + + internal::general_matrix_matrix_triangular_product::IsComplex, + Scalar, _ActualUType::Flags&RowMajorBit ? ColMajor : RowMajor, (!UBlasTraits::NeedToConjugate) && NumTraits::IsComplex, + MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo> + ::run(_expression().cols(), actualU.cols(), + &actualU.coeff(0,0), actualU.outerStride(), &actualU.coeff(0,0), actualU.outerStride(), const_cast(_expression().data()), _expression().outerStride(), actualAlpha); return *this; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 507eef31d..5f6e50a08 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -73,6 +73,7 @@ ei_add_test(product_syrk) ei_add_test(product_trmv) ei_add_test(product_trmm) ei_add_test(product_trsolve) +ei_add_test(product_mmtr) ei_add_test(product_notemporary) ei_add_test(stable_norm) ei_add_test(bandmatrix) diff --git a/test/product_mmtr.cpp b/test/product_mmtr.cpp new file mode 100644 index 000000000..1048a894d --- /dev/null +++ b/test/product_mmtr.cpp @@ -0,0 +1,80 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include "main.h" + +#define CHECK_MMTR(DEST, TRI, OP) { \ + ref2 = ref1 = DEST; \ + DEST.template triangularView() OP; \ + ref1 OP; \ + ref2.template triangularView() = ref1; \ + VERIFY_IS_APPROX(DEST,ref2); \ + } + +template void mmtr(int size) +{ + typedef typename NumTraits::Real RealScalar; + + typedef Matrix MatrixColMaj; + typedef Matrix MatrixRowMaj; + + DenseIndex othersize = internal::random(1,200); + + MatrixColMaj matc(size, size); + MatrixRowMaj matr(size, size); + MatrixColMaj ref1(size, size), ref2(size, size); + + MatrixColMaj soc(size,othersize); soc.setRandom(); + MatrixColMaj osc(othersize,size); osc.setRandom(); + MatrixRowMaj sor(size,othersize); sor.setRandom(); + MatrixRowMaj osr(othersize,size); osr.setRandom(); + + Scalar s = internal::random(); + + CHECK_MMTR(matc, Lower, = s*soc*sor.adjoint()); + CHECK_MMTR(matc, Upper, = s*(soc*soc.adjoint())); + CHECK_MMTR(matr, Lower, = s*soc*soc.adjoint()); + CHECK_MMTR(matr, Upper, = soc*(s*sor.adjoint())); + + CHECK_MMTR(matc, Lower, += s*soc*soc.adjoint()); + CHECK_MMTR(matc, Upper, += s*(soc*sor.transpose())); + CHECK_MMTR(matr, Lower, += s*sor*soc.adjoint()); + CHECK_MMTR(matr, Upper, += soc*(s*soc.adjoint())); + + CHECK_MMTR(matc, Lower, -= s*soc*soc.adjoint()); + CHECK_MMTR(matc, Upper, -= s*(osc.transpose()*osc.conjugate())); + CHECK_MMTR(matr, Lower, -= s*soc*soc.adjoint()); + CHECK_MMTR(matr, Upper, -= soc*(s*soc.adjoint())); +} + +void test_product_mmtr() +{ + for(int i = 0; i < g_repeat ; i++) + { + CALL_SUBTEST_1((mmtr(internal::random(1,320)))); + CALL_SUBTEST_2((mmtr(internal::random(1,320)))); + CALL_SUBTEST_3((mmtr >(internal::random(1,200)))); + CALL_SUBTEST_4((mmtr >(internal::random(1,200)))); + } +}