From 931814d7c08f83e12b39f6185a437cb10698c0ec Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 24 Mar 2011 23:19:53 +0100 Subject: [PATCH] improve performance of trsm --- .../Core/products/TriangularSolverMatrix.h | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h index a6ad9c322..4f5f85cbc 100644 --- a/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/Eigen/src/Core/products/TriangularSolverMatrix.h @@ -75,6 +75,7 @@ struct triangular_solve_matrix conj; gebp_kernel gebp_kernel; @@ -92,16 +93,19 @@ struct triangular_solve_matrix general block copy (done during the next step) - // - R1 = L1^-1 B => tricky part + // - R1 = A11^-1 B => tricky part // - update B from the new R1 => actually this has to be performed continuously during the above step - // - R2 = L2 * B => GEPP + // - R2 -= A21 * B => GEPP - // The tricky part: compute R1 = L1^-1 B while updating B from R1 - // The idea is to split L1 into multiple small vertical panels. - // Each panel can be split into a small triangular part A1 which is processed without optimization, - // and the remaining small part A2 which is processed using gebp with appropriate block strides + // The tricky part: compute R1 = A11^-1 B while updating B from R1 + // The idea is to split A11 into multiple small vertical panels. + // Each panel can be split into a small triangular part T1k which is processed without optimization, + // and the remaining small part T2k which is processed using gebp with appropriate block strides + Index subcols = (kc/Traits::nr)*Traits::nr; // TODO kc might not be an ideal choice here + for(Index j2=0; j2(actual_kc-k1, SmallPanelWidth); @@ -114,7 +118,7 @@ struct triangular_solve_matrix0) @@ -152,13 +156,13 @@ struct triangular_solve_matrix GEPP + + // R2 -= A21 * B => GEPP { Index start = IsLower ? k2+kc : 0; Index end = IsLower ? size : k2-kc;