From ac9aa47bbc3ab6a6921c2df9d2430bc054196be6 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 23 Jun 2008 15:50:28 +0000 Subject: [PATCH] optimize linear vectorization both in Assign and Sum (optimal amortized perf) --- Eigen/src/Core/Assign.h | 17 +++++++++++------ Eigen/src/Core/Sum.h | 35 +++++++++++++---------------------- 2 files changed, 24 insertions(+), 28 deletions(-) diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 63eda1e85..c28a0371b 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -307,12 +307,17 @@ struct ei_assign_impl int index = 0; // do the vectorizable part of the assignment - for ( ; index(row, col, src.template packet(row, col)); + int start = rowMajor ? col : row; + int end = std::min(innerSize, start + alignedSize-index); + for ( ; (rowMajor ? col : row)(row, col, src.template packet(row, col)); + index += (rowMajor ? col : row) - start; + row = rowMajor ? index/innerSize : index%innerSize; + col = rowMajor ? index%innerSize : index/innerSize; } // now we must do the rest without vectorization. @@ -380,7 +385,7 @@ struct ei_assign_impl const int innerSize = rowMajor ? dst.cols() : dst.rows(); const int outerSize = rowMajor ? dst.rows() : dst.cols(); const int alignedInnerSize = (innerSize/packetSize)*packetSize; - + for(int i = 0; i < outerSize; i++) { // do the vectorizable part of the assignment diff --git a/Eigen/src/Core/Sum.h b/Eigen/src/Core/Sum.h index c6b7cacce..d638f0979 100644 --- a/Eigen/src/Core/Sum.h +++ b/Eigen/src/Core/Sum.h @@ -54,7 +54,7 @@ public: Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling - }; + }; }; /*************************************************************************** @@ -62,7 +62,7 @@ public: ***************************************************************************/ /*** no vectorization ***/ - + template struct ei_sum_novec_unroller { @@ -194,32 +194,23 @@ struct ei_sum_impl // do the vectorizable part of the sum if(size >= packetSize) { - asm("#begin"); - PacketScalar packet_res; packet_res = mat.template packet(0, 0); - int index; - if(Derived::IsVectorAtCompileTime) + int row = 0; + int col = 0; + int index = packetSize; + while (index(row, col)); - } - } - else - { - for(index = packetSize; index(row, col)); - } + index += (rowMajor ? col : row) - start; } res = ei_predux(packet_res); - asm("#end"); // now we must do the rest without vectorization. if(alignedSize == size) return res;