From 40865fa28cab9473959458ec890d68b9df398dc9 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Wed, 18 Nov 2009 17:20:39 -0500 Subject: [PATCH] fix bugs, old and new: * old bug: in CwiseBinaryOp: only set the LinearAccessBit if both sides have the same storage order. * new bug: in Assign.h, only consider linear traversal if both sides have the same storage order. --- Eigen/src/Core/Assign.h | 12 +++++++----- Eigen/src/Core/CwiseBinaryOp.h | 11 ++++++++--- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 32f9623e8..00febdc5d 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -28,7 +28,7 @@ #define EIGEN_ASSIGN_H /*************************************************************************** -* Part 1 : the logic deciding a strategy for vectorization and unrolling +* Part 1 : the logic deciding a strategy for traversal and unrolling * ***************************************************************************/ template @@ -53,11 +53,12 @@ private: }; enum { - MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit) - && ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)), + StorageOrdersAgree = (int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit), + MightVectorize = StorageOrdersAgree + && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit), MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 && int(DstIsAligned) && int(SrcIsAligned), - MayLinearize = (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), + MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), MayLinearVectorize = MightVectorize && MayLinearize && (DstIsAligned || InnerMaxSize == Dynamic), /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, @@ -73,7 +74,7 @@ public: Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) -// : int(MayLinearize) ? int(LinearTraversal) + : int(MayLinearize) ? int(LinearTraversal) : int(DefaultTraversal), Vectorized = int(Traversal) == InnerVectorizedTraversal || int(Traversal) == LinearVectorizedTraversal @@ -110,6 +111,7 @@ public: EIGEN_DEBUG_VAR(InnerSize) EIGEN_DEBUG_VAR(InnerMaxSize) EIGEN_DEBUG_VAR(PacketSize) + EIGEN_DEBUG_VAR(StorageOrdersAgree) EIGEN_DEBUG_VAR(MightVectorize) EIGEN_DEBUG_VAR(MayInnerVectorize) EIGEN_DEBUG_VAR(MayLinearVectorize) diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index 875bc9aa5..4bea0425f 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -67,11 +67,16 @@ struct ei_traits > ColsAtCompileTime = Lhs::ColsAtCompileTime, MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime, MaxColsAtCompileTime = Lhs::MaxColsAtCompileTime, + StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit), Flags = (int(LhsFlags) | int(RhsFlags)) & ( HereditaryBits - | (int(LhsFlags) & int(RhsFlags) & (LinearAccessBit | AlignedBit)) - | (ei_functor_traits::PacketAccess && ((int(LhsFlags) & RowMajorBit)==(int(RhsFlags) & RowMajorBit)) - ? (int(LhsFlags) & int(RhsFlags) & PacketAccessBit) : 0)), + | (int(LhsFlags) & int(RhsFlags) & + ( AlignedBit + | (StorageOrdersAgree ? LinearAccessBit : 0) + | (ei_functor_traits::PacketAccess && StorageOrdersAgree ? PacketAccessBit : 0) + ) + ) + ), CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits::Cost }; };