Hey, finally the copyCoeff stuff is not only used to implement swap anymore :)

Add an internal pseudo-expression that allows compound-assignment operators such as += and *=
to be optimized through the copyCoeff mechanism.
This makes it easy to enforce aligned loads for the destination matrix everywhere.
This commit is contained in:
Gael Guennebaud
2009-11-20 15:39:38 +01:00
parent e3d890bc5a
commit eb8f450071
11 changed files with 229 additions and 51 deletions

View File

@@ -99,6 +99,7 @@ ei_add_test(vectorization_logic)
ei_add_test(basicstuff)
ei_add_test(linearstructure)
ei_add_test(cwiseop)
ei_add_test(unalignedcount)
ei_add_test(redux)
ei_add_test(visitor)
ei_add_test(product_small)

View File

@@ -35,7 +35,7 @@ template<typename VectorType> void map_class(const VectorType& m)
Scalar* array2 = ei_aligned_new<Scalar>(size);
Scalar* array3 = new Scalar[size+1];
Scalar* array3unaligned = size_t(array3)%16 == 0 ? array3+1 : array3;
Map<VectorType, Aligned>(array1, size) = VectorType::Random(size);
Map<VectorType, Aligned>(array2, size) = Map<VectorType,Aligned>(array1, size);
Map<VectorType>(array3unaligned, size) = Map<VectorType>(array1, size);
@@ -62,7 +62,7 @@ template<typename VectorType> void map_static_methods(const VectorType& m)
Scalar* array2 = ei_aligned_new<Scalar>(size);
Scalar* array3 = new Scalar[size+1];
Scalar* array3unaligned = size_t(array3)%16 == 0 ? array3+1 : array3;
VectorType::MapAligned(array1, size) = VectorType::Random(size);
VectorType::Map(array2, size) = VectorType::Map(array1, size);
VectorType::Map(array3unaligned, size) = VectorType::Map(array1, size);
@@ -71,7 +71,7 @@ template<typename VectorType> void map_static_methods(const VectorType& m)
VectorType ma3 = VectorType::Map(array3unaligned, size);
VERIFY_IS_APPROX(ma1, ma2);
VERIFY_IS_APPROX(ma1, ma3);
ei_aligned_delete(array1, size);
ei_aligned_delete(array2, size);
delete[] array3;

56
test/unalignedcount.cpp Normal file
View File

@@ -0,0 +1,56 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
// Counters of the packet memory accesses recorded since the last reset.
// Each one is bumped by the EIGEN_DEBUG_* hook of the same kind defined below.
static int nb_load;    // aligned loads
static int nb_loadu;   // unaligned loads
static int nb_store;   // aligned stores
static int nb_storeu;  // unaligned stores

// Instrumentation hooks. They are defined before "main.h" is included so that
// the library headers see them; presumably Eigen expands each hook inside the
// corresponding packet load/store primitive (not visible from this file).
#define EIGEN_DEBUG_ALIGNED_LOAD { nb_load++; }
#define EIGEN_DEBUG_UNALIGNED_LOAD { nb_loadu++; }
#define EIGEN_DEBUG_ALIGNED_STORE { nb_store++; }
#define EIGEN_DEBUG_UNALIGNED_STORE { nb_storeu++; }

// VERIFY_ALIGNED_UNALIGNED_COUNT(XPR, AL, UL, AS, US)
//
// Resets the four counters, evaluates XPR once, then checks that exactly
// AL aligned loads, UL unaligned loads, AS aligned stores and US unaligned
// stores were counted. On mismatch the observed counts are printed to
// std::cerr (in that same order) before VERIFY reports the failure.
//
// Notes:
//  - (#XPR) is a non-null string literal, hence always true; it is and-ed into
//    the condition only so that the text of XPR shows up in the VERIFY message.
//  - Every expected-count parameter is parenthesized so that an argument that
//    is itself an expression (e.g. `c ? 3 : 3`) cannot re-associate with the
//    surrounding == / && operators.
//  - The body is wrapped in do { } while(0) so that the macro followed by ';'
//    forms exactly one statement (safe in unbraced if/else).
#define VERIFY_ALIGNED_UNALIGNED_COUNT(XPR,AL,UL,AS,US) do {\
  nb_load = nb_loadu = nb_store = nb_storeu = 0; \
  XPR; \
  if(!(nb_load==(AL) && nb_loadu==(UL) && nb_store==(AS) && nb_storeu==(US))) \
    std::cerr << " >> " << nb_load << ", " << nb_loadu << ", " << nb_store << ", " << nb_storeu << "\n"; \
  VERIFY( (#XPR) && nb_load==(AL) && nb_loadu==(UL) && nb_store==(AS) && nb_storeu==(US) ); \
} while(0)
#include "main.h"
// Checks how many aligned/unaligned loads and stores are counted for a few
// compound assignments on 40-coefficient float vectors. The four numbers
// passed to VERIFY_ALIGNED_UNALIGNED_COUNT are, in order: aligned loads,
// unaligned loads, aligned stores, unaligned stores.
// The body is compiled only when EIGEN_VECTORIZE_SSE is defined; otherwise the
// test is a no-op.
void test_unalignedcount()
{
  #ifdef EIGEN_VECTORIZE_SSE
  // Give both operands defined values: every expression below reads them, and
  // reading freshly allocated (uninitialized) coefficients would make the
  // inputs indeterminate. This happens before the macro resets the counters,
  // so it does not contribute to any of the verified counts.
  VectorXf a = VectorXf::Random(40);
  VectorXf b = VectorXf::Random(40);
  // Plain vectors: both operands are loaded aligned (20 loads), 10 aligned stores.
  VERIFY_ALIGNED_UNALIGNED_COUNT(a += b, 20, 0, 10, 0);
  // Segments: the destination is still loaded/stored aligned, while the
  // right-hand side segment is loaded unaligned (10 aligned + 10 unaligned loads).
  VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) += b.segment(0,40), 10, 10, 10, 0);
  VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) -= b.segment(0,40), 10, 10, 10, 0);
  // Scalar operand: only the destination is loaded and stored, all aligned.
  VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) *= 3.5, 10, 0, 10, 0);
  VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) /= 3.5, 10, 0, 10, 0);
  #endif
}