Generalize first_aligned to take the requested alignment as a template parameter, and add a first_default_aligned variante calling first_aligned with the requirement of the largest packet for the given scalar type.

This commit is contained in:
Gael Guennebaud
2015-08-06 17:52:01 +02:00
parent 1f5024332e
commit 2afdef6a54
14 changed files with 69 additions and 48 deletions

View File

@@ -39,9 +39,9 @@ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const
};
Index d_end = (d/RK)*RK; // number of columns of A (rows of B) suitable for full register blocking
Index n_end = (n/RN)*RN; // number of columns of B-C suitable for processing RN columns at once
Index i0 = internal::first_aligned(A,m);
Index i0 = internal::first_default_aligned(A,m);
eigen_internal_assert(((lda%PacketSize)==0) && ((ldc%PacketSize)==0) && (i0==internal::first_aligned(C,m)));
eigen_internal_assert(((lda%PacketSize)==0) && ((ldc%PacketSize)==0) && (i0==internal::first_default_aligned(C,m)));
// handle the non aligned rows of A and C without any optimization:
for(Index i=0; i<i0; ++i)

View File

@@ -66,8 +66,8 @@ EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const Index seg
const Index PacketSize = internal::packet_traits<Scalar>::size;
Index ldl = internal::first_multiple(nrow, PacketSize);
Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(lda) );
Index aligned_offset = internal::first_aligned(tempv.data()+segsize, PacketSize);
Index aligned_with_B_offset = (PacketSize-internal::first_aligned(B.data(), PacketSize))%PacketSize;
Index aligned_offset = internal::first_default_aligned(tempv.data()+segsize, PacketSize);
Index aligned_with_B_offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize))%PacketSize;
Map<Matrix<Scalar,Dynamic,1>, 0, OuterStride<> > l(tempv.data()+segsize+aligned_offset+aligned_with_B_offset, nrow, OuterStride<>(ldl) );
l.setZero();

View File

@@ -145,7 +145,7 @@ void SparseLUImpl<Scalar,StorageIndex>::panel_bmod(const Index m, const Index w,
eigen_assert(tempv.size()>w*ldu + nrow*w + 1);
Index ldl = internal::first_multiple<Index>(nrow, PacketSize);
Index offset = (PacketSize-internal::first_aligned(B.data(), PacketSize)) % PacketSize;
Index offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize)) % PacketSize;
Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > L(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl));
L.setZero();