* bugfix in SolveTriangular found by Timothy Hunter (it did not compile for very small fixed-size matrices)

* bugfix in Dot unroller
* added a special random generator for the unit tests and reduced the tolerance threshold by an order of magnitude;
  this fixes issues with sum.cpp, but other tests still fail sometimes, so this has to be carefully checked...
This commit is contained in:
Gael Guennebaud
2008-08-22 17:48:36 +00:00
parent a95c1e190b
commit f0394edfa7
14 changed files with 103 additions and 65 deletions

View File

@@ -221,11 +221,18 @@ template<typename Derived1, typename Derived2>
struct ei_dot_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling>
{
typedef typename Derived1::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
Size = Derived1::SizeAtCompileTime,
VectorizationSize = (Size / PacketSize) * PacketSize
};
static Scalar run(const Derived1& v1, const Derived2& v2)
{
return ei_predux(
ei_dot_vec_unroller<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>::run(v1, v2)
);
Scalar res = ei_predux(ei_dot_vec_unroller<Derived1, Derived2, 0, VectorizationSize>::run(v1, v2));
if (VectorizationSize != Size)
res += ei_dot_novec_unroller<Derived1, Derived2, VectorizationSize, Size>::run(v1, v2);
return res;
}
};

View File

@@ -95,7 +95,8 @@ struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,RowMajor>
int endBlock = startBlock + (IsLower ? 4 : -4);
/* Process the i cols times 4 rows block, and keep the result in a temporary vector */
Matrix<Scalar,4,1> btmp;
// FIXME use a fixed-size block, but take care of small fixed-size matrices...
Matrix<Scalar,Dynamic,1> btmp(4);
if (IsLower)
btmp = lhs.block(startBlock,0,4,i) * other.col(c).start(i);
else

View File

@@ -220,7 +220,7 @@ struct ei_palign_impl<Offset,__m128>
inline static void run(__m128& first, const __m128& second)
{
if (Offset!=0)
first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), (Offset)*4));
first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
}
};
@@ -230,7 +230,7 @@ struct ei_palign_impl<Offset,__m128i>
inline static void run(__m128i& first, const __m128i& second)
{
if (Offset!=0)
first = _mm_alignr_epi8(second,first, (Offset)*4);
first = _mm_alignr_epi8(second,first, Offset*4);
}
};