mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Add partial vectorization for matrices and tensors of bool. This speeds up boolean operations on Tensors by up to 25x.
Benchmark numbers for the logical AND of two NxN tensors (every benchmark was run using 1 thread):

name                                     old time/op   new time/op   delta
BM_booleanAnd_1T/3   [using 1 threads]   14.6ns ± 0%   14.4ns ± 0%    -0.96%
BM_booleanAnd_1T/4   [using 1 threads]   20.5ns ±12%    9.0ns ± 0%   -56.07%
BM_booleanAnd_1T/7   [using 1 threads]   41.7ns ± 0%   10.5ns ± 0%   -74.87%
BM_booleanAnd_1T/8   [using 1 threads]   52.1ns ± 0%   10.1ns ± 0%   -80.59%
BM_booleanAnd_1T/10  [using 1 threads]   76.3ns ± 0%   13.8ns ± 0%   -81.87%
BM_booleanAnd_1T/15  [using 1 threads]    167ns ± 0%     16ns ± 0%   -90.45%
BM_booleanAnd_1T/16  [using 1 threads]    188ns ± 0%     16ns ± 0%   -91.57%
BM_booleanAnd_1T/31  [using 1 threads]    667ns ± 0%     34ns ± 0%   -94.83%
BM_booleanAnd_1T/32  [using 1 threads]    710ns ± 0%     35ns ± 0%   -95.01%
BM_booleanAnd_1T/64  [using 1 threads]   2.80µs ± 0%   0.11µs ± 0%   -95.93%
BM_booleanAnd_1T/128 [using 1 threads]   11.2µs ± 0%    0.4µs ± 0%   -96.11%
BM_booleanAnd_1T/256 [using 1 threads]   44.6µs ± 0%    2.5µs ± 0%   -94.31%
BM_booleanAnd_1T/512 [using 1 threads]    178µs ± 0%     10µs ± 0%   -94.35%
BM_booleanAnd_1T/1k  [using 1 threads]    717µs ± 0%     78µs ± 1%   -89.07%
BM_booleanAnd_1T/2k  [using 1 threads]   2.87ms ± 0%   0.31ms ± 1%   -89.08%
BM_booleanAnd_1T/4k  [using 1 threads]   11.7ms ± 0%    1.9ms ± 4%   -83.55%
BM_booleanAnd_1T/10k [using 1 threads]   70.3ms ± 0%   17.2ms ± 4%   -75.48%
This commit is contained in:
@@ -70,6 +70,23 @@ void test_cast() {
|
||||
test_cast_helper<FromScalar, FromPacket, ToScalar, ToPacket, CanCast>::run();
|
||||
}
|
||||
|
||||
// Exercises the packet primitives internal::por, internal::pxor and
// internal::pand for the given Scalar/Packet pair by filling an input buffer
// with random values and handing each primitive to CHECK_CWISE2_IF.
//
// Template parameters:
//   Scalar - element type stored in the test buffers.
//   Packet - packet type whose lane count is read from
//            internal::unpacket_traits<Packet>::size.
//
// NOTE(review): CHECK_CWISE2_IF is defined outside this view. data2, ref and
// PacketSize are not referenced anywhere else in this function, so the macro
// presumably picks up data1/data2/ref/PacketSize by name - confirm against
// the macro definition before renaming any of these locals.
template<typename Scalar,typename Packet> void packetmath_boolean()
|
||||
{
|
||||
// Number of Scalar lanes in one Packet.
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
||||
// Buffers hold two packets' worth of scalars (presumably one packet per
// operand of the binary ops checked below - TODO confirm against the macro).
const int size = 2*PacketSize;
|
||||
// Input buffer; the only one explicitly initialised in this function.
EIGEN_ALIGN_MAX Scalar data1[size];
|
||||
// Not written here; presumably the destination of the packet result inside
// CHECK_CWISE2_IF - TODO confirm.
EIGEN_ALIGN_MAX Scalar data2[size];
|
||||
// Not written here; presumably the scalar reference result computed inside
// CHECK_CWISE2_IF - TODO confirm.
EIGEN_ALIGN_MAX Scalar ref[size];
|
||||
|
||||
// Fill all 2*PacketSize input slots with random Scalar values.
for (int i=0; i<size; ++i)
|
||||
{
|
||||
data1[i] = internal::random<Scalar>();
|
||||
}
|
||||
// Each check passes `true` as the condition argument and the same primitive
// in both the second and third positions.
CHECK_CWISE2_IF(true, internal::por, internal::por);
|
||||
CHECK_CWISE2_IF(true, internal::pxor, internal::pxor);
|
||||
CHECK_CWISE2_IF(true, internal::pand, internal::pand);
|
||||
}
|
||||
|
||||
template<typename Scalar,typename Packet> void packetmath()
|
||||
{
|
||||
typedef internal::packet_traits<Scalar> PacketTraits;
|
||||
@@ -337,21 +354,6 @@ template<typename Scalar,typename Packet> void packetmath()
|
||||
VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::pinsertlast");
|
||||
}
|
||||
|
||||
{
|
||||
for (int i=0; i<PacketSize; ++i)
|
||||
{
|
||||
data1[i] = internal::random<Scalar>();
|
||||
unsigned char v = internal::random<bool>() ? 0xff : 0;
|
||||
char* bytes = (char*)(data1+PacketSize+i);
|
||||
for(int k=0; k<int(sizeof(Scalar)); ++k) {
|
||||
bytes[k] = v;
|
||||
}
|
||||
}
|
||||
CHECK_CWISE2_IF(true, internal::por, internal::por);
|
||||
CHECK_CWISE2_IF(true, internal::pxor, internal::pxor);
|
||||
CHECK_CWISE2_IF(true, internal::pand, internal::pand);
|
||||
CHECK_CWISE2_IF(true, internal::pandnot, internal::pandnot);
|
||||
}
|
||||
{
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
// "if" mask
|
||||
@@ -377,8 +379,17 @@ template<typename Scalar,typename Packet> void packetmath()
|
||||
}
|
||||
|
||||
CHECK_CWISE1_IF(PacketTraits::HasSqrt, numext::sqrt, internal::psqrt);
|
||||
|
||||
for (int i=0; i<size; ++i)
|
||||
{
|
||||
data1[i] = internal::random<Scalar>();
|
||||
}
|
||||
CHECK_CWISE2_IF(true, internal::pandnot, internal::pandnot);
|
||||
|
||||
packetmath_boolean<Scalar, Packet>();
|
||||
}
|
||||
|
||||
|
||||
template<typename Scalar,typename Packet> void packetmath_real()
|
||||
{
|
||||
typedef internal::packet_traits<Scalar> PacketTraits;
|
||||
@@ -807,6 +818,9 @@ EIGEN_DECLARE_TEST(packetmath)
|
||||
CALL_SUBTEST_11( test::runner<std::complex<float> >::run() );
|
||||
CALL_SUBTEST_12( test::runner<std::complex<double> >::run() );
|
||||
CALL_SUBTEST_13(( packetmath<half,internal::packet_traits<half>::type>() ));
|
||||
#ifdef EIGEN_PACKET_MATH_SSE_H
|
||||
CALL_SUBTEST_14(( packetmath_boolean<bool,internal::packet_traits<bool>::type>() ));
|
||||
#endif
|
||||
g_first_pass = false;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user