Clang-format tests, examples, libraries, benchmarks, etc.

This commit is contained in:
Antonio Sánchez
2023-12-05 21:22:55 +00:00
committed by Rasmus Munk Larsen
parent 3252ecc7a4
commit 46e9cdb7fe
876 changed files with 33453 additions and 37795 deletions

View File

@@ -14,122 +14,118 @@ using namespace Eigen;
typedef float Scalar;
__attribute__ ((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size);
__attribute__ ((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c);
__attribute__ ((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c);
__attribute__((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size);
__attribute__((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c);
__attribute__((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c);
// NOTE(review): this span comes from a diff rendering in which two copies of main()
// (one per brace/spacing style) are interleaved; they appear to differ only in
// formatting. The comments below describe what the visible statements do.
//
// Benchmark driver. Allocates three Scalar buffers a, b, c sized from
// size2 = (SIZE * 8) * (SIZE * 8), times benchVec on the raw pointers over 10 runs,
// prints timer.value() together with a derived GFlops figure, and returns 0.
// No deallocation of a, b or c is visible in this span.
int main(int argc, char* argv[])
{
int size = SIZE * 8;
int size2 = size * size;
Scalar* a = internal::aligned_new<Scalar>(size2);
Scalar* b = internal::aligned_new<Scalar>(size2+4)+1;
Scalar* c = internal::aligned_new<Scalar>(size2);
for (int i=0; i<size; ++i)
{
a[i] = b[i] = c[i] = 0;
}
BenchTimer timer;
timer.reset();
for (int k=0; k<10; ++k)
{
int main(int argc, char* argv[]) {
int size = SIZE * 8;
int size2 = size * size;
Scalar* a = internal::aligned_new<Scalar>(size2);
// b gets 4 extra elements and is then advanced by one element, so b points one
// Scalar past the start of its allocation (presumably to make b misaligned
// relative to a and c -- TODO confirm).
Scalar* b = internal::aligned_new<Scalar>(size2 + 4) + 1;
Scalar* c = internal::aligned_new<Scalar>(size2);
// NOTE(review): the loop bound is size, not size2, so only the first `size`
// elements of each buffer are zeroed here.
for (int i = 0; i < size; ++i) {
a[i] = b[i] = c[i] = 0;
}
BenchTimer timer;
timer.reset();
// Raw-pointer benchmark: 10 timed calls of the Scalar* overload of benchVec.
for (int k = 0; k < 10; ++k) {
timer.start();
benchVec(a, b, c, size2);
timer.stop();
}
// The reported figure is size2 * REPEAT divided by timer.value() and by 1024^3.
// The product size2 * REPEAT is formed before the conversion to double.
std::cout << timer.value() << "s " << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.)
<< " GFlops\n";
return 0;
// NOTE(review): the `return 0;` directly above precedes everything that follows in
// this function body, so the MatrixXf and VectorXf benchmarks below are unreachable.
for (int innersize = size; innersize > 2; --innersize) {
if (size2 % innersize == 0) {
int outersize = size2 / innersize;
MatrixXf ma = Map<MatrixXf>(a, innersize, outersize);
MatrixXf mb = Map<MatrixXf>(b, innersize, outersize);
MatrixXf mc = Map<MatrixXf>(c, innersize, outersize);
timer.reset();
for (int k = 0; k < 3; ++k) {
timer.start();
benchVec(a, b, c, size2);
benchVec(ma, mb, mc);
timer.stop();
}
std::cout << innersize << " x " << outersize << " " << timer.value() << "s "
<< (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.) << " GFlops\n";
}
std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
return 0;
for (int innersize = size; innersize>2 ; --innersize)
{
if (size2%innersize==0)
{
int outersize = size2/innersize;
MatrixXf ma = Map<MatrixXf>(a, innersize, outersize );
MatrixXf mb = Map<MatrixXf>(b, innersize, outersize );
MatrixXf mc = Map<MatrixXf>(c, innersize, outersize );
timer.reset();
for (int k=0; k<3; ++k)
{
timer.start();
benchVec(ma, mb, mc);
timer.stop();
}
std::cout << innersize << " x " << outersize << " " << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
}
}
// Vector benchmark: builds VectorXf objects from Map views over the raw buffers and
// times the VectorXf overload of benchVec over 3 runs.
VectorXf va = Map<VectorXf>(a, size2);
VectorXf vb = Map<VectorXf>(b, size2);
VectorXf vc = Map<VectorXf>(c, size2);
timer.reset();
for (int k = 0; k < 3; ++k) {
timer.start();
benchVec(va, vb, vc);
timer.stop();
}
std::cout << timer.value() << "s " << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.)
<< " GFlops\n";
return 0;
}
// Matrix overload of the benchmark kernel: evaluates a = a + b REPEAT times.
// The argument c is not used by this overload.
void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c) {
  int pass = 0;
  while (pass < REPEAT) {
    a = a + b;
    ++pass;
  }
}
// Vector overload of the benchmark kernel: evaluates a = a + b REPEAT times.
// The argument c is not used by this overload.
void benchVec(VectorXf& a, VectorXf& b, VectorXf& c) {
  int done = 0;
  while (done < REPEAT) {
    a = a + b;
    ++done;
  }
}
// Raw-pointer benchmark kernel. For each of REPEAT passes it walks the buffers in
// strides of 8 packets (PacketSize * 8 elements) and, for packet slots 2 through 7 of
// each stride, writes padd(ploadu(a slot), ploadu(b slot)) back into a via pstore.
// Slots 0 and 1 of every stride are not touched by the active code, and c is unused.
//
// NOTE(review): accesses reach index i + 8 * PacketSize - 1, so this relies on size
// being a multiple of PacketSize * 8 -- verify against the caller.
void benchVec(Scalar* a, Scalar* b, Scalar* c, int size) {
typedef internal::packet_traits<Scalar>::type PacketScalar;
const int PacketSize = internal::packet_traits<Scalar>::size;
// a0..a3 / b0..b3 are referenced only by the commented-out variant below.
PacketScalar a0, a1, a2, a3, b0, b1, b2, b3;
for (int k = 0; k < REPEAT; ++k)
for (int i = 0; i < size; i += PacketSize * 8) {
// Disabled alternative: loads four packet pairs up front into a0..b3 and then
// interleaves each store with the loads for slots 4..7, covering all 8 slots.
// a0 = internal::pload(&a[i]);
// b0 = internal::pload(&b[i]);
// a1 = internal::pload(&a[i+1*PacketSize]);
// b1 = internal::pload(&b[i+1*PacketSize]);
// a2 = internal::pload(&a[i+2*PacketSize]);
// b2 = internal::pload(&b[i+2*PacketSize]);
// a3 = internal::pload(&a[i+3*PacketSize]);
// b3 = internal::pload(&b[i+3*PacketSize]);
// internal::pstore(&a[i], internal::padd(a0, b0));
// a0 = internal::pload(&a[i+4*PacketSize]);
// b0 = internal::pload(&b[i+4*PacketSize]);
//
// internal::pstore(&a[i+1*PacketSize], internal::padd(a1, b1));
// a1 = internal::pload(&a[i+5*PacketSize]);
// b1 = internal::pload(&b[i+5*PacketSize]);
//
// internal::pstore(&a[i+2*PacketSize], internal::padd(a2, b2));
// a2 = internal::pload(&a[i+6*PacketSize]);
// b2 = internal::pload(&b[i+6*PacketSize]);
//
// internal::pstore(&a[i+3*PacketSize], internal::padd(a3, b3));
// a3 = internal::pload(&a[i+7*PacketSize]);
// b3 = internal::pload(&b[i+7*PacketSize]);
//
// internal::pstore(&a[i+4*PacketSize], internal::padd(a0, b0));
// internal::pstore(&a[i+5*PacketSize], internal::padd(a1, b1));
// internal::pstore(&a[i+6*PacketSize], internal::padd(a2, b2));
// internal::pstore(&a[i+7*PacketSize], internal::padd(a3, b3));
// Active variant: one load-add-store per slot, slots 2..7 only.
internal::pstore(&a[i + 2 * PacketSize], internal::padd(internal::ploadu(&a[i + 2 * PacketSize]),
internal::ploadu(&b[i + 2 * PacketSize])));
internal::pstore(&a[i + 3 * PacketSize], internal::padd(internal::ploadu(&a[i + 3 * PacketSize]),
internal::ploadu(&b[i + 3 * PacketSize])));
internal::pstore(&a[i + 4 * PacketSize], internal::padd(internal::ploadu(&a[i + 4 * PacketSize]),
internal::ploadu(&b[i + 4 * PacketSize])));
internal::pstore(&a[i + 5 * PacketSize], internal::padd(internal::ploadu(&a[i + 5 * PacketSize]),
internal::ploadu(&b[i + 5 * PacketSize])));
internal::pstore(&a[i + 6 * PacketSize], internal::padd(internal::ploadu(&a[i + 6 * PacketSize]),
internal::ploadu(&b[i + 6 * PacketSize])));
internal::pstore(&a[i + 7 * PacketSize], internal::padd(internal::ploadu(&a[i + 7 * PacketSize]),
internal::ploadu(&b[i + 7 * PacketSize])));
}
// NOTE(review): fragment of a function body as shown in this diff rendering; its
// opening lines are not part of this span.
// Builds VectorXf objects from Map views over the raw buffers a, b, c (size2 elements
// each), times the VectorXf overload of benchVec over 3 runs, then prints
// timer.value() and a GFlops figure derived from size2 * REPEAT.
VectorXf va = Map<VectorXf>(a, size2);
VectorXf vb = Map<VectorXf>(b, size2);
VectorXf vc = Map<VectorXf>(c, size2);
timer.reset();
for (int k=0; k<3; ++k)
{
timer.start();
benchVec(va, vb, vc);
timer.stop();
}
std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
return 0;
}
// Matrix overload of the benchmark kernel: performs the sum a = a + b REPEAT times.
// The argument c is not read or written here.
void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c)
{
  int iteration = 0;
  while (iteration < REPEAT)
  {
    a = a + b;
    ++iteration;
  }
}
// Vector overload of the benchmark kernel: performs the sum a = a + b REPEAT times.
// The argument c is not read or written here.
void benchVec(VectorXf& a, VectorXf& b, VectorXf& c)
{
  int iteration = 0;
  while (iteration < REPEAT)
  {
    a = a + b;
    ++iteration;
  }
}
// Raw-pointer benchmark kernel. For each of REPEAT passes it walks the buffers in
// strides of 8 packets (PacketSize*8 elements) and, for packet slots 2 through 7 of
// each stride, writes padd(ploadu(a slot), ploadu(b slot)) back into a via pstore.
// Slots 0 and 1 of every stride are not touched by the active code, and c is unused.
//
// NOTE(review): accesses reach index i + 8*PacketSize - 1, so this relies on size
// being a multiple of PacketSize*8 -- verify against the caller.
void benchVec(Scalar* a, Scalar* b, Scalar* c, int size)
{
typedef internal::packet_traits<Scalar>::type PacketScalar;
const int PacketSize = internal::packet_traits<Scalar>::size;
// a0..a3 / b0..b3 are referenced only by the commented-out variant below.
PacketScalar a0, a1, a2, a3, b0, b1, b2, b3;
for (int k=0; k<REPEAT; ++k)
for (int i=0; i<size; i+=PacketSize*8)
{
// Disabled alternative: loads four packet pairs up front into a0..b3 and then
// interleaves each store with the loads for slots 4..7, covering all 8 slots.
// a0 = internal::pload(&a[i]);
// b0 = internal::pload(&b[i]);
// a1 = internal::pload(&a[i+1*PacketSize]);
// b1 = internal::pload(&b[i+1*PacketSize]);
// a2 = internal::pload(&a[i+2*PacketSize]);
// b2 = internal::pload(&b[i+2*PacketSize]);
// a3 = internal::pload(&a[i+3*PacketSize]);
// b3 = internal::pload(&b[i+3*PacketSize]);
// internal::pstore(&a[i], internal::padd(a0, b0));
// a0 = internal::pload(&a[i+4*PacketSize]);
// b0 = internal::pload(&b[i+4*PacketSize]);
//
// internal::pstore(&a[i+1*PacketSize], internal::padd(a1, b1));
// a1 = internal::pload(&a[i+5*PacketSize]);
// b1 = internal::pload(&b[i+5*PacketSize]);
//
// internal::pstore(&a[i+2*PacketSize], internal::padd(a2, b2));
// a2 = internal::pload(&a[i+6*PacketSize]);
// b2 = internal::pload(&b[i+6*PacketSize]);
//
// internal::pstore(&a[i+3*PacketSize], internal::padd(a3, b3));
// a3 = internal::pload(&a[i+7*PacketSize]);
// b3 = internal::pload(&b[i+7*PacketSize]);
//
// internal::pstore(&a[i+4*PacketSize], internal::padd(a0, b0));
// internal::pstore(&a[i+5*PacketSize], internal::padd(a1, b1));
// internal::pstore(&a[i+6*PacketSize], internal::padd(a2, b2));
// internal::pstore(&a[i+7*PacketSize], internal::padd(a3, b3));
// Active variant: one load-add-store per slot, slots 2..7 only.
internal::pstore(&a[i+2*PacketSize], internal::padd(internal::ploadu(&a[i+2*PacketSize]), internal::ploadu(&b[i+2*PacketSize])));
internal::pstore(&a[i+3*PacketSize], internal::padd(internal::ploadu(&a[i+3*PacketSize]), internal::ploadu(&b[i+3*PacketSize])));
internal::pstore(&a[i+4*PacketSize], internal::padd(internal::ploadu(&a[i+4*PacketSize]), internal::ploadu(&b[i+4*PacketSize])));
internal::pstore(&a[i+5*PacketSize], internal::padd(internal::ploadu(&a[i+5*PacketSize]), internal::ploadu(&b[i+5*PacketSize])));
internal::pstore(&a[i+6*PacketSize], internal::padd(internal::ploadu(&a[i+6*PacketSize]), internal::ploadu(&b[i+6*PacketSize])));
internal::pstore(&a[i+7*PacketSize], internal::padd(internal::ploadu(&a[i+7*PacketSize]), internal::ploadu(&b[i+7*PacketSize])));
}
}