mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
@@ -106,22 +106,22 @@ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const
|
||||
|
||||
// KMADD(c, a, b, tmp): multiply-accumulate helper, c = padd(c, pmul(a, b)).
//   c   - accumulator, updated in place
//   a   - left factor passed to pmul
//   b   - right factor; it is first copied into tmp and is not modified itself
//   tmp - scratch variable, overwritten with the product pmul(a, b)
// The expansion is a complete brace-enclosed block with no trailing semicolon,
// so the WORK macros invoke it as `KMADD(...)` without adding `;` themselves.
#define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);}
|
||||
#define WORK(I) \
|
||||
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
|
||||
c1 = pload<Packet>(C1+i+(I)*PacketSize); \
|
||||
KMADD(c0, a0, b00, t0) \
|
||||
KMADD(c1, a0, b01, t1) \
|
||||
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
|
||||
KMADD(c0, a1, b10, t0) \
|
||||
KMADD(c1, a1, b11, t1) \
|
||||
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
|
||||
if(RK==4) KMADD(c0, a2, b20, t0) \
|
||||
if(RK==4) KMADD(c1, a2, b21, t1) \
|
||||
if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \
|
||||
if(RK==4) KMADD(c0, a3, b30, t0) \
|
||||
if(RK==4) KMADD(c1, a3, b31, t1) \
|
||||
if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \
|
||||
pstore(C0+i+(I)*PacketSize, c0); \
|
||||
pstore(C1+i+(I)*PacketSize, c1)
|
||||
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
|
||||
c1 = pload<Packet>(C1+i+(I)*PacketSize); \
|
||||
KMADD(c0, a0, b00, t0) \
|
||||
KMADD(c1, a0, b01, t1) \
|
||||
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
|
||||
KMADD(c0, a1, b10, t0) \
|
||||
KMADD(c1, a1, b11, t1) \
|
||||
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
|
||||
if(RK==4){ KMADD(c0, a2, b20, t0) }\
|
||||
if(RK==4){ KMADD(c1, a2, b21, t1) }\
|
||||
if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
|
||||
if(RK==4){ KMADD(c0, a3, b30, t0) }\
|
||||
if(RK==4){ KMADD(c1, a3, b31, t1) }\
|
||||
if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
|
||||
pstore(C0+i+(I)*PacketSize, c0); \
|
||||
pstore(C1+i+(I)*PacketSize, c1)
|
||||
|
||||
// process rows of A' - C' with aggressive vectorization and peeling
|
||||
for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
|
||||
@@ -131,14 +131,15 @@ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const
|
||||
prefetch((A1+i+(5)*PacketSize));
|
||||
if(RK==4) prefetch((A2+i+(5)*PacketSize));
|
||||
if(RK==4) prefetch((A3+i+(5)*PacketSize));
|
||||
WORK(0);
|
||||
WORK(1);
|
||||
WORK(2);
|
||||
WORK(3);
|
||||
WORK(4);
|
||||
WORK(5);
|
||||
WORK(6);
|
||||
WORK(7);
|
||||
|
||||
WORK(0);
|
||||
WORK(1);
|
||||
WORK(2);
|
||||
WORK(3);
|
||||
WORK(4);
|
||||
WORK(5);
|
||||
WORK(6);
|
||||
WORK(7);
|
||||
}
|
||||
// process the remaining rows with vectorization only
|
||||
for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
|
||||
@@ -203,16 +204,16 @@ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const
|
||||
}
|
||||
|
||||
#define WORK(I) \
|
||||
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
|
||||
KMADD(c0, a0, b00, t0) \
|
||||
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
|
||||
KMADD(c0, a1, b10, t0) \
|
||||
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
|
||||
if(RK==4) KMADD(c0, a2, b20, t0) \
|
||||
if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \
|
||||
if(RK==4) KMADD(c0, a3, b30, t0) \
|
||||
if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \
|
||||
pstore(C0+i+(I)*PacketSize, c0);
|
||||
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
|
||||
KMADD(c0, a0, b00, t0) \
|
||||
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
|
||||
KMADD(c0, a1, b10, t0) \
|
||||
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
|
||||
if(RK==4){ KMADD(c0, a2, b20, t0) }\
|
||||
if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
|
||||
if(RK==4){ KMADD(c0, a3, b30, t0) }\
|
||||
if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
|
||||
pstore(C0+i+(I)*PacketSize, c0);
|
||||
|
||||
// aggressive vectorization and peeling
|
||||
for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
|
||||
|
||||
Reference in New Issue
Block a user