mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Compare commits
19 Commits
starting_n
...
before-3.4
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3e819d83bf | ||
|
|
69adf26aa3 | ||
|
|
9357feedc7 | ||
|
|
a2c0542010 | ||
|
|
dfd6720d82 | ||
|
|
1e1c8a735c | ||
|
|
f6fc66aa75 | ||
|
|
d58678069c | ||
|
|
2859db0220 | ||
|
|
fcb5106c6e | ||
|
|
6197ce1a35 | ||
|
|
22edb46823 | ||
|
|
ace7f132ed | ||
|
|
90187a33e1 | ||
|
|
3ddc0974ce | ||
|
|
c24bee6120 | ||
|
|
e4233b6e3d | ||
|
|
ae95b74af9 | ||
|
|
5bbc9cea93 |
@@ -8,6 +8,8 @@
|
||||
# with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
stages:
|
||||
- buildsmoketests
|
||||
- smoketests
|
||||
- build
|
||||
- test
|
||||
|
||||
@@ -16,5 +18,6 @@ variables:
|
||||
EIGEN_CI_CMAKE_GENEATOR: "Ninja"
|
||||
|
||||
include:
|
||||
- "/ci/smoketests.gitlab-ci.yml"
|
||||
- "/ci/build.gitlab-ci.yml"
|
||||
- "/ci/test.gitlab-ci.yml"
|
||||
|
||||
@@ -350,9 +350,6 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/arch/AltiVec/MatrixProduct.h"
|
||||
#elif defined EIGEN_VECTORIZE_NEON
|
||||
#include "src/Core/arch/NEON/GeneralBlockPanelKernel.h"
|
||||
#include "src/Core/arch/NEON/MatrixProduct.h"
|
||||
#include "src/Core/arch/NEON/PackingOps.h"
|
||||
#include "src/Core/arch/NEON/Kernels.h"
|
||||
#endif
|
||||
|
||||
#include "src/Core/BooleanRedux.h"
|
||||
|
||||
@@ -556,7 +556,7 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
peven_mask(const Packet& /*a*/) {
|
||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||
const size_t n = unpacket_traits<Packet>::size;
|
||||
Scalar elements[n];
|
||||
EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
|
||||
for(size_t i = 0; i < n; ++i) {
|
||||
memset(elements+i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
|
||||
}
|
||||
@@ -731,7 +731,7 @@ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
|
||||
predux_helper(const Packet& a, Op op) {
|
||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||
const size_t n = unpacket_traits<Packet>::size;
|
||||
Scalar elements[n];
|
||||
EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
|
||||
pstoreu<Scalar>(elements, a);
|
||||
for(size_t k = n / 2; k > 0; k /= 2) {
|
||||
for(size_t i = 0; i < k; ++i) {
|
||||
|
||||
@@ -7,6 +7,9 @@
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_STLITERATORS_H
|
||||
#define EIGEN_STLITERATORS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
@@ -30,10 +33,10 @@ public:
|
||||
typedef Index difference_type;
|
||||
typedef std::random_access_iterator_tag iterator_category;
|
||||
|
||||
indexed_based_stl_iterator_base() : mp_xpr(0), m_index(0) {}
|
||||
indexed_based_stl_iterator_base(XprType& xpr, Index index) : mp_xpr(&xpr), m_index(index) {}
|
||||
indexed_based_stl_iterator_base() EIGEN_NO_THROW : mp_xpr(0), m_index(0) {}
|
||||
indexed_based_stl_iterator_base(XprType& xpr, Index index) EIGEN_NO_THROW : mp_xpr(&xpr), m_index(index) {}
|
||||
|
||||
indexed_based_stl_iterator_base(const non_const_iterator& other)
|
||||
indexed_based_stl_iterator_base(const non_const_iterator& other) EIGEN_NO_THROW
|
||||
: mp_xpr(other.mp_xpr), m_index(other.m_index)
|
||||
{}
|
||||
|
||||
@@ -190,17 +193,17 @@ public:
|
||||
typedef typename internal::conditional<bool(is_lvalue), value_type&, const value_type&>::type reference;
|
||||
|
||||
|
||||
pointer_based_stl_iterator() : m_ptr(0) {}
|
||||
pointer_based_stl_iterator(XprType& xpr, Index index) : m_incr(xpr.innerStride())
|
||||
pointer_based_stl_iterator() EIGEN_NO_THROW : m_ptr(0) {}
|
||||
pointer_based_stl_iterator(XprType& xpr, Index index) EIGEN_NO_THROW : m_incr(xpr.innerStride())
|
||||
{
|
||||
m_ptr = xpr.data() + index * m_incr.value();
|
||||
}
|
||||
|
||||
pointer_based_stl_iterator(const non_const_iterator& other)
|
||||
pointer_based_stl_iterator(const non_const_iterator& other) EIGEN_NO_THROW
|
||||
: m_ptr(other.m_ptr), m_incr(other.m_incr)
|
||||
{}
|
||||
|
||||
pointer_based_stl_iterator& operator=(const non_const_iterator& other)
|
||||
pointer_based_stl_iterator& operator=(const non_const_iterator& other) EIGEN_NO_THROW
|
||||
{
|
||||
m_ptr = other.m_ptr;
|
||||
m_incr.setValue(other.m_incr);
|
||||
@@ -456,3 +459,5 @@ inline typename DenseBase<Derived>::const_iterator DenseBase<Derived>::cend() co
|
||||
}
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#endif // EIGEN_STLITERATORS_H
|
||||
|
||||
@@ -139,8 +139,8 @@ EIGEN_STRONG_INLINE Packet2cf pload2(const std::complex<float>* from0, const std
|
||||
__asm__ ("xxpermdi %x0, %x2, %x1, 0" : "=wa" (res0) : "wa" (res0), "wa" (res1));
|
||||
#endif
|
||||
#else
|
||||
*((std::complex<float> *)&res0[0]) = *from0;
|
||||
*((std::complex<float> *)&res1[0]) = *from1;
|
||||
*reinterpret_cast<std::complex<float> *>(&res0) = *from0;
|
||||
*reinterpret_cast<std::complex<float> *>(&res1) = *from1;
|
||||
res0 = vec_perm(res0, res1, p16uc_TRANSPOSE64_HI);
|
||||
#endif
|
||||
return Packet2cf(res0);
|
||||
|
||||
@@ -486,19 +486,28 @@ struct dhs_cpack {
|
||||
if(((StorageOrder == ColMajor) && UseLhs) || (((StorageOrder == RowMajor) && !UseLhs)))
|
||||
{
|
||||
if (UseLhs) {
|
||||
cblock.packet[0] = pload<PacketC>(&lhs(j + 0, i));
|
||||
cblock.packet[1] = pload<PacketC>(&lhs(j + 2, i));
|
||||
cblock.packet[0] = lhs.template loadPacket<PacketC>(j + 0, i);
|
||||
cblock.packet[1] = lhs.template loadPacket<PacketC>(j + 2, i);
|
||||
} else {
|
||||
cblock.packet[0] = pload<PacketC>(&lhs(i, j + 0));
|
||||
cblock.packet[1] = pload<PacketC>(&lhs(i, j + 2));
|
||||
cblock.packet[0] = lhs.template loadPacket<PacketC>(i, j + 0);
|
||||
cblock.packet[1] = lhs.template loadPacket<PacketC>(i, j + 2);
|
||||
}
|
||||
} else {
|
||||
const std::complex<Scalar> *lhs0, *lhs1;
|
||||
if (UseLhs) {
|
||||
cblock.packet[0] = pload2(&lhs(j + 0, i), &lhs(j + 1, i));
|
||||
cblock.packet[1] = pload2(&lhs(j + 2, i), &lhs(j + 3, i));
|
||||
lhs0 = &lhs(j + 0, i);
|
||||
lhs1 = &lhs(j + 1, i);
|
||||
cblock.packet[0] = pload2(lhs0, lhs1);
|
||||
lhs0 = &lhs(j + 2, i);
|
||||
lhs1 = &lhs(j + 3, i);
|
||||
cblock.packet[1] = pload2(lhs0, lhs1);
|
||||
} else {
|
||||
cblock.packet[0] = pload2(&lhs(i, j + 0), &lhs(i, j + 1));
|
||||
cblock.packet[1] = pload2(&lhs(i, j + 2), &lhs(i, j + 3));
|
||||
lhs0 = &lhs(i, j + 0);
|
||||
lhs1 = &lhs(i, j + 1);
|
||||
cblock.packet[0] = pload2(lhs0, lhs1);
|
||||
lhs0 = &lhs(i, j + 2);
|
||||
lhs1 = &lhs(i, j + 3);
|
||||
cblock.packet[1] = pload2(lhs0, lhs1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -859,8 +868,8 @@ struct dhs_cpack<double, Index, DataMapper, Packet, PacketC, StorageOrder, Conju
|
||||
PacketBlock<Packet,1> blockr, blocki;
|
||||
PacketBlock<PacketC,2> cblock;
|
||||
|
||||
cblock.packet[0] = pload<PacketC>(&lhs(j + 0, i));
|
||||
cblock.packet[1] = pload<PacketC>(&lhs(j + 1, i));
|
||||
cblock.packet[0] = lhs.template loadPacket<PacketC>(j + 0, i);
|
||||
cblock.packet[1] = lhs.template loadPacket<PacketC>(j + 1, i);
|
||||
|
||||
blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETREAL64);
|
||||
blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETIMAG64);
|
||||
@@ -1100,7 +1109,7 @@ EIGEN_STRONG_INLINE void pgerc(PacketBlock<Packet,N>* accReal, PacketBlock<Packe
|
||||
template<typename Scalar, typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet ploadLhs(const Scalar* lhs)
|
||||
{
|
||||
return *((Packet *)lhs);
|
||||
return *reinterpret_cast<Packet *>(const_cast<Scalar *>(lhs));
|
||||
}
|
||||
|
||||
// Zero the accumulator on PacketBlock.
|
||||
@@ -1799,24 +1808,6 @@ EIGEN_STRONG_INLINE void MICRO_COMPLEX_EXTRA_COL(
|
||||
else EIGEN_UNUSED_VARIABLE(rhs_ptr_imag);
|
||||
}
|
||||
|
||||
// Adds the leading complex value of packet `from` to the single value stored at `to`.
// `to`   - destination element, read and then overwritten.
// `from` - packet providing the value to add; taken by non-const reference.
// NOTE(review): `Index` and `accCols` are template parameters that the body never reads.
template<typename Scalar, typename Packetc, typename Index, const Index accCols>
|
||||
EIGEN_STRONG_INLINE void pstore_add_half(std::complex<Scalar>* to, Packetc &from)
|
||||
{
|
||||
#ifdef __VSX__
|
||||
Packetc from2;
|
||||
#ifndef _BIG_ENDIAN
|
||||
// NOTE(review): this statement lists from.v only as an input operand although the
// instruction names %x0 as its target too -- confirm the compiler is allowed to assume
// from.v is unchanged here.
__asm__ ("xxswapd %x0, %x0" : : "wa" (from.v));
|
||||
#endif
|
||||
// Load from *to into from2.v (presumably only one doubleword -- TODO confirm against the ISA).
__asm__ ("lxsdx %x0,%y1" : "=wa" (from2.v) : "Z" (*to));
|
||||
from2 += from;
|
||||
// Write the sum back to *to.
// NOTE(review): *to is passed as an input operand even though the statement stores to it -- confirm.
__asm__ ("stxsdx %x0,%y1" : : "wa" (from2.v), "Z" (*to));
|
||||
#else
|
||||
// Portable path: spill the packet to a temporary array and add only its first element.
// NOTE(review): accColsC is not declared in this function; presumably a macro or an
// enclosing-scope constant -- TODO confirm.
std::complex<Scalar> mem[accColsC];
|
||||
pstoreu<std::complex<Scalar> >(mem, from);
|
||||
*to += *mem;
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
|
||||
EIGEN_STRONG_INLINE void gemm_complex_extra_col(
|
||||
const DataMapper& res,
|
||||
@@ -1886,12 +1877,12 @@ EIGEN_STRONG_INLINE void gemm_complex_extra_col(
|
||||
|
||||
if ((sizeof(Scalar) == sizeof(float)) && (remaining_rows == 1))
|
||||
{
|
||||
pstore_add_half<Scalar, Packetc, Index, accCols>(&res(row + 0, col + 0), acc0.packet[0]);
|
||||
res(row + 0, col + 0) += pfirst<Packetc>(acc0.packet[0]);
|
||||
} else {
|
||||
acc0.packet[0] += res.template loadPacket<Packetc>(row + 0, col + 0);
|
||||
res.template storePacketBlock<Packetc,1>(row + 0, col + 0, acc0);
|
||||
if(remaining_rows > accColsC) {
|
||||
pstore_add_half<Scalar, Packetc, Index, accCols>(&res(row + accColsC, col + 0), acc1.packet[0]);
|
||||
res(row + accColsC, col + 0) += pfirst<Packetc>(acc1.packet[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1997,7 +1988,7 @@ asm("#gemm_complex begin");
|
||||
if ((sizeof(Scalar) == sizeof(float)) && (remaining_rows == 1))
|
||||
{
|
||||
for(Index j = 0; j < 4; j++) {
|
||||
pstore_add_half<Scalar, Packetc, Index, accCols>(&res(row + 0, col + j), acc0.packet[j]);
|
||||
res(row + 0, col + j) += pfirst<Packetc>(acc0.packet[j]);
|
||||
}
|
||||
} else {
|
||||
for(Index j = 0; j < 4; j++) {
|
||||
@@ -2005,7 +1996,7 @@ asm("#gemm_complex begin");
|
||||
acc2.packet[0] = res.template loadPacket<Packetc>(row + 0, col + j) + acc0.packet[j];
|
||||
res.template storePacketBlock<Packetc,1>(row + 0, col + j, acc2);
|
||||
if(remaining_rows > accColsC) {
|
||||
pstore_add_half<Scalar, Packetc, Index, accCols>(&res(row + accColsC, col + j), acc1.packet[j]);
|
||||
res(row + accColsC, col + j) += pfirst<Packetc>(acc1.packet[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -214,7 +214,7 @@ EIGEN_STRONG_INLINE void bcouple_common<Packet2d, Packet1cd>(PacketBlock<Packet2
|
||||
template<typename Scalar, typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet ploadRhs(const Scalar* rhs)
|
||||
{
|
||||
return *((Packet *)rhs);
|
||||
return *reinterpret_cast<Packet *>(const_cast<Scalar *>(rhs));
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,555 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2021 Everton Constantino (everton.constantino@hotmail.com)
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_MATRIX_PRODUCT_NEON_H
|
||||
#define EIGEN_MATRIX_PRODUCT_NEON_H
|
||||
|
||||
#ifdef __DEBUG__
|
||||
#include <iostream>
|
||||
#endif
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
#ifndef __UNROLL__
|
||||
#define __UNROLL__ 8
|
||||
#endif
|
||||
|
||||
// Number of rows in the SHAPES table (used as its first array extent).
template<int Architecture, int CPU, typename LhsScalar, typename RhsScalar>
|
||||
constexpr int SHAPES_COUNT = 16;
|
||||
|
||||
// Number of columns in each SHAPES row.
constexpr int SHAPES_DIMENSION = 6;
|
||||
// Column indices into a SHAPES row: progress made along the lhs, depth and rhs dimensions.
constexpr int SHAPES_LHS_DIMENSION = 0;
|
||||
constexpr int SHAPES_DEP_DIMENSION = 1;
|
||||
constexpr int SHAPES_RHS_DIMENSION = 2;
|
||||
// Column indices holding the row index of the next shape tried by the rhs, lhs and depth loops.
constexpr int SHAPES_RHS_POINTER = 3;
|
||||
constexpr int SHAPES_LHS_POINTER = 4;
|
||||
constexpr int SHAPES_DEP_POINTER = 5;
|
||||
// Sentinel stored in a pointer column when there is no further shape to try.
constexpr int SHAPES_POINTER_END = -1;
|
||||
|
||||
// Number of rows in the PACK_SHAPES table: 2 in the general case...
template<int Architecture, int CPU, typename Scalar, bool isLhs>
|
||||
constexpr int PACK_SHAPES_COUNT = 2;
|
||||
|
||||
// ...and 4 when isLhs is true.
template<int Architecture, int CPU, typename Scalar>
|
||||
constexpr int PACK_SHAPES_COUNT<Architecture, CPU, Scalar, true> = 4;
|
||||
|
||||
// Number of columns in each PACK_SHAPES row.
constexpr int PACK_SHAPES_DIMENSION = 3;
|
||||
// Column index holding the row index of the next packing shape.
constexpr int PACK_SHAPES_POINTER = 2;
|
||||
// Sentinel stored in the pointer column of the last packing shape.
constexpr int PACK_SHAPES_END = -1;
|
||||
|
||||
template<typename Scalar>
|
||||
struct PacketMultiples
|
||||
{
|
||||
enum
|
||||
{
|
||||
half = unpacket_traits<typename packet_traits<Scalar>::half>::size,
|
||||
quarter = unpacket_traits<typename packet_traits<Scalar>::half>::size // Is this used?
|
||||
};
|
||||
};
|
||||
|
||||
// Table of kernel shapes, one row per shape. Columns follow the SHAPES_* index constants:
//   lhs_progress, depth_progress, rhs_progress,
//   then the row index of the next shape for the rhs loop, the lhs loop and the depth loop
//   (SHAPES_POINTER_END when there is no next shape).
// Rows whose depth_progress is __UNROLL__ point, through their depth column, at the row with
// the same lhs/rhs progress and depth_progress 1.
// NOTE(review): the lhs_progress column is derived from RhsScalar rather than LhsScalar --
// confirm this is intended.
template<int Architecture, int CPU, typename LhsScalar, typename RhsScalar>
|
||||
constexpr int SHAPES[SHAPES_COUNT<Architecture, CPU, LhsScalar,RhsScalar>][SHAPES_DIMENSION] =
|
||||
{ /* 00 */{ 1, 1,1,SHAPES_POINTER_END, SHAPES_POINTER_END, SHAPES_POINTER_END},
|
||||
/* 01 */{PacketMultiples<RhsScalar>::half, 1,1, 0, 0, SHAPES_POINTER_END},
|
||||
/* 02 */{1*packet_traits<RhsScalar>::size, 1,1, 0, 1, SHAPES_POINTER_END},
|
||||
/* 03 */{1*packet_traits<RhsScalar>::size,__UNROLL__,1, 0, 1, 2},
|
||||
/* 04 */{2*packet_traits<RhsScalar>::size, 1,1, 0, 3, SHAPES_POINTER_END},
|
||||
/* 05 */{2*packet_traits<RhsScalar>::size,__UNROLL__,1, 0, 3, 4},
|
||||
/* 06 */{3*packet_traits<RhsScalar>::size, 1,1, 0, 5, SHAPES_POINTER_END},
|
||||
/* 07 */{3*packet_traits<RhsScalar>::size,__UNROLL__,1, 0, 5, 6},
|
||||
/* 08 */{ 1, 1,4, 7, SHAPES_POINTER_END, SHAPES_POINTER_END},
|
||||
/* 09 */{PacketMultiples<RhsScalar>::half, 1,4, 7, 8, SHAPES_POINTER_END},
|
||||
/* 10 */{1*packet_traits<RhsScalar>::size, 1,4, 7, 9, SHAPES_POINTER_END},
|
||||
/* 11 */{1*packet_traits<RhsScalar>::size,__UNROLL__,4, 7, 9, 10},
|
||||
/* 12 */{2*packet_traits<RhsScalar>::size, 1,4, 7, 11, SHAPES_POINTER_END},
|
||||
/* 13 */{2*packet_traits<RhsScalar>::size,__UNROLL__,4, 7, 11, 12},
|
||||
/* 14 */{3*packet_traits<RhsScalar>::size, 1,4, 7, 13, SHAPES_POINTER_END},
|
||||
/* 15 */{3*packet_traits<RhsScalar>::size,__UNROLL__,4, 7, 13, 14}};
|
||||
|
||||
// Table of packing shapes. Each row is {d1progress, d2progress, row index of the next packing
// shape}, with PACK_SHAPES_END in the last column of the final shape.
// General case (selected whenever isLhs is not true): tiles 1 and 4 wide along d1.
template<int Architecture, int CPU, typename Scalar, bool isLhs>
|
||||
constexpr int PACK_SHAPES[PACK_SHAPES_COUNT<Architecture, CPU, Scalar, isLhs>][PACK_SHAPES_DIMENSION] =
|
||||
{{ 1, 1, PACK_SHAPES_END},
|
||||
{ 4, 1, 0}};
|
||||
|
||||
// Lhs case: tiles 1 element wide, then one, two and three packets of Scalar wide along d1.
// Every row in both tables advances d2 by 1.
template<int Architecture, int CPU, typename Scalar>
|
||||
constexpr int PACK_SHAPES<Architecture, CPU, Scalar, true>[PACK_SHAPES_COUNT<Architecture, CPU, Scalar, true>][PACK_SHAPES_DIMENSION] =
|
||||
{{ 1, 1, PACK_SHAPES_END},
|
||||
{1*packet_traits<Scalar>::size, 1, 0},
|
||||
{2*packet_traits<Scalar>::size, 1, 1},
|
||||
{3*packet_traits<Scalar>::size, 1, 2}};
|
||||
|
||||
template<int Architecture, int CPU, typename Index, typename Scalar, bool isLhs, typename DataMapper, bool Conjugate, bool PanelMode, int StorageOrder, int M, int N>
|
||||
struct PackingOperator
|
||||
{
|
||||
EIGEN_STRONG_INLINE Scalar* operator()(Index d1Idx, Index d2Idx, Scalar *block, const DataMapper& data)
|
||||
{
|
||||
#ifdef __DEBUG__
|
||||
std::cout << M << "x" << N << " ( " << d1Idx << ", " << d2Idx <<") -> ( " << d1Idx + M << ", " << d2Idx + N << ") ";
|
||||
#endif
|
||||
Scalar *c = block;
|
||||
for(auto i = 0; i < M; i++)
|
||||
for(auto j = 0; j < N; j++)
|
||||
{
|
||||
if(isLhs)
|
||||
*c = data(d1Idx + i, d2Idx + j);
|
||||
else
|
||||
*c = data(d2Idx + j, d1Idx + i);
|
||||
#ifdef __DEBUG__
|
||||
std::cout << *c << " ";
|
||||
#endif
|
||||
c++;
|
||||
}
|
||||
#ifdef __DEBUG__
|
||||
std::cout << std::endl;
|
||||
#endif
|
||||
return c;
|
||||
}
|
||||
};
|
||||
|
||||
template<int Architecture, int CPU, typename Index, typename Scalar, bool isLhs, typename DataMapper, bool Conjugate, bool PanelMode, int StorageOrder, int D1PROGRESS, int IDX>
|
||||
struct PackingInnerStruct
|
||||
{
|
||||
EIGEN_STRONG_INLINE Scalar* operator()(Index d1Idx, Index d2Idx, Scalar *block, const DataMapper& data, Index d1Size, Index d2Size, Index stride, Index offset)
|
||||
{
|
||||
constexpr auto d2Progress = PACK_SHAPES<Architecture, CPU, Scalar, isLhs>[IDX][1];
|
||||
PackingOperator<Architecture, CPU, Index, Scalar, isLhs, DataMapper, Conjugate, PanelMode, StorageOrder, D1PROGRESS, d2Progress> po;
|
||||
|
||||
for(;d2Idx + d2Progress <= d2Size; d2Idx+=d2Progress)
|
||||
{
|
||||
block = po(d1Idx, d2Idx, block, data);
|
||||
}
|
||||
|
||||
if(PACK_SHAPES<Architecture, CPU, Scalar, isLhs>[IDX-1][0] == D1PROGRESS)
|
||||
{
|
||||
PackingInnerStruct<Architecture, CPU, Index, Scalar, isLhs, DataMapper, Conjugate, PanelMode, StorageOrder, D1PROGRESS, IDX-1> pis;
|
||||
block = pis(d1Idx, d2Idx, block, data, d1Size, d2Size, stride, offset);
|
||||
}
|
||||
return block;
|
||||
}
|
||||
};
|
||||
|
||||
template<int Architecture, int CPU, typename Index, typename Scalar, bool isLhs, typename DataMapper, bool Conjugate, bool PanelMode, int StorageOrder, int D1PROGRESS>
|
||||
struct PackingInnerStruct<Architecture, CPU, Index, Scalar, isLhs, DataMapper, Conjugate, PanelMode, StorageOrder, D1PROGRESS, 0>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Scalar* operator()(Index d1Idx, Index d2Idx, Scalar *block, const DataMapper& data, Index d1Size, Index d2Size, Index stride, Index offset)
|
||||
{
|
||||
constexpr auto d2Progress = PACK_SHAPES<Architecture, CPU, Scalar, isLhs>[0][1];
|
||||
for(;d2Idx + d2Progress <= d2Size; d2Idx+=d2Progress)
|
||||
{
|
||||
PackingOperator<Architecture, CPU, Index, Scalar, isLhs, DataMapper, Conjugate, PanelMode, StorageOrder, D1PROGRESS, d2Progress> po;
|
||||
block = po(d1Idx, d2Idx, block, data);
|
||||
}
|
||||
return block;
|
||||
}
|
||||
};
|
||||
|
||||
template<int Architecture, int CPU, typename Index, typename Scalar, bool isLhs, typename DataMapper, bool Conjugate, bool PanelMode, int StorageOrder, int PACK_SHAPE_IDX>
|
||||
struct PackingStruct
|
||||
{
|
||||
PackingStruct<Architecture, CPU, Index, Scalar, isLhs, DataMapper, Conjugate, PanelMode, StorageOrder, PACK_SHAPES<Architecture, CPU, Scalar, isLhs>[PACK_SHAPE_IDX][PACK_SHAPES_POINTER]> ps;
|
||||
|
||||
EIGEN_STRONG_INLINE Scalar* operator()(Index d1Idx, Scalar *block, const DataMapper& data, Index d1Size, Index d2Size, Index stride, Index offset)
|
||||
{
|
||||
constexpr auto d1Progress = PACK_SHAPES<Architecture, CPU, Scalar, isLhs>[PACK_SHAPE_IDX][0];
|
||||
|
||||
for(; d1Idx + d1Progress <= d1Size; d1Idx += d1Progress)
|
||||
{
|
||||
PackingInnerStruct<Architecture, CPU, Index, Scalar, isLhs, DataMapper, Conjugate, PanelMode, StorageOrder, d1Progress, PACK_SHAPE_IDX> pis;
|
||||
block = pis(d1Idx, 0, block, data, d1Size, d2Size, stride, offset);
|
||||
}
|
||||
return ps(d1Idx, block, data, d1Size, d2Size, stride, offset);
|
||||
}
|
||||
};
|
||||
|
||||
template<int Architecture, int CPU, typename Index, typename Scalar, bool isLhs, typename DataMapper, bool Conjugate, bool PanelMode, int StorageOrder>
|
||||
struct PackingStruct<Architecture, CPU, Index, Scalar, isLhs, DataMapper, Conjugate, PanelMode, StorageOrder, -1>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Scalar* operator()(Index, Scalar *block, const DataMapper&, Index, Index, Index, Index) { return block; }
|
||||
};
|
||||
|
||||
template<int Architecture, int CPU, typename Index, typename Scalar, typename DataMapper, bool Conjugate, bool PanelMode, int StorageOrder>
|
||||
struct lhs_pack
|
||||
{
|
||||
EIGEN_STRONG_INLINE void operator()(Scalar *blockA, const DataMapper &lhs, Index depth, Index rows, Index stride, Index offset)
|
||||
{
|
||||
PackingStruct<Architecture, CPU, Index, Scalar, true, DataMapper, Conjugate, PanelMode, StorageOrder, PACK_SHAPES_COUNT<Architecture, CPU, Scalar, true>-1> ps;
|
||||
ps(0, blockA, lhs, rows, depth, stride, offset);
|
||||
}
|
||||
};
|
||||
|
||||
template<int Architecture, int CPU, typename Index, typename Scalar, typename DataMapper, bool Conjugate, bool PanelMode, int StorageOrder>
|
||||
struct rhs_pack
|
||||
{
|
||||
EIGEN_STRONG_INLINE void operator()(Scalar *blockB, const DataMapper &rhs, Index depth, Index cols, Index stride, Index offset)
|
||||
{
|
||||
PackingStruct<Architecture, CPU, Index, Scalar, false, DataMapper, Conjugate, PanelMode, StorageOrder, PACK_SHAPES_COUNT<Architecture, CPU, Scalar, false>-1> ps;
|
||||
ps(0, blockB, rhs, cols, depth, stride, offset);
|
||||
}
|
||||
};
|
||||
|
||||
template<int Architecture, int CPU, typename Index, typename Scalar, typename DataMapper, bool isLhs, int IDX>
|
||||
struct PackMapCalculator
|
||||
{
|
||||
PackMapCalculator<Architecture, CPU, Index, Scalar, DataMapper, isLhs, PACK_SHAPES<Architecture, CPU, Scalar, isLhs>[IDX][PACK_SHAPES_POINTER]> pmc;
|
||||
EIGEN_STRONG_INLINE Index getPosition(Index pos, Index d2Size)
|
||||
{
|
||||
constexpr auto d1Progress = PACK_SHAPES<Architecture, CPU, Scalar, isLhs>[IDX][0];
|
||||
Index v = (pos / d1Progress) * d1Progress;
|
||||
return v*d2Size + pmc.getPosition(pos - v, d2Size);
|
||||
}
|
||||
};
|
||||
|
||||
template<int Architecture, int CPU, typename Index, typename Scalar, typename DataMapper, bool isLhs>
|
||||
struct PackMapCalculator<Architecture, CPU, Index, Scalar, DataMapper, isLhs, -1>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Index getPosition(Index, Index) { return Index(0); }
|
||||
};
|
||||
|
||||
template<int Architecture, int CPU, typename Index, typename Scalar, typename DataMapper, bool isLhs>
|
||||
struct PackMap
|
||||
{
|
||||
const Scalar *pBase;
|
||||
const Scalar *pCur;
|
||||
Index stride;
|
||||
Index offset;
|
||||
Index d2Size;
|
||||
PackMapCalculator<Architecture, CPU, Index, Scalar, DataMapper, isLhs, PACK_SHAPES_COUNT<Architecture, CPU, Scalar, isLhs>-1> pmc;
|
||||
|
||||
PackMap(const Scalar *base, Index d2Size, Index stride, Index offset) : pBase(base), pCur(base), d2Size(d2Size), stride(stride), offset(offset) {}
|
||||
|
||||
EIGEN_STRONG_INLINE void resetCur() { pCur = pBase; }
|
||||
EIGEN_STRONG_INLINE void updateBase() { pBase = pCur; }
|
||||
EIGEN_STRONG_INLINE void moveTo(Index p1) { pCur = pBase + pmc.getPosition(p1, d2Size); }
|
||||
EIGEN_STRONG_INLINE void advance(Index progress) { pCur += progress; }
|
||||
|
||||
template<int D1Progress=-1, int D2Progress=-1>
|
||||
EIGEN_STRONG_INLINE void prefetch(Index amnt)
|
||||
{
|
||||
#ifdef __ENABLE_PREFETCH__
|
||||
internal::prefetch(pCur + amnt);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<int Architecture, int CPU, typename Scalar, typename ResScalar, typename DataMapper, int M, int N>
|
||||
struct Accumulator
|
||||
{
|
||||
Scalar dt[M][N];
|
||||
|
||||
EIGEN_STRONG_INLINE void zero()
|
||||
{
|
||||
for(auto i = 0; i < M; i++)
|
||||
{
|
||||
for(auto j = 0; j < N; j++)
|
||||
{
|
||||
dt[i][j] = Scalar(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int LhsProgress=-1, int DepthProgress=-1, int RhsProgress=-1>
|
||||
EIGEN_STRONG_INLINE void prefetch(const DataMapper&, Index, Index) {}
|
||||
|
||||
template<typename ResPacket>
|
||||
EIGEN_STRONG_INLINE void scale(ResScalar alpha, const ResPacket& pAlpha)
|
||||
{
|
||||
for(auto i = 0; i < M; i++)
|
||||
{
|
||||
for(auto j = 0; j < N; j++)
|
||||
{
|
||||
dt[i][j] *= alpha;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename ResPacket>
|
||||
EIGEN_STRONG_INLINE void store(const DataMapper& dest, Index row, Index col, ResScalar alpha, const ResPacket& pAlpha)
|
||||
{
|
||||
for(auto i = 0; i < M; i++)
|
||||
{
|
||||
for(auto j = 0; j < N; j++)
|
||||
{
|
||||
dest(row + i, col + j) += alpha*dt[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Generic scalar micro kernel: accumulates an M x N outer-product update for each of K
// consecutive depth slices, then advances both packed cursors past the data it consumed.
//
// The packed operands are read as K slices of M lhs values and K slices of N rhs values.
// This matches the generic packers, which emit one d1-wide group per depth index, and the
// advance(M*K) / advance(K*N) calls below. Previously only the first slice was accumulated
// while the cursors still moved by K slices, so any K > 1 silently dropped K-1 slices.
// For K == 1 the result is unchanged.
//
// rowIdx, colIdx and depthIdx are not used by this generic version.
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator, int M, int K, int N>
struct MicroKernel
{
  EIGEN_STRONG_INLINE void operator()(LhsPackMap& lhsPackMap,
                                      RhsPackMap& rhsPackMap,
                                      Index rowIdx, Index colIdx, Index depthIdx,
                                      Accumulator& acc)
  {
    const LhsScalar *pLhs = lhsPackMap.pCur;
    const RhsScalar *pRhs = rhsPackMap.pCur;
    for(int k = 0; k < K; k++)
    {
      for(int i = 0; i < N; i++)
      {
        for(int j = 0; j < M; j++)
        {
          acc.dt[j][i] += pRhs[i]*pLhs[j];
        }
      }
      // Step to the next depth slice of each packed operand.
      pLhs += M;
      pRhs += N;
    }
    lhsPackMap.advance(M*K);
    rhsPackMap.advance(K*N);
  }
};
|
||||
|
||||
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename ResPacket, typename DataMapper, typename AccumulatorType, int RHS_SHAPE_IDX, int LHS_SHAPE_IDX, int IDX>
|
||||
struct DepthLoopStruct
|
||||
{
|
||||
static constexpr auto PREVIOUS = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_DEP_POINTER];
|
||||
|
||||
DepthLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, AccumulatorType, RHS_SHAPE_IDX, LHS_SHAPE_IDX, PREVIOUS> depthLS;
|
||||
|
||||
EIGEN_STRONG_INLINE void operator()(Index rowIdx, Index colIdx, Index depthIdx, const DataMapper& res, AccumulatorType& acc,
|
||||
Index rows, Index depth, Index cols, ResScalar alpha, const ResPacket& pAlpha, LhsPackMap& lhsPackMap, RhsPackMap& rhsPackMap)
|
||||
{
|
||||
constexpr auto rhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[RHS_SHAPE_IDX][SHAPES_RHS_DIMENSION];
|
||||
constexpr auto lhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[LHS_SHAPE_IDX][SHAPES_LHS_DIMENSION];
|
||||
constexpr auto depthProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_DEP_DIMENSION];
|
||||
|
||||
//typedef Accumulator<Architecture, CPU, AccScalar, ResScalar, DataMapper, lhsProgress, rhsProgress> AccumulatorType;
|
||||
|
||||
MicroKernel<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, AccumulatorType, lhsProgress, depthProgress, rhsProgress> mkt;
|
||||
//AccumulatorType acc;
|
||||
|
||||
//acc.zero();
|
||||
|
||||
acc.template prefetch<lhsProgress, depthProgress, rhsProgress>(res, rowIdx, colIdx);
|
||||
|
||||
lhsPackMap.template prefetch<lhsProgress, depthProgress>(0);
|
||||
rhsPackMap.template prefetch<rhsProgress, depthProgress>(0);
|
||||
|
||||
for(; depthIdx + depthProgress <= depth; depthIdx+=depthProgress)
|
||||
{
|
||||
#ifdef __DEBUG__
|
||||
auto M = lhsProgress;
|
||||
auto K = depthProgress;
|
||||
auto N = rhsProgress;
|
||||
std::cout << "Kernel " << M << " x " << K << " x " << N << " @ " << rowIdx << ", " << depthIdx << ", " << colIdx << std::endl;
|
||||
std::cout << "LHS ";
|
||||
for(auto i = 0; i < M; i++)
|
||||
{
|
||||
for(auto j = 0; j < K; j++)
|
||||
{
|
||||
std::cout << lhsPackMap.pCur[i*K + j] << " ";
|
||||
}
|
||||
}
|
||||
std::cout << std::endl << "RHS ";
|
||||
for(auto i = 0; i < K; i++)
|
||||
{
|
||||
for(auto j = 0; j < N; j++)
|
||||
{
|
||||
std::cout << rhsPackMap.pCur[i*N + j] << " ";
|
||||
}
|
||||
}
|
||||
std::cout << std::endl;
|
||||
#endif
|
||||
mkt(lhsPackMap, rhsPackMap, rowIdx, colIdx, depthIdx, acc);
|
||||
}
|
||||
//acc.store(res, rowIdx, colIdx, alpha, pAlpha);
|
||||
|
||||
depthLS(rowIdx, colIdx, depthIdx, res, acc, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
||||
}
|
||||
};
|
||||
|
||||
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename ResPacket, typename DataMapper, typename AccumulatorType, int RHS_SHAPE_IDX, int LHS_SHAPE_IDX>
|
||||
struct DepthLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, AccumulatorType, RHS_SHAPE_IDX, LHS_SHAPE_IDX, -1>
|
||||
{
|
||||
EIGEN_STRONG_INLINE void operator()(Index, Index, Index, const DataMapper&, AccumulatorType&,
|
||||
Index, Index, Index, ResScalar, const ResPacket&, LhsPackMap&, RhsPackMap&) {}
|
||||
};
|
||||
|
||||
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename ResPacket, typename DataMapper, int RHS_SHAPE_IDX, int IDX>
|
||||
struct LhsLoopStruct
|
||||
{
|
||||
static constexpr auto PREVIOUS = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_LHS_POINTER];
|
||||
LhsLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, RHS_SHAPE_IDX, PREVIOUS> lhsLS;
|
||||
|
||||
EIGEN_STRONG_INLINE void operator()(Index rowIdx, int colIdx, const DataMapper& res,
|
||||
Index rows, Index depth, Index cols, ResScalar alpha, const ResPacket& pAlpha, LhsPackMap& lhsPackMap, RhsPackMap& rhsPackMap)
|
||||
{
|
||||
constexpr auto lhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_LHS_DIMENSION];
|
||||
constexpr auto rhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_RHS_DIMENSION];
|
||||
|
||||
typedef Accumulator<Architecture, CPU, AccScalar, ResScalar, DataMapper, lhsProgress, rhsProgress> AccumulatorType;
|
||||
|
||||
DepthLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, AccumulatorType, RHS_SHAPE_IDX, IDX, IDX> depthLS;
|
||||
|
||||
//rhsPackMap.resetCur();
|
||||
for(;rowIdx + lhsProgress <= rows; rowIdx+=lhsProgress)
|
||||
{
|
||||
rhsPackMap.resetCur();
|
||||
AccumulatorType acc;
|
||||
acc.zero();
|
||||
//lhsPackMap.moveTo(rowIdx);
|
||||
//rhsPackMap.moveTo(colIdx);
|
||||
|
||||
depthLS(rowIdx, colIdx, 0, res, acc, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
||||
acc.store(res, rowIdx, colIdx, alpha, pAlpha);
|
||||
}
|
||||
lhsLS(rowIdx, colIdx, res, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
||||
}
|
||||
};
|
||||
|
||||
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename ResPacket, typename DataMapper, int RHS_SHAPE_IDX>
|
||||
struct LhsLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, RHS_SHAPE_IDX, -1>
|
||||
{
|
||||
EIGEN_STRONG_INLINE void operator()(Index, Index, const DataMapper&,
|
||||
Index, Index, Index, ResScalar, const ResPacket&, LhsPackMap&, RhsPackMap&) {}
|
||||
};
|
||||
|
||||
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename ResPacket, typename DataMapper, int IDX>
|
||||
struct RhsLoopStruct
|
||||
{
|
||||
static constexpr auto PREVIOUS = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_RHS_POINTER];
|
||||
RhsLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, PREVIOUS> rhsLS;
|
||||
|
||||
EIGEN_STRONG_INLINE void operator()(Index colIdx, const DataMapper& res,
|
||||
Index rows, Index depth, Index cols, ResScalar alpha, const ResPacket& pAlpha, LhsPackMap& lhsPackMap, RhsPackMap& rhsPackMap)
|
||||
{
|
||||
constexpr auto rhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_RHS_DIMENSION];
|
||||
|
||||
for(;colIdx + rhsProgress <= cols; colIdx+=rhsProgress)
|
||||
{
|
||||
LhsLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, IDX, IDX> lhsLS;
|
||||
lhsPackMap.resetCur();
|
||||
lhsLS(0, colIdx, res, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
||||
rhsPackMap.updateBase();
|
||||
}
|
||||
rhsLS(colIdx, res, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
||||
}
|
||||
};
|
||||
|
||||
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename ResPacket, typename DataMapper>
|
||||
struct RhsLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, -1>
|
||||
{
|
||||
EIGEN_STRONG_INLINE void operator()(Index colIdx, const DataMapper&,
|
||||
Index, Index, Index, ResScalar, const ResPacket&, LhsPackMap&, RhsPackMap&) {}
|
||||
};
|
||||
|
||||
template<int Architecture, int CPU, typename ResScalar, typename AccScalar, typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper>
|
||||
EIGEN_STRONG_INLINE void gemm(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
|
||||
Index rows, Index depth, Index cols, ResScalar alpha, Index strideA, Index strideB, Index offsetA, Index offsetB)
|
||||
{
|
||||
using ResPacket = typename unpacket_traits<ResScalar>::type;
|
||||
typedef PackMap<Architecture, CPU, Index, LhsScalar, DataMapper, true> LhsPackMap;
|
||||
typedef PackMap<Architecture, CPU, Index, RhsScalar, DataMapper, false> RhsPackMap;
|
||||
|
||||
#ifdef __DEBUG__
|
||||
std::cout << "blockA" << std::endl;
|
||||
for(auto i = 0; i < rows*depth; i++)
|
||||
{
|
||||
if(i % 4 == 0 && i > 0)
|
||||
std::cout << std::endl;
|
||||
std::cout << blockA[i] << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
std::cout << "blockB" << std::endl;
|
||||
for(auto i = 0; i < depth*cols; i++)
|
||||
{
|
||||
if(i % 4 == 0 && i > 0)
|
||||
std::cout << std::endl;
|
||||
std::cout << blockB[i] << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
#endif
|
||||
asm __volatile__("#BEGING_GEBP\n\t");
|
||||
RhsLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, SHAPES_COUNT<0, 0, LhsScalar, RhsScalar>-1> rhsLS;
|
||||
LhsPackMap lhsPackMap(blockA, depth, strideA, offsetA);
|
||||
RhsPackMap rhsPackMap(blockB, depth, strideB, offsetB);
|
||||
|
||||
ResPacket pAlpha = pset1<ResPacket>(alpha);
|
||||
|
||||
rhsLS(0, res, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
||||
asm __volatile__("#END_GEBP\n\t");
|
||||
}
|
||||
/*
|
||||
template<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
|
||||
struct gemm_pack_rhs<float, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>
|
||||
{
|
||||
void operator()(float* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);
|
||||
};
|
||||
|
||||
template<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
|
||||
void gemm_pack_rhs<float, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>
|
||||
::operator()(float* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)
|
||||
{
|
||||
rhs_pack<0, 0, Index, float, DataMapper, Conjugate, PanelMode, ColMajor> pack;
|
||||
pack(blockB, rhs, depth, cols, stride, offset);
|
||||
}
|
||||
|
||||
template<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
|
||||
struct gemm_pack_rhs<float, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>
|
||||
{
|
||||
void operator()(float* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);
|
||||
};
|
||||
|
||||
template<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
|
||||
void gemm_pack_rhs<float, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>
|
||||
::operator()(float* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)
|
||||
{
|
||||
rhs_pack<0, 0, Index, float, DataMapper, Conjugate, PanelMode, RowMajor> pack;
|
||||
pack(blockB, rhs, depth, cols, stride, offset);
|
||||
}
|
||||
|
||||
template<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>
|
||||
struct gemm_pack_lhs<float, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>
|
||||
{
|
||||
void operator()(float* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);
|
||||
};
|
||||
|
||||
template<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>
|
||||
void gemm_pack_lhs<float, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>
|
||||
::operator()(float* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)
|
||||
{
|
||||
lhs_pack<0, 0, Index, float, DataMapper, Conjugate, PanelMode, RowMajor> pack;
|
||||
pack(blockA, lhs, depth, rows, stride, offset);
|
||||
}
|
||||
|
||||
template<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>
|
||||
struct gemm_pack_lhs<float, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>
|
||||
{
|
||||
void operator()(float* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);
|
||||
};
|
||||
|
||||
template<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>
|
||||
void gemm_pack_lhs<float, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>
|
||||
::operator()(float* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)
|
||||
{
|
||||
lhs_pack<0, 0, Index, float, DataMapper, Conjugate, PanelMode, ColMajor> pack;
|
||||
pack(blockA, lhs, depth, rows, stride, offset);
|
||||
}
|
||||
*/
|
||||
template<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
|
||||
struct gebp_kernel<float, float, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
|
||||
{
|
||||
void operator()(const DataMapper& res, const float* blockA, const float* blockB,
|
||||
Index rows, Index depth, Index cols, float alpha,
|
||||
Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0);
|
||||
};
|
||||
|
||||
template<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
|
||||
void gebp_kernel<float, float, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
|
||||
::operator()(const DataMapper& res, const float* blockA, const float* blockB,
|
||||
Index rows, Index depth, Index cols, float alpha,
|
||||
Index strideA, Index strideB, Index offsetA, Index offsetB)
|
||||
{
|
||||
gemm<0, 0, float, float, float, float, Index, DataMapper>(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
|
||||
}
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
#endif // EIGEN_MATRIX_PRODUCT_NEON_H
|
||||
@@ -1,192 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2021 Everton Constantino (everton.constantino@hotmail.com)
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_PACKING_OPS_NEON_H
|
||||
#define EIGEN_PACKING_OPS_NEON_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
#ifdef __ENABLE_CUSTOM_PACKING__
|
||||
|
||||
template<int CPU, typename Scalar, bool isLhs>
|
||||
constexpr int PACK_SHAPES_COUNT<0, CPU, Scalar, isLhs> = 3;
|
||||
|
||||
template<int CPU, typename Scalar>
|
||||
constexpr int PACK_SHAPES_COUNT<0, CPU, Scalar, true> = 4;
|
||||
|
||||
template<int CPU, typename Scalar, bool isLhs>
|
||||
constexpr int PACK_SHAPES<0, CPU, Scalar, isLhs>[PACK_SHAPES_COUNT<0, CPU, Scalar, isLhs>][PACK_SHAPES_DIMENSION] = {{1,1,PACK_SHAPES_END},{4,1,0},{4,4,0}};
|
||||
|
||||
template<int CPU, typename Scalar>
|
||||
constexpr int PACK_SHAPES<0, CPU, Scalar, true>[PACK_SHAPES_COUNT<0, CPU, Scalar, true>][PACK_SHAPES_DIMENSION] = {{1,1,PACK_SHAPES_END},{4,1,0},{4,4,0},{8,1,2}};
|
||||
|
||||
template<int CPU, typename Index, typename Scalar, bool isLhs, typename DataMapper, bool Conjugate, bool PanelMode, int StorageOrder>
|
||||
struct PackingOperator<0, CPU, Index, Scalar, isLhs, DataMapper, Conjugate, PanelMode, StorageOrder, 4, 4>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Scalar* operator()(Index d1Idx, Index d2Idx, Scalar *block, const DataMapper& data)
|
||||
{
|
||||
using Packet = typename packet_traits<Scalar>::type;
|
||||
constexpr int vectorSize = packet_traits<Scalar>::size;
|
||||
|
||||
Scalar *c = block;
|
||||
|
||||
if(!isLhs)
|
||||
{
|
||||
int tD = d1Idx;
|
||||
d1Idx = d2Idx;
|
||||
d2Idx = tD;
|
||||
}
|
||||
|
||||
if(isLhs && StorageOrder == ColMajor || !isLhs && StorageOrder == RowMajor)
|
||||
{
|
||||
Packet p0 = data.template loadPacket<Packet>(d1Idx, d2Idx + 0);
|
||||
Packet p1 = data.template loadPacket<Packet>(d1Idx, d2Idx + 1);
|
||||
Packet p2 = data.template loadPacket<Packet>(d1Idx, d2Idx + 2);
|
||||
Packet p3 = data.template loadPacket<Packet>(d1Idx, d2Idx + 3);
|
||||
|
||||
pstore<Scalar>(c + 0*vectorSize, p0);
|
||||
pstore<Scalar>(c + 1*vectorSize, p1);
|
||||
pstore<Scalar>(c + 2*vectorSize, p2);
|
||||
pstore<Scalar>(c + 3*vectorSize, p3);
|
||||
c+=4*vectorSize;
|
||||
} else {
|
||||
PacketBlock<Packet, 4> pblock;
|
||||
|
||||
pblock.packet[0] = data.template loadPacket<Packet>(d1Idx, d2Idx + 0);
|
||||
pblock.packet[1] = data.template loadPacket<Packet>(d1Idx, d2Idx + 1);
|
||||
pblock.packet[2] = data.template loadPacket<Packet>(d1Idx, d2Idx + 2);
|
||||
pblock.packet[3] = data.template loadPacket<Packet>(d1Idx, d2Idx + 3);
|
||||
|
||||
ptranspose(pblock);
|
||||
|
||||
pstore<Scalar>(c + 0*vectorSize, pblock.packet[0]);
|
||||
pstore<Scalar>(c + 1*vectorSize, pblock.packet[1]);
|
||||
pstore<Scalar>(c + 2*vectorSize, pblock.packet[2]);
|
||||
pstore<Scalar>(c + 3*vectorSize, pblock.packet[3]);
|
||||
c+=4*vectorSize;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
};
|
||||
|
||||
template<int CPU, typename Index, typename Scalar, bool isLhs, typename DataMapper, bool Conjugate, bool PanelMode, int StorageOrder>
|
||||
struct PackingOperator<0, CPU, Index, Scalar, isLhs, DataMapper, Conjugate, PanelMode, StorageOrder, 8, 1>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Scalar* operator()(Index d1Idx, Index d2Idx, Scalar *block, const DataMapper& data)
|
||||
{
|
||||
using Packet = typename packet_traits<Scalar>::type;
|
||||
Scalar *c = block;
|
||||
if(isLhs && StorageOrder == ColMajor)
|
||||
{
|
||||
Packet p = data.template loadPacket<Packet>(d1Idx + 0, d2Idx);
|
||||
pstore<Scalar>(c, p);
|
||||
c+=4;
|
||||
p = data.template loadPacket<Packet>(d1Idx + 4, d2Idx);
|
||||
pstore<Scalar>(c, p);
|
||||
c+=4;
|
||||
} else if(!isLhs && StorageOrder == RowMajor) {
|
||||
Packet p = data.template loadPacket<Packet>(d2Idx, d1Idx + 0);
|
||||
pstore<Scalar>(c, p);
|
||||
c+=4;
|
||||
p = data.template loadPacket<Packet>(d2Idx, d1Idx + 4);
|
||||
pstore<Scalar>(c, p);
|
||||
c+=4;
|
||||
} else {
|
||||
if(isLhs)
|
||||
{
|
||||
*c = data(d1Idx + 0, d2Idx + 0);
|
||||
c++;
|
||||
*c = data(d1Idx + 1, d2Idx + 0);
|
||||
c++;
|
||||
*c = data(d1Idx + 2, d2Idx + 0);
|
||||
c++;
|
||||
*c = data(d1Idx + 3, d2Idx + 0);
|
||||
c++;
|
||||
*c = data(d1Idx + 0, d2Idx + 4);
|
||||
c++;
|
||||
*c = data(d1Idx + 1, d2Idx + 4);
|
||||
c++;
|
||||
*c = data(d1Idx + 2, d2Idx + 4);
|
||||
c++;
|
||||
*c = data(d1Idx + 3, d2Idx + 4);
|
||||
c++;
|
||||
} else {
|
||||
*c = data(d2Idx, d1Idx + 0);
|
||||
c++;
|
||||
*c = data(d2Idx, d1Idx + 1);
|
||||
c++;
|
||||
*c = data(d2Idx, d1Idx + 2);
|
||||
c++;
|
||||
*c = data(d2Idx, d1Idx + 3);
|
||||
c++;
|
||||
*c = data(d2Idx + 4, d1Idx + 0);
|
||||
c++;
|
||||
*c = data(d2Idx + 4, d1Idx + 1);
|
||||
c++;
|
||||
*c = data(d2Idx + 4, d1Idx + 2);
|
||||
c++;
|
||||
*c = data(d2Idx + 4, d1Idx + 3);
|
||||
c++;
|
||||
}
|
||||
}
|
||||
return c;
|
||||
}
|
||||
};
|
||||
|
||||
template<int CPU, typename Index, typename Scalar, bool isLhs, typename DataMapper, bool Conjugate, bool PanelMode, int StorageOrder>
|
||||
struct PackingOperator<0, CPU, Index, Scalar, isLhs, DataMapper, Conjugate, PanelMode, StorageOrder, 4, 1>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Scalar* operator()(Index d1Idx, Index d2Idx, Scalar *block, const DataMapper& data)
|
||||
{
|
||||
using Packet = typename packet_traits<Scalar>::type;
|
||||
Scalar *c = block;
|
||||
if(isLhs && StorageOrder == ColMajor)
|
||||
{
|
||||
Packet p = data.template loadPacket<Packet>(d1Idx, d2Idx);
|
||||
pstore<Scalar>(c, p);
|
||||
c+=4;
|
||||
} else if(!isLhs && StorageOrder == RowMajor) {
|
||||
Packet p = data.template loadPacket<Packet>(d2Idx, d1Idx);
|
||||
pstore<Scalar>(c, p);
|
||||
c+=4;
|
||||
} else {
|
||||
if(isLhs)
|
||||
{
|
||||
*c = data(d1Idx + 0, d2Idx);
|
||||
c++;
|
||||
*c = data(d1Idx + 1, d2Idx);
|
||||
c++;
|
||||
*c = data(d1Idx + 2, d2Idx);
|
||||
c++;
|
||||
*c = data(d1Idx + 3, d2Idx);
|
||||
c++;
|
||||
} else {
|
||||
*c = data(d2Idx, d1Idx + 0);
|
||||
c++;
|
||||
*c = data(d2Idx, d1Idx + 1);
|
||||
c++;
|
||||
*c = data(d2Idx, d1Idx + 2);
|
||||
c++;
|
||||
*c = data(d2Idx, d1Idx + 3);
|
||||
c++;
|
||||
}
|
||||
}
|
||||
return c;
|
||||
}
|
||||
};
|
||||
|
||||
#endif // __ENABLE_CUSTOM_PACKING__
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PACKING_OPS_NEON_H
|
||||
@@ -113,19 +113,13 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<fl
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
Packet2cf res;
|
||||
#if EIGEN_GNUC_AT_MOST(4,2)
|
||||
// Workaround annoying "may be used uninitialized in this function" warning with gcc 4.2
|
||||
res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
|
||||
#elif EIGEN_GNUC_AT_LEAST(4,6)
|
||||
// Suppress annoying "may be used uninitialized in this function" warning with gcc >= 4.6
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wuninitialized"
|
||||
res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
|
||||
#pragma GCC diagnostic pop
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
res.v = _mm_castpd_ps(_mm_loaddup_pd(reinterpret_cast<double const*>(&from)));
|
||||
#else
|
||||
res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
|
||||
res.v = _mm_castpd_ps(_mm_load_sd(reinterpret_cast<double const*>(&from)));
|
||||
res.v = _mm_movelh_ps(res.v, res.v);
|
||||
#endif
|
||||
return Packet2cf(_mm_movelh_ps(res.v,res.v));
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
|
||||
|
||||
@@ -16,8 +16,8 @@
|
||||
//------------------------------------------------------------------------------------------
|
||||
|
||||
#define EIGEN_WORLD_VERSION 3
|
||||
#define EIGEN_MAJOR_VERSION 4
|
||||
#define EIGEN_MINOR_VERSION 99
|
||||
#define EIGEN_MAJOR_VERSION 3
|
||||
#define EIGEN_MINOR_VERSION 90
|
||||
|
||||
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
|
||||
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
|
||||
@@ -684,8 +684,7 @@
|
||||
// Does the compiler support result_of?
|
||||
// result_of was deprecated in c++17 and removed in c++ 20
|
||||
#ifndef EIGEN_HAS_STD_RESULT_OF
|
||||
#if EIGEN_MAX_CPP_VER >= 11 && \
|
||||
(defined(__cplusplus) && __cplusplus >= 201103L && __cplusplus < 201703L)
|
||||
#if EIGEN_HAS_CXX11 && EIGEN_COMP_CXXVER < 17
|
||||
#define EIGEN_HAS_STD_RESULT_OF 1
|
||||
#else
|
||||
#define EIGEN_HAS_STD_RESULT_OF 0
|
||||
@@ -704,8 +703,7 @@
|
||||
#endif // EIGEN_HAS_STD_HASH
|
||||
|
||||
#ifndef EIGEN_HAS_STD_INVOKE_RESULT
|
||||
#if EIGEN_MAX_CPP_VER >= 17 && \
|
||||
(defined(__cplusplus) && __cplusplus >= 201703L)
|
||||
#if EIGEN_MAX_CPP_VER >= 17 && EIGEN_COMP_CXXVER >= 17
|
||||
#define EIGEN_HAS_STD_INVOKE_RESULT 1
|
||||
#else
|
||||
#define EIGEN_HAS_STD_INVOKE_RESULT 0
|
||||
|
||||
@@ -136,15 +136,14 @@ template<typename T, int Value> class variable_if_dynamic
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
|
||||
operator T() const { return T(Value); }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void setValue(T) const {}
|
||||
void setValue(T v) const { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
|
||||
};
|
||||
|
||||
template<typename T> class variable_if_dynamic<T, Dynamic>
|
||||
{
|
||||
T m_value;
|
||||
EIGEN_DEVICE_FUNC variable_if_dynamic() { eigen_assert(false); }
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value) : m_value(value) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value = 0) EIGEN_NO_THROW : m_value(value) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return m_value; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; }
|
||||
|
||||
@@ -498,8 +498,6 @@ template<typename MatrixType, typename DiagType, typename SubDiagType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
ComputationInfo computeFromTridiagonal_impl(DiagType& diag, SubDiagType& subdiag, const Index maxIterations, bool computeEigenvectors, MatrixType& eivec)
|
||||
{
|
||||
EIGEN_USING_STD(abs);
|
||||
|
||||
ComputationInfo info;
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
|
||||
@@ -510,15 +508,23 @@ ComputationInfo computeFromTridiagonal_impl(DiagType& diag, SubDiagType& subdiag
|
||||
|
||||
typedef typename DiagType::RealScalar RealScalar;
|
||||
const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();
|
||||
const RealScalar precision = RealScalar(2)*NumTraits<RealScalar>::epsilon();
|
||||
|
||||
const RealScalar precision_inv = RealScalar(1)/NumTraits<RealScalar>::epsilon();
|
||||
while (end>0)
|
||||
{
|
||||
for (Index i = start; i<end; ++i)
|
||||
if (internal::isMuchSmallerThan(abs(subdiag[i]),(abs(diag[i])+abs(diag[i+1])),precision) || abs(subdiag[i]) <= considerAsZero)
|
||||
subdiag[i] = 0;
|
||||
for (Index i = start; i<end; ++i) {
|
||||
if (numext::abs(subdiag[i]) < considerAsZero) {
|
||||
subdiag[i] = RealScalar(0);
|
||||
} else {
|
||||
// abs(subdiag[i]) <= epsilon * sqrt(abs(diag[i]) + abs(diag[i+1]))
|
||||
// Scaled to prevent underflows.
|
||||
const RealScalar scaled_subdiag = precision_inv * subdiag[i];
|
||||
if (scaled_subdiag * scaled_subdiag <= (numext::abs(diag[i])+numext::abs(diag[i+1]))) {
|
||||
subdiag[i] = RealScalar(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// find the largest unreduced block
|
||||
// find the largest unreduced block at the end of the matrix.
|
||||
while (end>0 && subdiag[end-1]==RealScalar(0))
|
||||
{
|
||||
end--;
|
||||
@@ -821,32 +827,38 @@ SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Francis implicit QR step.
|
||||
template<int StorageOrder,typename RealScalar, typename Scalar, typename Index>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index start, Index end, Scalar* matrixQ, Index n)
|
||||
{
|
||||
EIGEN_USING_STD(abs);
|
||||
// Wilkinson Shift.
|
||||
RealScalar td = (diag[end-1] - diag[end])*RealScalar(0.5);
|
||||
RealScalar e = subdiag[end-1];
|
||||
// Note that thanks to scaling, e^2 or td^2 cannot overflow, however they can still
|
||||
// underflow thus leading to inf/NaN values when using the following commented code:
|
||||
// RealScalar e2 = numext::abs2(subdiag[end-1]);
|
||||
// RealScalar mu = diag[end] - e2 / (td + (td>0 ? 1 : -1) * sqrt(td*td + e2));
|
||||
// RealScalar e2 = numext::abs2(subdiag[end-1]);
|
||||
// RealScalar mu = diag[end] - e2 / (td + (td>0 ? 1 : -1) * sqrt(td*td + e2));
|
||||
// This explains the following, somewhat more complicated, version:
|
||||
RealScalar mu = diag[end];
|
||||
if(td==RealScalar(0))
|
||||
mu -= abs(e);
|
||||
else
|
||||
{
|
||||
RealScalar e2 = numext::abs2(subdiag[end-1]);
|
||||
RealScalar h = numext::hypot(td,e);
|
||||
if(e2==RealScalar(0)) mu -= (e / (td + (td>RealScalar(0) ? RealScalar(1) : RealScalar(-1)))) * (e / h);
|
||||
else mu -= e2 / (td + (td>RealScalar(0) ? h : -h));
|
||||
if(td==RealScalar(0)) {
|
||||
mu -= numext::abs(e);
|
||||
} else if (e != RealScalar(0)) {
|
||||
const RealScalar e2 = numext::abs2(e);
|
||||
const RealScalar h = numext::hypot(td,e);
|
||||
if(e2 == RealScalar(0)) {
|
||||
mu -= e / ((td + (td>RealScalar(0) ? h : -h)) / e);
|
||||
} else {
|
||||
mu -= e2 / (td + (td>RealScalar(0) ? h : -h));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
RealScalar x = diag[start] - mu;
|
||||
RealScalar z = subdiag[start];
|
||||
for (Index k = start; k < end; ++k)
|
||||
// If z ever becomes zero, the Givens rotation will be the identity and
|
||||
// z will stay zero for all future iterations.
|
||||
for (Index k = start; k < end && z != RealScalar(0); ++k)
|
||||
{
|
||||
JacobiRotation<RealScalar> rot;
|
||||
rot.makeGivens(x, z);
|
||||
@@ -859,12 +871,11 @@ static void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index sta
|
||||
diag[k+1] = rot.s() * sdk + rot.c() * dkp1;
|
||||
subdiag[k] = rot.c() * sdk - rot.s() * dkp1;
|
||||
|
||||
|
||||
if (k > start)
|
||||
subdiag[k - 1] = rot.c() * subdiag[k-1] - rot.s() * z;
|
||||
|
||||
// "Chasing the bulge" to return to triangular form.
|
||||
x = subdiag[k];
|
||||
|
||||
if (k < end - 1)
|
||||
{
|
||||
z = -rot.s() * subdiag[k+1];
|
||||
|
||||
@@ -141,8 +141,8 @@ struct compute_inverse_size4<Architecture::Target, float, MatrixType, ResultType
|
||||
iC = psub(iC, pmul(vec4f_swizzle2(A, A, 1, 0, 3, 2), vec4f_swizzle2(DC, DC, 2, 1, 2, 1)));
|
||||
iC = psub(pmul(B, vec4f_duplane(dC, 0)), iC);
|
||||
|
||||
const int bits[4] = {0, -2147483648, -2147483648, 0};
|
||||
const Packet4f p4f_sign_PNNP = preinterpret<Packet4f, Packet4i>(pgather<int, Packet4i>(bits, static_cast<Eigen::Index>(1)));
|
||||
const float sign_mask[4] = {0.0f, -0.0f, -0.0f, 0.0f};
|
||||
const Packet4f p4f_sign_PNNP = pset<Packet4f>(sign_mask);
|
||||
rd = pxor(rd, p4f_sign_PNNP);
|
||||
iA = pmul(iA, rd);
|
||||
iB = pmul(iB, rd);
|
||||
@@ -323,12 +323,12 @@ struct compute_inverse_size4<Architecture::Target, double, MatrixType, ResultTyp
|
||||
iC1 = psub(pmul(B1, dC), iC1);
|
||||
iC2 = psub(pmul(B2, dC), iC2);
|
||||
|
||||
const int bits1[4] = {0, -2147483648, 0, 0};
|
||||
const int bits2[4] = {0, 0, 0, -2147483648};
|
||||
const Packet2d _Sign_NP = preinterpret<Packet2d, Packet4i>(pgather<int, Packet4i>(bits1, static_cast<Eigen::Index>(1)));
|
||||
const Packet2d _Sign_PN = preinterpret<Packet2d, Packet4i>(pgather<int, Packet4i>(bits2, static_cast<Eigen::Index>(1)));
|
||||
d1 = pxor(rd, _Sign_PN);
|
||||
d2 = pxor(rd, _Sign_NP);
|
||||
const double sign_mask1[2] = {0.0, -0.0};
|
||||
const double sign_mask2[2] = {-0.0, 0.0};
|
||||
const Packet2d sign_PN = pset<Packet2d>(sign_mask1);
|
||||
const Packet2d sign_NP = pset<Packet2d>(sign_mask2);
|
||||
d1 = pxor(rd, sign_PN);
|
||||
d2 = pxor(rd, sign_NP);
|
||||
|
||||
Index res_stride = result.outerStride();
|
||||
double *res = result.data();
|
||||
|
||||
@@ -208,6 +208,7 @@ protected:
|
||||
using Base::m_computeThinV;
|
||||
using Base::m_matrixU;
|
||||
using Base::m_matrixV;
|
||||
using Base::m_info;
|
||||
using Base::m_isInitialized;
|
||||
using Base::m_nonzeroSingularValues;
|
||||
|
||||
@@ -256,16 +257,25 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign
|
||||
{
|
||||
// FIXME this line involves temporaries
|
||||
JacobiSVD<MatrixType> jsvd(matrix,computationOptions);
|
||||
if(computeU()) m_matrixU = jsvd.matrixU();
|
||||
if(computeV()) m_matrixV = jsvd.matrixV();
|
||||
m_singularValues = jsvd.singularValues();
|
||||
m_nonzeroSingularValues = jsvd.nonzeroSingularValues();
|
||||
m_isInitialized = true;
|
||||
m_info = jsvd.info();
|
||||
if (m_info == Success || m_info == NoConvergence) {
|
||||
if(computeU()) m_matrixU = jsvd.matrixU();
|
||||
if(computeV()) m_matrixV = jsvd.matrixV();
|
||||
m_singularValues = jsvd.singularValues();
|
||||
m_nonzeroSingularValues = jsvd.nonzeroSingularValues();
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
//**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows
|
||||
RealScalar scale = matrix.cwiseAbs().maxCoeff();
|
||||
RealScalar scale = matrix.cwiseAbs().template maxCoeff<PropagateNaN>();
|
||||
if (!(numext::isfinite)(scale)) {
|
||||
m_isInitialized = true;
|
||||
m_info = InvalidInput;
|
||||
return *this;
|
||||
}
|
||||
|
||||
if(scale==Literal(0)) scale = Literal(1);
|
||||
MatrixX copy;
|
||||
if (m_isTranspose) copy = matrix.adjoint()/scale;
|
||||
@@ -282,7 +292,11 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign
|
||||
m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();
|
||||
m_computed.template bottomRows<1>().setZero();
|
||||
divide(0, m_diagSize - 1, 0, 0, 0);
|
||||
|
||||
if (m_info != Success && m_info != NoConvergence) {
|
||||
m_isInitialized = true;
|
||||
return *this;
|
||||
}
|
||||
|
||||
//**** step 3 - Copy singular values and vectors
|
||||
for (int i=0; i<m_diagSize; i++)
|
||||
{
|
||||
@@ -394,7 +408,7 @@ void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, co
|
||||
//@param shift : Each time one takes the left submatrix, one must add 1 to the shift. Why? Because! We actually want the last column of the U submatrix
|
||||
// to become the first column (*coeff) and to shift all the other columns to the right. There are more details on the reference paper.
|
||||
template<typename MatrixType>
|
||||
void BDCSVD<MatrixType>::divide (Eigen::Index firstCol, Eigen::Index lastCol, Eigen::Index firstRowW, Eigen::Index firstColW, Eigen::Index shift)
|
||||
void BDCSVD<MatrixType>::divide(Eigen::Index firstCol, Eigen::Index lastCol, Eigen::Index firstRowW, Eigen::Index firstColW, Eigen::Index shift)
|
||||
{
|
||||
// requires rows = cols + 1;
|
||||
using std::pow;
|
||||
@@ -414,6 +428,8 @@ void BDCSVD<MatrixType>::divide (Eigen::Index firstCol, Eigen::Index lastCol, Ei
|
||||
{
|
||||
// FIXME this line involves temporaries
|
||||
JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0));
|
||||
m_info = b.info();
|
||||
if (m_info != Success && m_info != NoConvergence) return;
|
||||
if (m_compU)
|
||||
m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = b.matrixU();
|
||||
else
|
||||
@@ -433,7 +449,9 @@ void BDCSVD<MatrixType>::divide (Eigen::Index firstCol, Eigen::Index lastCol, Ei
|
||||
// and the divide of the right submatrice reads one column of the left submatrice. That's why we need to treat the
|
||||
// right submatrix before the left one.
|
||||
divide(k + 1 + firstCol, lastCol, k + 1 + firstRowW, k + 1 + firstColW, shift);
|
||||
if (m_info != Success && m_info != NoConvergence) return;
|
||||
divide(firstCol, k - 1 + firstCol, firstRowW, firstColW + 1, shift + 1);
|
||||
if (m_info != Success && m_info != NoConvergence) return;
|
||||
|
||||
if (m_compU)
|
||||
{
|
||||
|
||||
@@ -585,6 +585,7 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
|
||||
using Base::m_matrixU;
|
||||
using Base::m_matrixV;
|
||||
using Base::m_singularValues;
|
||||
using Base::m_info;
|
||||
using Base::m_isInitialized;
|
||||
using Base::m_isAllocated;
|
||||
using Base::m_usePrescribedThreshold;
|
||||
@@ -625,6 +626,7 @@ void JacobiSVD<MatrixType, QRPreconditioner>::allocate(Eigen::Index rows, Eigen:
|
||||
|
||||
m_rows = rows;
|
||||
m_cols = cols;
|
||||
m_info = Success;
|
||||
m_isInitialized = false;
|
||||
m_isAllocated = true;
|
||||
m_computationOptions = computationOptions;
|
||||
@@ -674,7 +676,12 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
|
||||
const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();
|
||||
|
||||
// Scaling factor to reduce over/under-flows
|
||||
RealScalar scale = matrix.cwiseAbs().maxCoeff();
|
||||
RealScalar scale = matrix.cwiseAbs().template maxCoeff<PropagateNaN>();
|
||||
if (!(numext::isfinite)(scale)) {
|
||||
m_isInitialized = true;
|
||||
m_info = InvalidInput;
|
||||
return *this;
|
||||
}
|
||||
if(scale==RealScalar(0)) scale = RealScalar(1);
|
||||
|
||||
/*** step 1. The R-SVD step: we use a QR decomposition to reduce to the case of a square matrix */
|
||||
|
||||
@@ -51,8 +51,11 @@ template<typename Derived> struct traits<SVDBase<Derived> >
|
||||
* smaller value among \a n and \a p, there are only \a m singular vectors; the remaining columns of \a U and \a V do not correspond to actual
|
||||
* singular vectors. Asking for \em thin \a U or \a V means asking for only their \a m first columns to be formed. So \a U is then a n-by-m matrix,
|
||||
* and \a V is then a p-by-m matrix. Notice that thin \a U and \a V are all you need for (least squares) solving.
|
||||
*
|
||||
* The status of the computation can be retrieved using the \a info() method. Unless \a info() returns \a Success, the results should not be
|
||||
* considered well defined.
|
||||
*
|
||||
* If the input matrix has inf or nan coefficients, the result of the computation is undefined, but the computation is guaranteed to
|
||||
* If the input matrix has inf or nan coefficients, the result of the computation is undefined, and \a info() will return \a InvalidInput, but the computation is guaranteed to
|
||||
* terminate in finite (and reasonable) time.
|
||||
* \sa class BDCSVD, class JacobiSVD
|
||||
*/
|
||||
@@ -97,7 +100,7 @@ public:
|
||||
*/
|
||||
const MatrixUType& matrixU() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "SVD is not initialized.");
|
||||
_check_compute_assertions();
|
||||
eigen_assert(computeU() && "This SVD decomposition didn't compute U. Did you ask for it?");
|
||||
return m_matrixU;
|
||||
}
|
||||
@@ -113,7 +116,7 @@ public:
|
||||
*/
|
||||
const MatrixVType& matrixV() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "SVD is not initialized.");
|
||||
_check_compute_assertions();
|
||||
eigen_assert(computeV() && "This SVD decomposition didn't compute V. Did you ask for it?");
|
||||
return m_matrixV;
|
||||
}
|
||||
@@ -125,14 +128,14 @@ public:
|
||||
*/
|
||||
const SingularValuesType& singularValues() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "SVD is not initialized.");
|
||||
_check_compute_assertions();
|
||||
return m_singularValues;
|
||||
}
|
||||
|
||||
/** \returns the number of singular values that are not exactly 0 */
|
||||
Index nonzeroSingularValues() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "SVD is not initialized.");
|
||||
_check_compute_assertions();
|
||||
return m_nonzeroSingularValues;
|
||||
}
|
||||
|
||||
@@ -145,7 +148,7 @@ public:
|
||||
inline Index rank() const
|
||||
{
|
||||
using std::abs;
|
||||
eigen_assert(m_isInitialized && "JacobiSVD is not initialized.");
|
||||
_check_compute_assertions();
|
||||
if(m_singularValues.size()==0) return 0;
|
||||
RealScalar premultiplied_threshold = numext::maxi<RealScalar>(m_singularValues.coeff(0) * threshold(), (std::numeric_limits<RealScalar>::min)());
|
||||
Index i = m_nonzeroSingularValues-1;
|
||||
@@ -224,6 +227,18 @@ public:
|
||||
solve(const MatrixBase<Rhs>& b) const;
|
||||
#endif
|
||||
|
||||
|
||||
/** \brief Reports whether previous computation was successful.
|
||||
*
|
||||
* \returns \c Success if computation was successful.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
ComputationInfo info() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "SVD is not initialized.");
|
||||
return m_info;
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename RhsType, typename DstType>
|
||||
void _solve_impl(const RhsType &rhs, DstType &dst) const;
|
||||
@@ -233,26 +248,31 @@ public:
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
void _check_compute_assertions() const {
|
||||
eigen_assert(m_isInitialized && "SVD is not initialized.");
|
||||
}
|
||||
|
||||
template<bool Transpose_, typename Rhs>
|
||||
void _check_solve_assertion(const Rhs& b) const {
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(b);
|
||||
eigen_assert(m_isInitialized && "SVD is not initialized.");
|
||||
_check_compute_assertions();
|
||||
eigen_assert(computeU() && computeV() && "SVDBase::solve(): Both unitaries U and V are required to be computed (thin unitaries suffice).");
|
||||
eigen_assert((Transpose_?cols():rows())==b.rows() && "SVDBase::solve(): invalid number of rows of the right hand side matrix b");
|
||||
}
|
||||
|
||||
|
||||
// return true if already allocated
|
||||
bool allocate(Index rows, Index cols, unsigned int computationOptions) ;
|
||||
|
||||
MatrixUType m_matrixU;
|
||||
MatrixVType m_matrixV;
|
||||
SingularValuesType m_singularValues;
|
||||
ComputationInfo m_info;
|
||||
bool m_isInitialized, m_isAllocated, m_usePrescribedThreshold;
|
||||
bool m_computeFullU, m_computeThinU;
|
||||
bool m_computeFullV, m_computeThinV;
|
||||
@@ -265,7 +285,8 @@ protected:
|
||||
* Default constructor of SVDBase
|
||||
*/
|
||||
SVDBase()
|
||||
: m_isInitialized(false),
|
||||
: m_info(Success),
|
||||
m_isInitialized(false),
|
||||
m_isAllocated(false),
|
||||
m_usePrescribedThreshold(false),
|
||||
m_computeFullU(false),
|
||||
@@ -327,6 +348,7 @@ bool SVDBase<MatrixType>::allocate(Index rows, Index cols, unsigned int computat
|
||||
|
||||
m_rows = rows;
|
||||
m_cols = cols;
|
||||
m_info = Success;
|
||||
m_isInitialized = false;
|
||||
m_isAllocated = true;
|
||||
m_computationOptions = computationOptions;
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
|
||||
#define NOGMM
|
||||
#define NOMTL
|
||||
#define EIGEN_GOOGLEHASH_SUPPORT 1
|
||||
|
||||
#include <map>
|
||||
#include <ext/hash_map>
|
||||
|
||||
107
ci/smoketests.gitlab-ci.yml
Normal file
107
ci/smoketests.gitlab-ci.yml
Normal file
@@ -0,0 +1,107 @@
|
||||
.buildsmoketests:linux:base:
|
||||
stage: buildsmoketests
|
||||
image: ubuntu:18.04
|
||||
before_script:
|
||||
- apt-get update -y
|
||||
- apt-get install -y --no-install-recommends software-properties-common
|
||||
- add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
- apt-get update
|
||||
- apt-get install --no-install-recommends -y ${EIGEN_CI_CXX_COMPILER}
|
||||
${EIGEN_CI_CC_COMPILER} cmake ninja-build
|
||||
script:
|
||||
- mkdir -p ${BUILDDIR} && cd ${BUILDDIR}
|
||||
- CXX=${EIGEN_CI_CXX_COMPILER} CC=${EIGEN_CI_CC_COMPILER} cmake -G
|
||||
${EIGEN_CI_CMAKE_GENEATOR} -DEIGEN_TEST_CXX11=${EIGEN_TEST_CXX11}
|
||||
${EIGEN_CI_ADDITIONAL_ARGS} ..
|
||||
- cmake --build . --target buildsmoketests
|
||||
artifacts:
|
||||
name: "$CI_JOB_NAME-$CI_COMMIT_REF_NAME"
|
||||
paths:
|
||||
- ${BUILDDIR}/
|
||||
expire_in: 5 days
|
||||
only:
|
||||
- merge_requests
|
||||
|
||||
buildsmoketests:x86-64:linux:gcc-10:cxx11-off:
|
||||
extends: .buildsmoketests:linux:base
|
||||
variables:
|
||||
EIGEN_CI_CXX_COMPILER: "g++-10"
|
||||
EIGEN_CI_CC_COMPILER: "gcc-10"
|
||||
EIGEN_TEST_CXX11: "off"
|
||||
|
||||
buildsmoketests:x86-64:linux:gcc-10:cxx11-on:
|
||||
extends: .buildsmoketests:linux:base
|
||||
variables:
|
||||
EIGEN_CI_CXX_COMPILER: "g++-10"
|
||||
EIGEN_CI_CC_COMPILER: "gcc-10"
|
||||
EIGEN_TEST_CXX11: "on"
|
||||
|
||||
buildsmoketests:x86-64:linux:clang-10:cxx11-off:
|
||||
extends: .buildsmoketests:linux:base
|
||||
variables:
|
||||
EIGEN_CI_CXX_COMPILER: "clang++-10"
|
||||
EIGEN_CI_CC_COMPILER: "clang-10"
|
||||
EIGEN_TEST_CXX11: "off"
|
||||
|
||||
buildsmoketests:x86-64:linux:clang-10:cxx11-on:
|
||||
extends: .buildsmoketests:linux:base
|
||||
variables:
|
||||
EIGEN_CI_CXX_COMPILER: "clang++-10"
|
||||
EIGEN_CI_CC_COMPILER: "clang-10"
|
||||
EIGEN_TEST_CXX11: "on"
|
||||
|
||||
.smoketests:linux:base:
|
||||
stage: smoketests
|
||||
image: ubuntu:18.04
|
||||
before_script:
|
||||
- apt-get update -y
|
||||
- apt-get install -y --no-install-recommends software-properties-common
|
||||
- add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
- apt-get update
|
||||
- apt-get install --no-install-recommends -y ${EIGEN_CI_CXX_COMPILER}
|
||||
${EIGEN_CI_CC_COMPILER} cmake ninja-build xsltproc
|
||||
script:
|
||||
- export CXX=${EIGEN_CI_CXX_COMPILER}
|
||||
- export CC=${EIGEN_CI_CC_COMPILER}
|
||||
- cd ${BUILDDIR} && ctest --output-on-failure --no-compress-output
|
||||
--build-no-clean -T test -L smoketest
|
||||
after_script:
|
||||
- apt-get update -y
|
||||
- apt-get install --no-install-recommends -y xsltproc
|
||||
- cd ${BUILDDIR}
|
||||
- xsltproc ../ci/CTest2JUnit.xsl Testing/`head -n 1 < Testing/TAG`/Test.xml > "JUnitTestResults_$CI_JOB_ID.xml"
|
||||
artifacts:
|
||||
reports:
|
||||
junit:
|
||||
- ${BUILDDIR}/JUnitTestResults_$CI_JOB_ID.xml
|
||||
expire_in: 5 days
|
||||
only:
|
||||
- merge_requests
|
||||
|
||||
smoketests:x86-64:linux:gcc-10:cxx11-off:
|
||||
extends: .smoketests:linux:base
|
||||
variables:
|
||||
EIGEN_CI_CXX_COMPILER: g++-10
|
||||
EIGEN_CI_CC_COMPILER: gcc-10
|
||||
needs: [ "buildsmoketests:x86-64:linux:gcc-10:cxx11-off" ]
|
||||
|
||||
smoketests:x86-64:linux:gcc-10:cxx11-on:
|
||||
extends: .smoketests:linux:base
|
||||
variables:
|
||||
EIGEN_CI_CXX_COMPILER: g++-10
|
||||
EIGEN_CI_CC_COMPILER: gcc-10
|
||||
needs: [ "buildsmoketests:x86-64:linux:gcc-10:cxx11-on" ]
|
||||
|
||||
smoketests:x86-64:linux:clang-10:cxx11-off:
|
||||
extends: .smoketests:linux:base
|
||||
variables:
|
||||
EIGEN_CI_CXX_COMPILER: clang++-10
|
||||
EIGEN_CI_CC_COMPILER: clang-10
|
||||
needs: [ "buildsmoketests:x86-64:linux:clang-10:cxx11-off" ]
|
||||
|
||||
smoketests:x86-64:linux:clang-10:cxx11-on:
|
||||
extends: .smoketests:linux:base
|
||||
variables:
|
||||
EIGEN_CI_CXX_COMPILER: clang++-10
|
||||
EIGEN_CI_CC_COMPILER: clang-10
|
||||
needs: [ "buildsmoketests:x86-64:linux:clang-10:cxx11-on" ]
|
||||
131
cmake/EigenSmokeTestList.cmake
Normal file
131
cmake/EigenSmokeTestList.cmake
Normal file
@@ -0,0 +1,131 @@
|
||||
# List of tests that will be built and run during Eigen's smoke testing. If one
|
||||
# of these tests doesn't exist or cannot be built with the current configuration
|
||||
# it will just be skipped.
|
||||
# Names are either whole-test targets (e.g. packetmath) or subtest targets
# (e.g. packetmath_2). Keep one name per line: two names fused together form a
# target that does not exist, and unknown names are skipped without any
# diagnostic, so such a mistake silently drops tests from the smoke run.
set(ei_smoke_test_list
  adjoint_1
  alignedvector3
  array_cwise_7
  array_cwise_8
  array_for_matrix_1
  array_of_string
  array_replicate_1
  array_reverse_1
  autodiff_1
  autodiff_scalar_1
  bandmatrix
  bdcsvd_9
  bessel_functions_1
  bfloat16_float
  blasutil_1
  block_5
  BVH
  cholesky_1
  cholmod_support_23
  cholmod_support_24
  conservative_resize_1
  constructor_1
  corners_1
  ctorleak
  dense_storage
  determinant_1
  diagonal_1
  diagonal_2
  diagonalmatrices_1
  dynalloc
  eigensolver_complex_1
  eigensolver_selfadjoint_8
  EulerAngles_1
  exceptions
  fastmath
  first_aligned
  geo_alignedbox_2
  geo_eulerangles_1
  geo_homogeneous_1
  geo_hyperplane_1
  geo_orthomethods_1
  geo_parametrizedline_1
  geo_transformations_7
  half_float
  hessenberg_1
  hessenberg_6
  householder_8
  indexed_view_1
  inplace_decomposition_1
  integer_types_1
  inverse_1
  is_same_dense
  jacobi_1
  jacobisvd_1
  kronecker_product
  linearstructure_1
  lu_9
  mapped_matrix_1
  mapstaticmethods_1
  mapstride_1
  matrix_square_root_1
  meta
  minres_2
  miscmatrices_1
  miscmatrices_4
  mixingtypes_7
  nestbyvalue
  nesting_ops_1
  nomalloc_1
  nullary_1
  num_dimensions
  NumericalDiff
  numext
  packetmath
  permutationmatrices_1
  polynomialsolver_1
  prec_inverse_4x4_1
  product_extra_5
  product_selfadjoint_1
  product_small_7
  product_symm_1
  product_syrk_1
  product_trmm_1
  product_trmv_1
  product_trsolve_5
  qr_1
  qr_10
  qr_colpivoting_7
  qr_fullpivoting_4
  rand
  real_qz_1
  redux_1
  ref_1
  resize
  rvalue_types_1
  schur_complex_1
  schur_real_1
  selfadjoint_1
  sizeof
  sizeoverflow
  smallvectors
  sparse_basic_3
  sparse_block_1
  sparse_extra_4
  sparse_permutations_2
  sparse_product_4
  sparse_ref_1
  sparse_solvers_1
  sparse_vector_1
  special_functions_1
  special_numbers_1
  special_packetmath_1
  spqr_support_2
  stable_norm_1
  stddeque_1
  stddeque_overload_1
  stdlist_1
  stdlist_overload_1
  stdvector_1
  stdvector_overload_1
  stl_iterators_1
  swap_1
  symbolic_index_1
  triangular_1
  type_alias
  umeyama_3
  unalignedassert
  unalignedcount
  vectorwiseop_1
  visitor_1)
|
||||
@@ -18,6 +18,11 @@ macro(ei_add_test_internal testname testname_with_suffix)
|
||||
set(filename ${testname}.cpp)
|
||||
endif()
|
||||
|
||||
# Add the current target to the list of subtest targets
|
||||
get_property(EIGEN_SUBTESTS_LIST GLOBAL PROPERTY EIGEN_SUBTESTS_LIST)
|
||||
set(EIGEN_SUBTESTS_LIST "${EIGEN_SUBTESTS_LIST}${targetname}\n")
|
||||
set_property(GLOBAL PROPERTY EIGEN_SUBTESTS_LIST "${EIGEN_SUBTESTS_LIST}")
|
||||
|
||||
if(EIGEN_ADD_TEST_FILENAME_EXTENSION STREQUAL cu)
|
||||
if(EIGEN_TEST_HIP)
|
||||
hip_reset_flags()
|
||||
@@ -413,11 +418,13 @@ macro(ei_init_testing)
|
||||
define_property(GLOBAL PROPERTY EIGEN_MISSING_BACKENDS BRIEF_DOCS " " FULL_DOCS " ")
|
||||
define_property(GLOBAL PROPERTY EIGEN_TESTING_SUMMARY BRIEF_DOCS " " FULL_DOCS " ")
|
||||
define_property(GLOBAL PROPERTY EIGEN_TESTS_LIST BRIEF_DOCS " " FULL_DOCS " ")
|
||||
define_property(GLOBAL PROPERTY EIGEN_SUBTESTS_LIST BRIEF_DOCS " " FULL_DOCS " ")
|
||||
|
||||
set_property(GLOBAL PROPERTY EIGEN_TESTED_BACKENDS "")
|
||||
set_property(GLOBAL PROPERTY EIGEN_MISSING_BACKENDS "")
|
||||
set_property(GLOBAL PROPERTY EIGEN_TESTING_SUMMARY "")
|
||||
set_property(GLOBAL PROPERTY EIGEN_TESTS_LIST "")
|
||||
set_property(GLOBAL PROPERTY EIGEN_SUBTESTS_LIST "")
|
||||
|
||||
define_property(GLOBAL PROPERTY EIGEN_FAILTEST_FAILURE_COUNT BRIEF_DOCS " " FULL_DOCS " ")
|
||||
define_property(GLOBAL PROPERTY EIGEN_FAILTEST_COUNT BRIEF_DOCS " " FULL_DOCS " ")
|
||||
@@ -708,3 +715,56 @@ macro(ei_split_testsuite num_splits)
|
||||
add_dependencies("${current_target}" "${curr_test}")
|
||||
endforeach()
|
||||
endmacro(ei_split_testsuite num_splits)
|
||||
|
||||
# Defines the custom target buildsmoketests to build a number of tests
|
||||
# specified in smoke_test_list.
|
||||
#
|
||||
# Tests in smoke_test_list can be either test targets (e.g. packetmath) or
|
||||
# subtest targets (e.g. packetmath_2). If any of the tests are not available
|
||||
# in the current configuration they are just skipped.
|
||||
#
|
||||
# All tests added via this macro are labeled with the smoketest label. This
|
||||
# allows running smoketests only using ctest.
|
||||
#
|
||||
# Smoke tests are intended to be run before the whole test suite is invoked,
|
||||
# e.g., to smoke test patches.
|
||||
macro(ei_add_smoke_tests smoke_test_list)
  # smoke_test_list: CMake list of test names; each entry may name a whole
  # test target or a single subtest target. Entries that match neither
  # registered list are ignored.

  # Set the build target to build smoketests
  set(buildtarget "buildsmoketests")
  add_custom_target("${buildtarget}")

  # Get list of all tests and translate it into a CMake list
  # (the global property stores one name per line)
  get_property(EIGEN_TESTS_LIST GLOBAL PROPERTY EIGEN_TESTS_LIST)
  string(REGEX REPLACE "\n" " " EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}")
  set(EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}")
  separate_arguments(EIGEN_TESTS_LIST)

  # Get list of all subtests and translate it into a CMake list. This is done
  # before the first loop because whole-test entries need it to find and label
  # their subtests.
  get_property(EIGEN_SUBTESTS_LIST GLOBAL PROPERTY EIGEN_SUBTESTS_LIST)
  string(REGEX REPLACE "\n" " " EIGEN_SUBTESTS_LIST "${EIGEN_SUBTESTS_LIST}")
  set(EIGEN_SUBTESTS_LIST "${EIGEN_SUBTESTS_LIST}")
  separate_arguments(EIGEN_SUBTESTS_LIST)

  # Check if the test in smoke_test_list is a currently valid test target
  foreach(test IN ITEMS ${smoke_test_list})
    # Add tests in smoke_test_list to our smoke test target but only if the test
    # is currently available, i.e., is in EIGEN_TESTS_LIST
    if ("${test}" IN_LIST EIGEN_TESTS_LIST)
      add_dependencies("${buildtarget}" "${test}")
      # In the case of a test we match all subtests.
      # NOTE: ctest_regex is not read anywhere inside this macro; it is still
      # accumulated because macro variables remain visible to the caller.
      set(ctest_regex "${ctest_regex}^${test}_[0-9]+$$|")
      # Label every subtest <test>_<N> so that `ctest -L smoketest` actually
      # runs what the whole-test target builds.
      foreach(subtest IN LISTS EIGEN_SUBTESTS_LIST)
        if ("${subtest}" MATCHES "^${test}_[0-9]+$")
          get_property(subtest_labels TEST ${subtest} PROPERTY LABELS)
          # Guard against duplicates when the list names both the whole test
          # and one of its subtests.
          if (NOT "smoketest" IN_LIST subtest_labels)
            set_property(TEST ${subtest} APPEND PROPERTY LABELS "smoketest")
          endif()
        endif()
      endforeach()
    endif()
  endforeach()

  # Check if the test in smoke_test_list is a currently valid subtest target
  foreach(test IN ITEMS ${smoke_test_list})
    # Add tests in smoke_test_list to our smoke test target but only if the test
    # is currently available, i.e., is in EIGEN_SUBTESTS_LIST
    if ("${test}" IN_LIST EIGEN_SUBTESTS_LIST)
      add_dependencies("${buildtarget}" "${test}")
      # Add label smoketest to be able to run smoketests using ctest.
      # APPEND avoids the leading empty element that "${labels};smoketest"
      # produces when the test has no labels yet.
      get_property(test_labels TEST ${test} PROPERTY LABELS)
      if (NOT "smoketest" IN_LIST test_labels)
        set_property(TEST ${test} APPEND PROPERTY LABELS "smoketest")
      endif()
    endif()
  endforeach()
endmacro(ei_add_smoke_tests)
|
||||
@@ -1,6 +0,0 @@
|
||||
#!/bin/bash
|
||||
#echo 'Compiling with master'
|
||||
#g++ -O3 -I../eigen-master -std=c++11 new_gemm_test.cpp -o gto
|
||||
echo 'Compiling current'
|
||||
g++ -O3 -I. -std=c++14 new_gemm_test.cpp -D__ENABLE_VECTOR_KERNELS__ -D__ENABLE_PREFETCH__ -o gtp
|
||||
g++ -O3 -I. -std=c++14 new_gemm_test.cpp -D__ENABLE_VECTOR_KERNELS__ -o gt
|
||||
@@ -1,105 +0,0 @@
|
||||
#include <Eigen/Dense>
|
||||
#include <iostream>
|
||||
#include <ctime>
|
||||
#include <cmath>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
void set(MatrixXf& A, int m, int n, int id, int digits)
|
||||
{
|
||||
for(auto i = 0; i < m; i++)
|
||||
for(auto j = 0; j < n; j++)
|
||||
A(i,j) = id*std::pow(10,(2*digits)) + i*std::pow(10,digits) + j;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
#ifdef __DEBUG__
|
||||
int m = std::atoi(argv[1]), k = std::atoi(argv[1]), n = std::atoi(argv[1]);
|
||||
int max = std::max(std::max(m,k),n);
|
||||
MatrixXf A = MatrixXf::Zero(m, k);
|
||||
MatrixXf B = MatrixXf::Zero(k, n);
|
||||
MatrixXf C = MatrixXf::Zero(m, n);
|
||||
MatrixXf D = MatrixXf::Zero(m, n);
|
||||
|
||||
set(A, m, k, 1, static_cast<int>(std::log10(max)) + 1);
|
||||
set(B, k, n, 2, static_cast<int>(std::log10(max)) + 1);
|
||||
|
||||
for(auto i = 0; i < 2; i++)
|
||||
C = A*B;
|
||||
|
||||
#ifdef __DEBUG_SHOW_INPUTS__
|
||||
std::cout << A << std::endl;
|
||||
std::cout << B << std::endl;
|
||||
#endif
|
||||
|
||||
#ifdef __DEBUG_SHOW_RESULT__
|
||||
std::cout << C << std::endl;
|
||||
#endif
|
||||
|
||||
std::cout << std::endl;
|
||||
|
||||
for(auto i = 0; i < m; i++)
|
||||
{
|
||||
for(auto j = 0; j < n; j++)
|
||||
{
|
||||
float acc=0;
|
||||
for(auto kk = 0; kk < k; kk++)
|
||||
{
|
||||
acc += A(i,kk)*B(kk,j);
|
||||
}
|
||||
D(i,j) = acc;
|
||||
if(std::sqrt(std::pow(D(i,j)-C(i,j),2)) > 1.0e-5)
|
||||
{
|
||||
std::cout << "Difference too big at " << i << " ," << j << " is " << C(i,j) << " should be " << D(i,j) << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef __DEBUG_SHOW_RESULT__
|
||||
std::cout << D << std::endl;
|
||||
#endif
|
||||
#else
|
||||
if(argc < 5)
|
||||
{
|
||||
std::cout << "Wrong number of arguments." << std::endl;
|
||||
return -1;
|
||||
}
|
||||
int m = std::atoi(argv[1]), k = std::atoi(argv[2]), n = std::atoi(argv[3]);
|
||||
int RUNS = std::atoi(argv[4]);
|
||||
double time = 0;
|
||||
|
||||
MatrixXf A = MatrixXf::Random(m,k);
|
||||
MatrixXf B = MatrixXf::Random(k,n);
|
||||
for(auto i = 0; i < RUNS; i++)
|
||||
{
|
||||
MatrixXf C = MatrixXf::Zero(m, n);
|
||||
|
||||
std::clock_t start,end;
|
||||
start = std::clock();
|
||||
C = A*B;
|
||||
end = std::clock();
|
||||
|
||||
time += 1000.0*(end-start) / CLOCKS_PER_SEC;
|
||||
}
|
||||
std::cout << time << std::endl;
|
||||
#ifdef TEST_SCALAR
|
||||
start = std::clock();
|
||||
for(auto i = 0; i < m; i++)
|
||||
{
|
||||
for(auto j = 0; j < n; j++)
|
||||
{
|
||||
float acc=0;
|
||||
for(auto kk = 0; kk < k; kk++)
|
||||
{
|
||||
acc += A(i,kk)*B(kk,j);
|
||||
}
|
||||
C(i,j) = acc;
|
||||
}
|
||||
}
|
||||
end = std::clock();
|
||||
|
||||
std::cout << 1000.0*(end-start) / CLOCKS_PER_SEC << std::endl;
|
||||
#endif
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
40
run.sh
40
run.sh
@@ -1,40 +0,0 @@
|
||||
#!/bin/bash
|
||||
function run() {
|
||||
OLD=0
|
||||
NEW=0
|
||||
NEWP=0
|
||||
EXECS=$1
|
||||
SIZE=$2
|
||||
RUNS=$3
|
||||
for ((i = 0; i < $EXECS; i++)) do
|
||||
SEL=$(A=$(shuf -i 0-10 -n 1); echo $(($A % 2)))
|
||||
if [ $SEL -eq 0 ]; then
|
||||
T_OLD=$(./gto $SIZE $SIZE $SIZE $RUNS)
|
||||
T_NEW=$(./gt $SIZE $SIZE $SIZE $RUNS)
|
||||
T_NEWP=$(./gtp $SIZE $SIZE $SIZE $RUNS)
|
||||
else
|
||||
T_NEW=$(./gt $SIZE $SIZE $SIZE $RUNS)
|
||||
T_NEWP=$(./gtp $SIZE $SIZE $SIZE $RUNS)
|
||||
T_OLD=$(./gto $SIZE $SIZE $SIZE $RUNS)
|
||||
fi
|
||||
NEW=$NEW+$T_NEW
|
||||
OLD=$OLD+$T_OLD
|
||||
NEWP=$NEWP+$T_NEWP
|
||||
done
|
||||
SPEED=$(echo "($OLD) / ($NEW)" | bc -l)
|
||||
SPEEDP=$(echo "($OLD) / ($NEWP)" | bc -l)
|
||||
echo "$SIZE -> $SPEED $SPEEDP"
|
||||
}
|
||||
|
||||
run $1 16 500
|
||||
run $1 21 500
|
||||
run $1 32 500
|
||||
run $1 53 500
|
||||
run $1 64 100
|
||||
run $1 97 100
|
||||
run $1 128 50
|
||||
run $1 203 50
|
||||
run $1 256 10
|
||||
run $1 673 10
|
||||
run $1 1024 5
|
||||
run $1 2048 2
|
||||
@@ -76,20 +76,20 @@ class AnnoyingScalar
|
||||
|
||||
AnnoyingScalar operator/(const AnnoyingScalar& other) const
|
||||
{ return AnnoyingScalar((*v)/(*other.v)); }
|
||||
|
||||
|
||||
AnnoyingScalar& operator+=(const AnnoyingScalar& other) { *v += *other.v; return *this; }
|
||||
AnnoyingScalar& operator-=(const AnnoyingScalar& other) { *v -= *other.v; return *this; }
|
||||
AnnoyingScalar& operator*=(const AnnoyingScalar& other) { *v *= *other.v; return *this; }
|
||||
AnnoyingScalar& operator/=(const AnnoyingScalar& other) { *v /= *other.v; return *this; }
|
||||
AnnoyingScalar& operator= (const AnnoyingScalar& other) { *v = *other.v; return *this; }
|
||||
|
||||
|
||||
bool operator==(const AnnoyingScalar& other) const { return *v == *other.v; }
|
||||
bool operator!=(const AnnoyingScalar& other) const { return *v != *other.v; }
|
||||
bool operator<=(const AnnoyingScalar& other) const { return *v <= *other.v; }
|
||||
bool operator< (const AnnoyingScalar& other) const { return *v < *other.v; }
|
||||
bool operator>=(const AnnoyingScalar& other) const { return *v >= *other.v; }
|
||||
bool operator> (const AnnoyingScalar& other) const { return *v > *other.v; }
|
||||
|
||||
|
||||
float* v;
|
||||
float data;
|
||||
static int instances;
|
||||
@@ -136,12 +136,23 @@ struct NumTraits<AnnoyingScalar> : NumTraits<float>
|
||||
|
||||
template<> inline AnnoyingScalar test_precision<AnnoyingScalar>() { return test_precision<float>(); }
|
||||
|
||||
namespace internal {
|
||||
template<> double cast(const AnnoyingScalar& x) { return double(*x.v); }
|
||||
template<> float cast(const AnnoyingScalar& x) { return *x.v; }
|
||||
namespace numext {
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
bool (isfinite)(const AnnoyingScalar& x) {
|
||||
return (numext::isfinite)(*x.v);
|
||||
}
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
template<> EIGEN_STRONG_INLINE AnnoyingScalar pcmp_eq(const AnnoyingScalar& a, const AnnoyingScalar& b)
|
||||
{ return AnnoyingScalar(pcmp_eq(*a.v, *b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE AnnoyingScalar pselect(const AnnoyingScalar& mask, const AnnoyingScalar& a, const AnnoyingScalar& b)
|
||||
{ return numext::equal_strict(*mask.v, 0.f) ? b : a; }
|
||||
template<> EIGEN_STRONG_INLINE double cast(const AnnoyingScalar& x) { return double(*x.v); }
|
||||
template<> EIGEN_STRONG_INLINE float cast(const AnnoyingScalar& x) { return *x.v; }
|
||||
}
|
||||
} // namespace Eigen
|
||||
|
||||
AnnoyingScalar get_test_precision(const AnnoyingScalar&)
|
||||
{ return Eigen::test_precision<AnnoyingScalar>(); }
|
||||
|
||||
@@ -460,3 +460,7 @@ cmake_dependent_option(EIGEN_TEST_BUILD_DOCUMENTATION "Test building the doxygen
|
||||
if(EIGEN_TEST_BUILD_DOCUMENTATION)
|
||||
add_dependencies(buildtests doc)
|
||||
endif()
|
||||
|
||||
# Register all smoke tests
|
||||
include("EigenSmokeTestList")
|
||||
ei_add_smoke_tests("${ei_smoke_test_list}")
|
||||
|
||||
@@ -332,7 +332,9 @@ EIGEN_DECLARE_TEST(geo_quaternion)
|
||||
CALL_SUBTEST_2(( quaternionAlignment<double>() ));
|
||||
CALL_SUBTEST_2( mapQuaternion<double>() );
|
||||
|
||||
#ifndef EIGEN_TEST_ANNOYING_SCALAR_DONT_THROW
|
||||
AnnoyingScalar::dont_throw = true;
|
||||
#endif
|
||||
CALL_SUBTEST_3(( quaternion<AnnoyingScalar,AutoAlign>() ));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,10 +29,6 @@
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_GOOGLEHASH_SUPPORT
|
||||
#include <google/sparse_hash_map>
|
||||
#endif
|
||||
|
||||
#include <Eigen/Cholesky>
|
||||
#include <Eigen/LU>
|
||||
#include <Eigen/Sparse>
|
||||
|
||||
@@ -7,9 +7,9 @@
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#include "main.h"
|
||||
#include <iterator>
|
||||
#include <numeric>
|
||||
#include "main.h"
|
||||
|
||||
template< class Iterator >
|
||||
std::reverse_iterator<Iterator>
|
||||
@@ -47,6 +47,18 @@ bool is_pointer_based_stl_iterator(const internal::pointer_based_stl_iterator<Xp
|
||||
template<typename XprType>
|
||||
bool is_generic_randaccess_stl_iterator(const internal::generic_randaccess_stl_iterator<XprType> &) { return true; }
|
||||
|
||||
// Checks that an iterator of type Iter can be default constructed and then
// assigned from an existing iterator. Returns true when the assigned iterator
// compares equal to its source.
template<typename Iter>
bool is_default_constructible_and_assignable(const Iter& it)
{
#if EIGEN_HAS_CXX11
  VERIFY(std::is_default_constructible<Iter>::value);
  VERIFY(std::is_nothrow_default_constructible<Iter>::value);
#endif
  // Default-construct first, assign afterwards: both operations are exercised.
  Iter assigned;
  assigned = it;
  const bool matches_source = (it==assigned);
  return matches_source;
}
|
||||
|
||||
template<typename Xpr>
|
||||
void check_begin_end_for_loop(Xpr xpr)
|
||||
{
|
||||
@@ -124,6 +136,22 @@ void test_stl_iterators(int rows=Rows, int cols=Cols)
|
||||
|
||||
Index i, j;
|
||||
|
||||
// Verify that iterators are default constructible (See bug #1900)
|
||||
{
|
||||
VERIFY( is_default_constructible_and_assignable(v.begin()));
|
||||
VERIFY( is_default_constructible_and_assignable(v.end()));
|
||||
VERIFY( is_default_constructible_and_assignable(cv.begin()));
|
||||
VERIFY( is_default_constructible_and_assignable(cv.end()));
|
||||
|
||||
VERIFY( is_default_constructible_and_assignable(A.row(0).begin()));
|
||||
VERIFY( is_default_constructible_and_assignable(A.row(0).end()));
|
||||
VERIFY( is_default_constructible_and_assignable(cA.row(0).begin()));
|
||||
VERIFY( is_default_constructible_and_assignable(cA.row(0).end()));
|
||||
|
||||
VERIFY( is_default_constructible_and_assignable(B.row(0).begin()));
|
||||
VERIFY( is_default_constructible_and_assignable(B.row(0).end()));
|
||||
}
|
||||
|
||||
// Check we got a fast pointer-based iterator when expected
|
||||
{
|
||||
VERIFY( is_pointer_based_stl_iterator(v.begin()) );
|
||||
|
||||
@@ -298,7 +298,8 @@ EIGEN_DONT_INLINE Scalar zero() { return Scalar(0); }
|
||||
// workaround aggressive optimization in ICC
|
||||
template<typename T> EIGEN_DONT_INLINE T sub(T a, T b) { return a - b; }
|
||||
|
||||
// all this function does is verify we don't iterate infinitely on nan/inf values
|
||||
// This function verifies we don't iterate infinitely on nan/inf values,
|
||||
// and that info() returns InvalidInput.
|
||||
template<typename SvdType, typename MatrixType>
|
||||
void svd_inf_nan()
|
||||
{
|
||||
@@ -307,18 +308,22 @@ void svd_inf_nan()
|
||||
Scalar some_inf = Scalar(1) / zero<Scalar>();
|
||||
VERIFY(sub(some_inf, some_inf) != sub(some_inf, some_inf));
|
||||
svd.compute(MatrixType::Constant(10,10,some_inf), ComputeFullU | ComputeFullV);
|
||||
VERIFY(svd.info() == InvalidInput);
|
||||
|
||||
Scalar nan = std::numeric_limits<Scalar>::quiet_NaN();
|
||||
VERIFY(nan != nan);
|
||||
svd.compute(MatrixType::Constant(10,10,nan), ComputeFullU | ComputeFullV);
|
||||
VERIFY(svd.info() == InvalidInput);
|
||||
|
||||
MatrixType m = MatrixType::Zero(10,10);
|
||||
m(internal::random<int>(0,9), internal::random<int>(0,9)) = some_inf;
|
||||
svd.compute(m, ComputeFullU | ComputeFullV);
|
||||
VERIFY(svd.info() == InvalidInput);
|
||||
|
||||
m = MatrixType::Zero(10,10);
|
||||
m(internal::random<int>(0,9), internal::random<int>(0,9)) = nan;
|
||||
svd.compute(m, ComputeFullU | ComputeFullV);
|
||||
VERIFY(svd.info() == InvalidInput);
|
||||
|
||||
// regression test for bug 791
|
||||
m.resize(3,3);
|
||||
@@ -326,6 +331,7 @@ void svd_inf_nan()
|
||||
0, -0.5, 0,
|
||||
nan, 0, 0;
|
||||
svd.compute(m, ComputeFullU | ComputeFullV);
|
||||
VERIFY(svd.info() == InvalidInput);
|
||||
|
||||
m.resize(4,4);
|
||||
m << 1, 0, 0, 0,
|
||||
@@ -333,6 +339,7 @@ void svd_inf_nan()
|
||||
1, 0, 1, nan,
|
||||
0, nan, nan, 0;
|
||||
svd.compute(m, ComputeFullU | ComputeFullV);
|
||||
VERIFY(svd.info() == InvalidInput);
|
||||
}
|
||||
|
||||
// Regression test for bug 286: JacobiSVD loops indefinitely with some
|
||||
|
||||
@@ -466,7 +466,7 @@ struct sizes_match_below_dim {
|
||||
template <typename Dims1, typename Dims2, ptrdiff_t n>
|
||||
struct sizes_match_below_dim<Dims1, Dims2, n, n> {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1& dims1, Dims2& dims2) {
|
||||
return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) &
|
||||
return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) &&
|
||||
sizes_match_below_dim<Dims1, Dims2, n-1, n-1>::run(dims1, dims2);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -357,8 +357,8 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ScanKernel(Self self, Index total_s
|
||||
|
||||
}
|
||||
|
||||
template <typename Self, typename Reducer>
|
||||
struct ScanLauncher<Self, Reducer, GpuDevice, false> {
|
||||
template <typename Self, typename Reducer, bool Vectorize>
|
||||
struct ScanLauncher<Self, Reducer, GpuDevice, Vectorize> {
|
||||
void operator()(const Self& self, typename Self::CoeffReturnType* data) {
|
||||
Index total_size = internal::array_prod(self.dimensions());
|
||||
Index num_blocks = (total_size / self.size() + 63) / 64;
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "../../Eigen/Jacobi"
|
||||
#include "../../Eigen/Householder"
|
||||
|
||||
|
||||
/**
|
||||
* \defgroup IterativeLinearSolvers_Module Iterative solvers module
|
||||
* This module aims to provide various iterative linear and non linear solver algorithms.
|
||||
@@ -23,11 +24,12 @@
|
||||
* - an IDR(s) implementation
|
||||
* - a DGMRES implementation
|
||||
* - a MINRES implementation
|
||||
*
|
||||
* \code
|
||||
* #include <unsupported/Eigen/IterativeSolvers>
|
||||
* \endcode
|
||||
*/
|
||||
//@{
|
||||
|
||||
|
||||
#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
|
||||
|
||||
@@ -45,6 +47,5 @@
|
||||
|
||||
#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
//@}
|
||||
|
||||
#endif // EIGEN_ITERATIVE_SOLVERS_MODULE_H
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
|
||||
#ifdef EIGEN_GOOGLEHASH_SUPPORT
|
||||
#include <google/dense_hash_map>
|
||||
#include <google/sparse_hash_map>
|
||||
#endif
|
||||
|
||||
/**
|
||||
|
||||
@@ -10,7 +10,13 @@
|
||||
#ifndef EIGEN_RANDOMSETTER_H
|
||||
#define EIGEN_RANDOMSETTER_H
|
||||
|
||||
namespace Eigen {
|
||||
#if defined(EIGEN_GOOGLEHASH_SUPPORT)
|
||||
// Ensure the ::google namespace exists, required for checking existence of
|
||||
// ::google::dense_hash_map and ::google::sparse_hash_map.
|
||||
namespace google {}
|
||||
#endif
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** Represents a std::map
|
||||
*
|
||||
@@ -56,7 +62,26 @@ template<typename Scalar> struct StdUnorderedMapTraits
|
||||
};
|
||||
#endif // EIGEN_UNORDERED_MAP_SUPPORT
|
||||
|
||||
#ifdef _DENSE_HASH_MAP_H_
|
||||
#if defined(EIGEN_GOOGLEHASH_SUPPORT)
|
||||
|
||||
namespace google {
|
||||
|
||||
// Namespace work-around, since sometimes dense_hash_map and sparse_hash_map
|
||||
// are in the global namespace, and other times they are under ::google.
|
||||
using namespace ::google;
|
||||
|
||||
template<typename KeyType, typename Scalar>
|
||||
struct DenseHashMap {
|
||||
typedef dense_hash_map<KeyType, Scalar> type;
|
||||
};
|
||||
|
||||
template<typename KeyType, typename Scalar>
|
||||
struct SparseHashMap {
|
||||
typedef sparse_hash_map<KeyType, Scalar> type;
|
||||
};
|
||||
|
||||
} // namespace google
|
||||
|
||||
/** Represents a google::dense_hash_map
|
||||
*
|
||||
* \see RandomSetter
|
||||
@@ -64,7 +89,7 @@ template<typename Scalar> struct StdUnorderedMapTraits
|
||||
template<typename Scalar> struct GoogleDenseHashMapTraits
|
||||
{
|
||||
typedef int KeyType;
|
||||
typedef google::dense_hash_map<KeyType,Scalar> Type;
|
||||
typedef typename google::DenseHashMap<KeyType,Scalar>::type Type;
|
||||
enum {
|
||||
IsSorted = 0
|
||||
};
|
||||
@@ -72,9 +97,7 @@ template<typename Scalar> struct GoogleDenseHashMapTraits
|
||||
static void setInvalidKey(Type& map, const KeyType& k)
|
||||
{ map.set_empty_key(k); }
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifdef _SPARSE_HASH_MAP_H_
|
||||
/** Represents a google::sparse_hash_map
|
||||
*
|
||||
* \see RandomSetter
|
||||
@@ -82,7 +105,7 @@ template<typename Scalar> struct GoogleDenseHashMapTraits
|
||||
template<typename Scalar> struct GoogleSparseHashMapTraits
|
||||
{
|
||||
typedef int KeyType;
|
||||
typedef google::sparse_hash_map<KeyType,Scalar> Type;
|
||||
typedef typename google::SparseHashMap<KeyType,Scalar>::type Type;
|
||||
enum {
|
||||
IsSorted = 0
|
||||
};
|
||||
@@ -134,18 +157,17 @@ template<typename Scalar> struct GoogleSparseHashMapTraits
|
||||
* GoogleSparseHashMapTraits, GnuHashMapTraits, and finally StdMapTraits.
|
||||
*
|
||||
* For performance and memory consumption reasons it is highly recommended to use one of
|
||||
* the Google's hash_map implementation. To enable the support for them, you have two options:
|
||||
* - \#include <google/dense_hash_map> yourself \b before Eigen/Sparse header
|
||||
* - define EIGEN_GOOGLEHASH_SUPPORT
|
||||
* In the later case the inclusion of <google/dense_hash_map> is made for you.
|
||||
* Google's hash_map implementations. To enable the support for them, you must define
|
||||
* EIGEN_GOOGLEHASH_SUPPORT. This will include both <google/dense_hash_map> and
|
||||
* <google/sparse_hash_map> for you.
|
||||
*
|
||||
* \see http://code.google.com/p/google-sparsehash/
|
||||
* \see https://github.com/sparsehash/sparsehash
|
||||
*/
|
||||
template<typename SparseMatrixType,
|
||||
template <typename T> class MapTraits =
|
||||
#if defined _DENSE_HASH_MAP_H_
|
||||
#if defined(EIGEN_GOOGLEHASH_SUPPORT)
|
||||
GoogleDenseHashMapTraits
|
||||
#elif defined _HASH_MAP
|
||||
#elif defined(_HASH_MAP)
|
||||
GnuHashMapTraits
|
||||
#else
|
||||
StdMapTraits
|
||||
|
||||
@@ -444,7 +444,7 @@ void test_gpu_forced_evals() {
|
||||
d_float, num_elem);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half1(
|
||||
d_res_half1, num_elem);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Unaligned> gpu_res_half2(
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Unaligned> gpu_res_half2(
|
||||
d_res_half2, num_elem);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
|
||||
d_res_float, num_elem);
|
||||
@@ -461,7 +461,7 @@ void test_gpu_forced_evals() {
|
||||
Tensor<float, 1> half_prec2(num_elem);
|
||||
Tensor<float, 1> full_prec(num_elem);
|
||||
gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res_half1, num_elem*sizeof(float));
|
||||
gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res_half1, num_elem*sizeof(float));
|
||||
gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res_half2, num_elem*sizeof(float));
|
||||
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
|
||||
gpu_device.synchronize();
|
||||
|
||||
|
||||
@@ -123,10 +123,8 @@ template<typename SparseMatrixType> void sparse_extra(const SparseMatrixType& re
|
||||
#ifdef EIGEN_UNORDERED_MAP_SUPPORT
|
||||
VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, StdUnorderedMapTraits> >(m,refMat,nonzeroCoords) ));
|
||||
#endif
|
||||
#ifdef _DENSE_HASH_MAP_H_
|
||||
#ifdef EIGEN_GOOGLEHASH_SUPPORT
|
||||
VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, GoogleDenseHashMapTraits> >(m,refMat,nonzeroCoords) ));
|
||||
#endif
|
||||
#ifdef _SPARSE_HASH_MAP_H_
|
||||
VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, GoogleSparseHashMapTraits> >(m,refMat,nonzeroCoords) ));
|
||||
#endif
|
||||
|
||||
|
||||
Reference in New Issue
Block a user