mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
committed by
Rasmus Munk Larsen
parent
752911927f
commit
4d05fcf8da
@@ -2496,38 +2496,60 @@ template <>
|
||||
EIGEN_STRONG_INLINE Packet4f ploadquad<Packet4f>(const float* from) {
|
||||
return vld1q_dup_f32(from);
|
||||
}
|
||||
|
||||
// WORKAROUND: Apple Clang 17.0.0 (and Homebrew Clang 21.1.8) at -O0 optimization
|
||||
// generate incorrect code for vld1_dup_[su]8, ignoring the pointer offset.
|
||||
// We use vdup_n_[su]8(*ptr) to force a safe scalar load before broadcast.
|
||||
// Broadcast-load helper: returns an int8x8_t built from the single byte at `ptr`.
// When both EIGEN_COMP_CLANGAPPLE and EIGEN_ARCH_ARM64 are set, the byte is read
// with a plain scalar load and then broadcast with vdup_n_s8; every other
// configuration calls vld1_dup_s8 directly.
// NOTE(review): the guard tests EIGEN_COMP_CLANGAPPLE only - confirm whether other
// Clang distributions need the same path.
EIGEN_ALWAYS_INLINE int8x8_t eigen_vld1_dup_s8(const int8_t* ptr) {
#if EIGEN_COMP_CLANGAPPLE && EIGEN_ARCH_ARM64
  // Explicit scalar read first, so the broadcast never goes through vld1_dup_s8.
  const int8_t value = *ptr;
  return vdup_n_s8(value);
#else
  return vld1_dup_s8(ptr);
#endif
}
|
||||
|
||||
// Unsigned counterpart of the broadcast-load helper: returns a uint8x8_t built
// from the single byte at `ptr`. When both EIGEN_COMP_CLANGAPPLE and
// EIGEN_ARCH_ARM64 are set, the byte is read with a plain scalar load and then
// broadcast with vdup_n_u8; otherwise vld1_dup_u8 is called directly.
EIGEN_ALWAYS_INLINE uint8x8_t eigen_vld1_dup_u8(const uint8_t* ptr) {
#if EIGEN_COMP_CLANGAPPLE && EIGEN_ARCH_ARM64
  // Explicit scalar read first, so the broadcast never goes through vld1_dup_u8.
  const uint8_t value = *ptr;
  return vdup_n_u8(value);
#else
  return vld1_dup_u8(ptr);
#endif
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4c ploadquad<Packet4c>(const int8_t* from) {
|
||||
return vget_lane_s32(vreinterpret_s32_s8(vld1_dup_s8(from)), 0);
|
||||
return vget_lane_s32(vreinterpret_s32_s8(eigen_vld1_dup_s8(from)), 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8c ploadquad<Packet8c>(const int8_t* from) {
|
||||
return vreinterpret_s8_u32(
|
||||
vzip_u32(vreinterpret_u32_s8(vld1_dup_s8(from)), vreinterpret_u32_s8(vld1_dup_s8(from + 1))).val[0]);
|
||||
vzip_u32(vreinterpret_u32_s8(eigen_vld1_dup_s8(from)), vreinterpret_u32_s8(eigen_vld1_dup_s8(from + 1))).val[0]);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16c ploadquad<Packet16c>(const int8_t* from) {
|
||||
const int8x8_t a = vreinterpret_s8_u32(
|
||||
vzip_u32(vreinterpret_u32_s8(vld1_dup_s8(from)), vreinterpret_u32_s8(vld1_dup_s8(from + 1))).val[0]);
|
||||
vzip_u32(vreinterpret_u32_s8(eigen_vld1_dup_s8(from)), vreinterpret_u32_s8(eigen_vld1_dup_s8(from + 1))).val[0]);
|
||||
const int8x8_t b = vreinterpret_s8_u32(
|
||||
vzip_u32(vreinterpret_u32_s8(vld1_dup_s8(from + 2)), vreinterpret_u32_s8(vld1_dup_s8(from + 3))).val[0]);
|
||||
vzip_u32(vreinterpret_u32_s8(eigen_vld1_dup_s8(from + 2)), vreinterpret_u32_s8(eigen_vld1_dup_s8(from + 3)))
|
||||
.val[0]);
|
||||
return vcombine_s8(a, b);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4uc ploadquad<Packet4uc>(const uint8_t* from) {
|
||||
return vget_lane_u32(vreinterpret_u32_u8(vld1_dup_u8(from)), 0);
|
||||
return vget_lane_u32(vreinterpret_u32_u8(eigen_vld1_dup_u8(from)), 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8uc ploadquad<Packet8uc>(const uint8_t* from) {
|
||||
return vreinterpret_u8_u32(
|
||||
vzip_u32(vreinterpret_u32_u8(vld1_dup_u8(from)), vreinterpret_u32_u8(vld1_dup_u8(from + 1))).val[0]);
|
||||
vzip_u32(vreinterpret_u32_u8(eigen_vld1_dup_u8(from)), vreinterpret_u32_u8(eigen_vld1_dup_u8(from + 1))).val[0]);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16uc ploadquad<Packet16uc>(const uint8_t* from) {
|
||||
const uint8x8_t a = vreinterpret_u8_u32(
|
||||
vzip_u32(vreinterpret_u32_u8(vld1_dup_u8(from)), vreinterpret_u32_u8(vld1_dup_u8(from + 1))).val[0]);
|
||||
vzip_u32(vreinterpret_u32_u8(eigen_vld1_dup_u8(from)), vreinterpret_u32_u8(eigen_vld1_dup_u8(from + 1))).val[0]);
|
||||
const uint8x8_t b = vreinterpret_u8_u32(
|
||||
vzip_u32(vreinterpret_u32_u8(vld1_dup_u8(from + 2)), vreinterpret_u32_u8(vld1_dup_u8(from + 3))).val[0]);
|
||||
vzip_u32(vreinterpret_u32_u8(eigen_vld1_dup_u8(from + 2)), vreinterpret_u32_u8(eigen_vld1_dup_u8(from + 3)))
|
||||
.val[0]);
|
||||
return vcombine_u8(a, b);
|
||||
}
|
||||
template <>
|
||||
|
||||
@@ -1,11 +1,19 @@
|
||||
#define EIGEN_NO_DEBUG_SMALL_PRODUCT_BLOCKS
|
||||
#include "sparse_solver.h"
|
||||
|
||||
#if defined(DEBUG)
|
||||
#undef DEBUG
|
||||
#endif
|
||||
|
||||
#define EIGEN_NO_DEBUG_SMALL_PRODUCT_BLOCKS
|
||||
|
||||
#pragma clang diagnostic push
|
||||
// The following "warning" causes a build failure on macOS with the latest
|
||||
// version of clang:
|
||||
// error: non-defining declaration of enumeration with a fixed underlying
|
||||
// type is only permitted as a standalone declaration
|
||||
#pragma clang diagnostic ignored "-Welaborated-enum-base"
|
||||
#include <Eigen/AccelerateSupport>
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
#include "sparse_solver.h"
|
||||
|
||||
template <typename MatrixType, typename DenseMat>
|
||||
int generate_sparse_rectangular_problem(MatrixType& A, DenseMat& dA, int maxRows = 300, int maxCols = 300) {
|
||||
|
||||
@@ -86,6 +86,13 @@ inline T REF_ABS_DIFF(const T& a, const T& b) {
|
||||
return a > b ? a - b : b - a;
|
||||
}
|
||||
|
||||
// MacOS apple-clang has an issue with pcmp_eq for half when inlined,
|
||||
// resulting in an ICE, but only in this specific test.
|
||||
template <typename Packet>
|
||||
EIGEN_DONT_INLINE Packet REF_PCMP_EQ(const Packet& a, const Packet& b) {
|
||||
return internal::pcmp_eq(a, b);
|
||||
}
|
||||
|
||||
// Specializations for bool.
|
||||
template <>
|
||||
inline bool REF_ADD(const bool& a, const bool& b) {
|
||||
@@ -361,21 +368,21 @@ void packetmath_boolean_mask_ops() {
|
||||
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
|
||||
}
|
||||
|
||||
CHECK_CWISE2_MASK(internal::pcmp_eq, internal::pcmp_eq);
|
||||
CHECK_CWISE2_MASK(REF_PCMP_EQ, internal::pcmp_eq);
|
||||
|
||||
// Test (-0) == (0) for signed operations
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
data1[i] = Scalar(-0.0);
|
||||
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
|
||||
}
|
||||
CHECK_CWISE2_MASK(internal::pcmp_eq, internal::pcmp_eq);
|
||||
CHECK_CWISE2_MASK(REF_PCMP_EQ, internal::pcmp_eq);
|
||||
|
||||
// Test NaN
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
data1[i] = NumTraits<Scalar>::quiet_NaN();
|
||||
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
|
||||
}
|
||||
CHECK_CWISE2_MASK(internal::pcmp_eq, internal::pcmp_eq);
|
||||
CHECK_CWISE2_MASK(REF_PCMP_EQ, internal::pcmp_eq);
|
||||
}
|
||||
|
||||
template <typename Scalar, typename Packet>
|
||||
|
||||
@@ -7,7 +7,7 @@ endif()
|
||||
set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Unsupported")
|
||||
add_custom_target(BuildUnsupported)
|
||||
|
||||
include_directories(../../test ../../unsupported ../../Eigen
|
||||
include_directories(../../test ../../unsupported
|
||||
${CMAKE_CURRENT_BINARY_DIR}/../../test)
|
||||
|
||||
find_package (Threads)
|
||||
|
||||
Reference in New Issue
Block a user