Fix packetmath tests on M* macs.

libeigen/eigen!2120
This commit is contained in:
Antonio Sánchez
2026-02-08 18:07:24 +00:00
committed by Rasmus Munk Larsen
parent 752911927f
commit 4d05fcf8da
4 changed files with 52 additions and 15 deletions

View File

@@ -2496,38 +2496,60 @@ template <>
EIGEN_STRONG_INLINE Packet4f ploadquad<Packet4f>(const float* from) {
return vld1q_dup_f32(from);
}
// WORKAROUND: Apple Clang 17.0.0 (and Homebrew Clang 21.1.8) at -O0 optimization
// generate incorrect code for vld1_dup_[su]8, ignoring the pointer offset.
// We use vdup_n_[su]8(*ptr) to force a safe scalar load before the broadcast.
// Loads the signed byte at `ptr` and duplicates it into an int8x8_t.
// Apple Clang on ARM64 reads the byte with a plain scalar dereference and
// broadcasts it with vdup_n_s8; every other configuration calls vld1_dup_s8.
EIGEN_ALWAYS_INLINE int8x8_t eigen_vld1_dup_s8(const int8_t* ptr) {
#if !(EIGEN_COMP_CLANGAPPLE && EIGEN_ARCH_ARM64)
  return vld1_dup_s8(ptr);
#else
  const int8_t value = *ptr;
  return vdup_n_s8(value);
#endif
}
// Loads the unsigned byte at `ptr` and duplicates it into a uint8x8_t.
// Apple Clang on ARM64 reads the byte with a plain scalar dereference and
// broadcasts it with vdup_n_u8; every other configuration calls vld1_dup_u8.
EIGEN_ALWAYS_INLINE uint8x8_t eigen_vld1_dup_u8(const uint8_t* ptr) {
#if !(EIGEN_COMP_CLANGAPPLE && EIGEN_ARCH_ARM64)
  return vld1_dup_u8(ptr);
#else
  const uint8_t value = *ptr;
  return vdup_n_u8(value);
#endif
}
// Quad-load for Packet4c: broadcasts from[0] across a 64-bit vector and
// returns its low 32-bit lane, i.e. four copies of from[0].
// The broadcast goes through eigen_vld1_dup_s8 so the compiler workaround
// in that helper is actually applied.
template <>
EIGEN_STRONG_INLINE Packet4c ploadquad<Packet4c>(const int8_t* from) {
  return vget_lane_s32(vreinterpret_s32_s8(eigen_vld1_dup_s8(from)), 0);
}
// Quad-load for Packet8c: broadcasts from[0] and from[1] separately, views
// each as two 32-bit lanes, and zips them; the first zipped vector holds
// from[0] four times followed by from[1] four times.
// Both broadcasts go through eigen_vld1_dup_s8 so the compiler workaround in
// that helper is applied, including for the offset pointer.
template <>
EIGEN_STRONG_INLINE Packet8c ploadquad<Packet8c>(const int8_t* from) {
  return vreinterpret_s8_u32(
      vzip_u32(vreinterpret_u32_s8(eigen_vld1_dup_s8(from)), vreinterpret_u32_s8(eigen_vld1_dup_s8(from + 1))).val[0]);
}
// Quad-load for Packet16c: builds two 8-byte halves the same way as the
// Packet8c variant (from[0]/from[1] for the low half, from[2]/from[3] for the
// high half) and concatenates them, so each of from[0..3] appears four times.
// Every broadcast goes through eigen_vld1_dup_s8 so the compiler workaround in
// that helper is applied to all four offset pointers.
template <>
EIGEN_STRONG_INLINE Packet16c ploadquad<Packet16c>(const int8_t* from) {
  const int8x8_t a = vreinterpret_s8_u32(
      vzip_u32(vreinterpret_u32_s8(eigen_vld1_dup_s8(from)), vreinterpret_u32_s8(eigen_vld1_dup_s8(from + 1))).val[0]);
  const int8x8_t b = vreinterpret_s8_u32(
      vzip_u32(vreinterpret_u32_s8(eigen_vld1_dup_s8(from + 2)), vreinterpret_u32_s8(eigen_vld1_dup_s8(from + 3)))
          .val[0]);
  return vcombine_s8(a, b);
}
// Quad-load for Packet4uc: broadcasts from[0] across a 64-bit vector and
// returns its low 32-bit lane, i.e. four copies of from[0].
// The broadcast goes through eigen_vld1_dup_u8 so the compiler workaround
// in that helper is actually applied.
template <>
EIGEN_STRONG_INLINE Packet4uc ploadquad<Packet4uc>(const uint8_t* from) {
  return vget_lane_u32(vreinterpret_u32_u8(eigen_vld1_dup_u8(from)), 0);
}
// Quad-load for Packet8uc: broadcasts from[0] and from[1] separately, views
// each as two 32-bit lanes, and zips them; the first zipped vector holds
// from[0] four times followed by from[1] four times.
// Both broadcasts go through eigen_vld1_dup_u8 so the compiler workaround in
// that helper is applied, including for the offset pointer.
template <>
EIGEN_STRONG_INLINE Packet8uc ploadquad<Packet8uc>(const uint8_t* from) {
  return vreinterpret_u8_u32(
      vzip_u32(vreinterpret_u32_u8(eigen_vld1_dup_u8(from)), vreinterpret_u32_u8(eigen_vld1_dup_u8(from + 1))).val[0]);
}
// Quad-load for Packet16uc: builds two 8-byte halves the same way as the
// Packet8uc variant (from[0]/from[1] for the low half, from[2]/from[3] for the
// high half) and concatenates them, so each of from[0..3] appears four times.
// Every broadcast goes through eigen_vld1_dup_u8 so the compiler workaround in
// that helper is applied to all four offset pointers.
template <>
EIGEN_STRONG_INLINE Packet16uc ploadquad<Packet16uc>(const uint8_t* from) {
  const uint8x8_t a = vreinterpret_u8_u32(
      vzip_u32(vreinterpret_u32_u8(eigen_vld1_dup_u8(from)), vreinterpret_u32_u8(eigen_vld1_dup_u8(from + 1))).val[0]);
  const uint8x8_t b = vreinterpret_u8_u32(
      vzip_u32(vreinterpret_u32_u8(eigen_vld1_dup_u8(from + 2)), vreinterpret_u32_u8(eigen_vld1_dup_u8(from + 3)))
          .val[0]);
  return vcombine_u8(a, b);
}
template <>

View File

@@ -1,11 +1,19 @@
#define EIGEN_NO_DEBUG_SMALL_PRODUCT_BLOCKS
#include "sparse_solver.h"
#if defined(DEBUG)
#undef DEBUG
#endif
#define EIGEN_NO_DEBUG_SMALL_PRODUCT_BLOCKS
#pragma clang diagnostic push
// The following "warning" causes a build failure on macOS with the latest
// version of clang:
// error: non-defining declaration of enumeration with a fixed underlying
// type is only permitted as a standalone declaration
#pragma clang diagnostic ignored "-Welaborated-enum-base"
#include <Eigen/AccelerateSupport>
#pragma clang diagnostic pop
#include "sparse_solver.h"
template <typename MatrixType, typename DenseMat>
int generate_sparse_rectangular_problem(MatrixType& A, DenseMat& dA, int maxRows = 300, int maxCols = 300) {

View File

@@ -86,6 +86,13 @@ inline T REF_ABS_DIFF(const T& a, const T& b) {
return a > b ? a - b : b - a;
}
// Non-inlined forwarding wrapper around internal::pcmp_eq, passed to
// CHECK_CWISE2_MASK instead of internal::pcmp_eq itself.
// Apple's clang on macOS hits an internal compiler error when pcmp_eq for
// half is inlined in this particular test, so inlining is suppressed here.
template <typename Packet>
EIGEN_DONT_INLINE Packet REF_PCMP_EQ(const Packet& a, const Packet& b) {
  Packet equal_mask = internal::pcmp_eq(a, b);
  return equal_mask;
}
// Specializations for bool.
template <>
inline bool REF_ADD(const bool& a, const bool& b) {
@@ -361,21 +368,21 @@ void packetmath_boolean_mask_ops() {
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
}
CHECK_CWISE2_MASK(internal::pcmp_eq, internal::pcmp_eq);
CHECK_CWISE2_MASK(REF_PCMP_EQ, internal::pcmp_eq);
// Test (-0) == (0) for signed operations
for (int i = 0; i < PacketSize; ++i) {
data1[i] = Scalar(-0.0);
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
}
CHECK_CWISE2_MASK(internal::pcmp_eq, internal::pcmp_eq);
CHECK_CWISE2_MASK(REF_PCMP_EQ, internal::pcmp_eq);
// Test NaN
for (int i = 0; i < PacketSize; ++i) {
data1[i] = NumTraits<Scalar>::quiet_NaN();
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
}
CHECK_CWISE2_MASK(internal::pcmp_eq, internal::pcmp_eq);
CHECK_CWISE2_MASK(REF_PCMP_EQ, internal::pcmp_eq);
}
template <typename Scalar, typename Packet>

View File

@@ -7,7 +7,7 @@ endif()
set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Unsupported")
add_custom_target(BuildUnsupported)
# Header search paths for the unsupported tests: the main test directory, the
# unsupported tree, and the generated test directory in the build tree.
# The Eigen/ directory itself is deliberately not added to the search path.
include_directories(../../test ../../unsupported
                    ${CMAKE_CURRENT_BINARY_DIR}/../../test)
find_package (Threads)