Add workaround for using std::fma for scalar multiply-add.

2026-04-10 11:34:33 +08:00 · 2025-10-09 18:57:46 +00:00
parent 5996176b88
commit ef3c5c1d1d
3 changed files with 25 additions and 10 deletions
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -1004,8 +1004,7 @@ struct madd_impl {
  }
 };

-// Use FMA if there is a single CPU instruction.
-#ifdef EIGEN_VECTORIZE_FMA
+#if EIGEN_SCALAR_MADD_USE_FMA
 template <typename Scalar>
 struct madd_impl<Scalar, std::enable_if_t<has_fma<Scalar>::value>> {
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar& x, const Scalar& y, const Scalar& z) {
@@ -1927,7 +1926,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar arithmetic_shift_right(const Scalar
  return bit_cast<Scalar, SignedScalar>(bit_cast<SignedScalar, Scalar>(a) >> n);
 }

-// Otherwise, rely on template implementation.
 template <typename Scalar>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar fma(const Scalar& x, const Scalar& y, const Scalar& z) {
  return internal::fma_impl<Scalar>::run(x, y, z);
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -52,6 +52,26 @@
 #define EIGEN_STACK_ALLOCATION_LIMIT 131072
 #endif

+/* Set to 1 or 0 to select whether std::fma is used for scalar multiply-add operations.
+ *
+ * On machines that have FMA as a single instruction, this will generally
+ * improve precision without significant performance implications.
+ *
+ * Without a single instruction, performance has been found to be reduced 2-3x
+ * on Intel CPUs, and up to 30x for WASM.
+ *
+ * If unspecified, defaults to 1 when EIGEN_VECTORIZE_FMA is defined (hardware FMA available), else 0.
+ * The default should be used in most cases to ensure consistency between
+ * vectorized and non-vectorized paths.
+ */
+#ifndef EIGEN_SCALAR_MADD_USE_FMA
+#ifdef EIGEN_VECTORIZE_FMA
+#define EIGEN_SCALAR_MADD_USE_FMA 1
+#else
+#define EIGEN_SCALAR_MADD_USE_FMA 0
+#endif
+#endif
+
 //------------------------------------------------------------------------------------------
 // Compiler identification, EIGEN_COMP_*
 //------------------------------------------------------------------------------------------
--- a/doc/PreprocessorDirectives.dox
+++ b/doc/PreprocessorDirectives.dox
@@ -18,9 +18,6 @@ one option, and other parts (or libraries that you use) are compiled with anothe
 fail to link or exhibit subtle bugs. Nevertheless, these options can be useful for people who know what they
 are doing.

- - \b EIGEN2_SUPPORT and \b EIGEN2_SUPPORT_STAGEnn_xxx are disabled starting from the 3.3 release.
-   Defining one of these will raise a compile-error. If you need to compile Eigen2 code,
-   <a href="http://eigen.tuxfamily.org/index.php?title=Eigen2">check this site</a>.
 - \b EIGEN_DEFAULT_DENSE_INDEX_TYPE - the type for column and row indices in matrices, vectors and array
   (DenseBase::Index). Set to \c std::ptrdiff_t by default.
 - \b EIGEN_DEFAULT_IO_FORMAT - the IOFormat to use when printing a matrix if no %IOFormat is specified.
@@ -44,7 +41,7 @@ are doing.
   preferable. Not defined by default.
   \warning See the documentation of \c EIGEN_INITIALIZE_MATRICES_BY_ZERO for a discussion on a limitations
   of these macros when applied to \c 1x1, \c 1x2, and \c 2x1 fixed-size matrices.
- - \b EIGEN_NO_AUTOMATIC_RESIZING - if defined, the matrices (or arrays) on both sides of an assignment 
+ - \b EIGEN_NO_AUTOMATIC_RESIZING - if defined, the matrices (or arrays) on both sides of an assignment
   <tt>a = b</tt> have to be of the same size; otherwise, %Eigen automatically resizes \c a so that it is of
   the correct size. Not defined by default.

@@ -72,8 +69,8 @@ The %Eigen library contains many assertions to guard against programming errors,
 run time. However, these assertions do cost time and can thus be turned off.

 - \b EIGEN_NO_DEBUG - disables %Eigen's assertions if defined. Not defined by default, unless the
-   \c NDEBUG macro is defined (this is a standard C++ macro which disables all asserts). 
- - \b EIGEN_NO_STATIC_ASSERT - if defined, compile-time static assertions are replaced by runtime assertions; 
+   \c NDEBUG macro is defined (this is a standard C++ macro which disables all asserts).
+ - \b EIGEN_NO_STATIC_ASSERT - if defined, compile-time static assertions are replaced by runtime assertions;
   this saves compilation time. Not defined by default.
 - \b eigen_assert - macro with one argument that is used inside %Eigen for assertions. By default, it is
   basically defined to be \c assert, which aborts the program if the assertion is violated. Redefine this
@@ -90,7 +87,7 @@ run time. However, these assertions do cost time and can thus be turned off.
 Let us emphasize that \c EIGEN_MAX_*_ALIGN_BYTES define only a desirable upper bound. In practice data is aligned to largest power-of-two common divisor of \c EIGEN_MAX_STATIC_ALIGN_BYTES and the size of the data, such that memory is not wasted.
 - \b \c EIGEN_DONT_PARALLELIZE - if defined, this disables multi-threading. This is only relevant if you enabled OpenMP.
   See \ref TopicMultiThreading for details.
- - \b \c EIGEN_DONT_VECTORIZE - disables explicit vectorization when defined. Not defined by default, unless 
+ - \b \c EIGEN_DONT_VECTORIZE - disables explicit vectorization when defined. Not defined by default, unless
   alignment is disabled by %Eigen's platform test or the user defining \c EIGEN_DONT_ALIGN.
 - \b \c EIGEN_UNALIGNED_VECTORIZE - disables/enables vectorization with unaligned stores. Default is 1 (enabled).
   If set to 0 (disabled), then expression for which the destination cannot be aligned are not vectorized (e.g., unaligned