diff --git a/doc/CoeffwiseMathFunctionsTable.dox b/doc/CoeffwiseMathFunctionsTable.dox
index eb0f36c1b..cda19d985 100644
--- a/doc/CoeffwiseMathFunctionsTable.dox
+++ b/doc/CoeffwiseMathFunctionsTable.dox
@@ -608,6 +608,133 @@ This also means that, unless specified, if the function \c std::foo is available
\n
+\section CoeffwiseMathFunctionsAccuracy Accuracy of vectorized math functions
+
+The following tables summarize the accuracy of %Eigen's vectorized implementations measured
+in units of ULP (Unit in the Last Place) on an x86-64 system (Intel Xeon, GCC) with SSE2 SIMD
+target. The reference values were computed using
+MPFR at 128-bit precision.
+Float results are exhaustive over all ~4.28 billion finite representable values.
+Double results sample ~2.88 billion values using a geometric stepping factor of 10-6.
+
+These numbers may differ for other SIMD targets (AVX, AVX512, NEON, SVE, etc.)
+since each has its own packet math implementations. Functions marked "delegates to std"
+do not have a custom vectorized implementation for the tested SIMD target — they call
+the standard library function element-by-element.
+
+The full histograms for each function can be generated with the \c ulp_accuracy tool
+in test/ulp_accuracy/.
+
+\subsection CoeffwiseMathFunctionsAccuracy_float Float precision
+
+
+| Function | Max |ULP| | Mean |ULP| | % Exact | Notes |
+| Trigonometric |
+| sin | 2 | 0.087 | 91.3% | \ref CoeffwiseMathFunctionsAccuracy_note1 "1" |
+| cos | 2 | 0.088 | 91.2% | \ref CoeffwiseMathFunctionsAccuracy_note1 "1" |
+| tan | 5 | 0.238 | 77.3% | \ref CoeffwiseMathFunctionsAccuracy_note1 "1" |
+| asin | 4 | 0.726 | 51.3% | |
+| acos | 4 | 0.057 | 95.0% | |
+| atan | 4 | 0.061 | 94.0% | |
+| Hyperbolic |
+| sinh | 2 | 0.017 | 98.3% | |
+| cosh | 2 | 0.004 | 99.6% | |
+| tanh | 6 | 0.030 | 97.2% | |
+| asinh | 2 | 0.145 | 85.5% | |
+| acosh | 2 | 0.057 | 94.3% | |
+| atanh | 2 | 0.004 | 99.6% | |
+| Exponential / Logarithmic |
+| exp | 1 | 0.018 | 98.2% | |
+| exp2 | 6 | 0.034 | 97.3% | |
+| expm1 | 5 | 0.060 | 94.6% | |
+| log | 3 | 0.120 | 88.0% | |
+| log1p | 5 | 0.134 | 87.5% | |
+| log10 | 2 | 0.007 | 99.3% | |
+| log2 | 5 | 0.005 | 99.5% | |
+| Error / Special |
+| erf | 7 | 0.332 | 67.5% | |
+| erfc | 8 | 0.010 | 99.2% | |
+| lgamma | delegates to std | \ref CoeffwiseMathFunctionsAccuracy_note3 "3" |
+| Other |
+| logistic | 7 | 0.040 | 97.0% | |
+| sqrt | 0 | 0.000 | 100% | Uses hardware sqrt |
+| cbrt | 2 | 0.552 | 49.1% | |
+| rsqrt | ∞ | 0.114 | 88.2% | \ref CoeffwiseMathFunctionsAccuracy_note4 "4" |
+
+
+\subsection CoeffwiseMathFunctionsAccuracy_double Double precision
+
+
+| Function | Max |ULP| | Mean |ULP| | % Exact | Notes |
+| Trigonometric |
+| sin | 13,879,755 | 0.093 | 93.2% | \ref CoeffwiseMathFunctionsAccuracy_note1 "1" |
+| cos | 2,024,130 | 0.043 | 98.2% | \ref CoeffwiseMathFunctionsAccuracy_note1 "1" |
+| tan | 13,879,755 | 0.128 | 92.7% | \ref CoeffwiseMathFunctionsAccuracy_note1 "1" |
+| asin | 1 | <0.001 | >99.9% | |
+| acos | 1 | <0.001 | 100% | |
+| atan | 5 | 0.013 | 98.8% | |
+| Hyperbolic |
+| sinh | 2 | 0.004 | 99.6% | |
+| cosh | 2 | 0.001 | 99.9% | |
+| tanh | 8 | 0.008 | 99.3% | |
+| asinh | 2 | 0.098 | 90.2% | |
+| acosh | 2 | 0.047 | 95.3% | |
+| atanh | 2 | <0.001 | >99.9% | |
+| Exponential / Logarithmic |
+| exp | 2 | 0.001 | 99.9% | |
+| exp2 | 214 | 0.107 | 99.6% | \ref CoeffwiseMathFunctionsAccuracy_note2 "2" |
+| expm1 | 3 | 0.010 | 99.1% | |
+| log | 2 | 0.147 | 85.3% | |
+| log1p | 3 | 0.097 | 90.6% | |
+| log10 | 2 | 0.001 | 99.9% | |
+| log2 | 2 | <0.001 | 99.9% | |
+| Error / Special |
+| erf | ∞ | 0.050 | 70.5% | \ref CoeffwiseMathFunctionsAccuracy_note5 "5" |
+| erfc | 11 | 0.002 | 99.9% | |
+| lgamma | delegates to std | \ref CoeffwiseMathFunctionsAccuracy_note3 "3" |
+| Other |
+| logistic | 3 | 0.008 | 99.2% | |
+| sqrt | 0 | 0.000 | 100% | Uses hardware sqrt |
+| cbrt | 2 | 0.119 | 88.1% | |
+| rsqrt | ∞ | 0.135 | 86.5% | \ref CoeffwiseMathFunctionsAccuracy_note4 "4" |
+
+
+\subsection CoeffwiseMathFunctionsAccuracy_notes Notes
+
+\anchor CoeffwiseMathFunctionsAccuracy_note1
+1. sin/cos/tan argument reduction:
+%Eigen's vectorized sin, cos, and tan use a Cody-Waite argument reduction scheme that
+subtracts multiples of π/2 from the input. For very large arguments (|x| > ~104
+in float, |x| > ~10 in double), this reduction loses precision, producing
+occasional large ULP errors. The mean error remains low because most representable
+values are small. Applications that need high accuracy for large arguments should
+perform argument reduction in user code before calling these functions.
+
+\anchor CoeffwiseMathFunctionsAccuracy_note2
+2. exp2 double precision:
+The exp2 implementation for double shows a max error of 214 ULP near the overflow
+boundary (x ≈ 1022). The mean error is still low (0.107 ULP, 99.6% exact), so
+the large max error affects only inputs very close to overflow.
+
+\anchor CoeffwiseMathFunctionsAccuracy_note3
+3. lgamma:
+The vectorized lgamma delegates to the standard library function \c std::lgamma for
+this SIMD target (SSE2) and therefore has the same accuracy as the platform's C math library.
+
+\anchor CoeffwiseMathFunctionsAccuracy_note4
+4. rsqrt max=∞:
+The infinite max ULP is due to a sign disagreement at a single subnormal input:
+rsqrt(-0) returns -∞ in %Eigen but the MPFR reference produces NaN (rsqrt of a negative
+value). Ignoring this edge case, the implementation is accurate (< 2 ULP) everywhere else.
+For float, 16.8 million subnormal negative inputs (0.4%) also produce ±∞ vs NaN.
+The mean error excluding these outliers is well below 1 ULP.
+
+\anchor CoeffwiseMathFunctionsAccuracy_note5
+5. erf double ∞:
+The vectorized erf for double returns NaN for ±∞ instead of the correct ±1.
+This produces an infinite max ULP error at a single input value. Excluding ±∞,
+the max error is 3 ULP and the mean is 0.050 ULP.
+
*/
}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 285977443..0555bfe63 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -532,6 +532,18 @@ if(EIGEN_TEST_BUILD_DOCUMENTATION)
add_dependencies(buildtests doc)
endif()
+# ULP accuracy measurement tool (see test/ulp_accuracy/README.md)
+find_package(MPFR)
+find_package(GMP)
+add_executable(ulp_accuracy ulp_accuracy/ulp_accuracy.cpp)
+target_compile_options(ulp_accuracy PRIVATE -pthread)
+target_link_libraries(ulp_accuracy Eigen3::Eigen ${CMAKE_THREAD_LIBS_INIT} pthread)
+if(MPFR_FOUND AND GMP_FOUND)
+ target_include_directories(ulp_accuracy PRIVATE ${MPFR_INCLUDES} ${GMP_INCLUDES})
+ target_link_libraries(ulp_accuracy ${MPFR_LIBRARIES} ${GMP_LIBRARIES})
+ target_compile_definitions(ulp_accuracy PRIVATE EIGEN_HAS_MPFR)
+endif()
+
# Register all smoke tests
include("EigenSmokeTestList")
ei_add_smoke_tests("${ei_smoke_test_list}")
diff --git a/test/ulp_accuracy/README.md b/test/ulp_accuracy/README.md
new file mode 100644
index 000000000..440d66844
--- /dev/null
+++ b/test/ulp_accuracy/README.md
@@ -0,0 +1,145 @@
+# ULP Accuracy Measurement Tool
+
+Standalone tool for measuring the accuracy of Eigen's vectorized math functions
+in units of ULP (Unit in the Last Place). Compares Eigen's SIMD implementations
+against either MPFR (128-bit high-precision reference) or the standard C++ math
+library.
+
+## Building
+
+From the Eigen build directory:
+
+```bash
+cd build
+cmake ..
+cmake --build . --target ulp_accuracy
+```
+
+If MPFR and GMP are installed, the build automatically enables MPFR support
+(`EIGEN_HAS_MPFR`). Without them, only `--ref=std` is available.
+
+### Installing MPFR (Debian/Ubuntu)
+
+```bash
+sudo apt install libmpfr-dev libgmp-dev
+```
+
+## Usage
+
+```
+./test/ulp_accuracy [options]
+
+Options:
+ --func=NAME Function to test (required unless --list)
+ --lo=VAL Start of range (default: -inf)
+ --hi=VAL End of range (default: +inf)
+ --double Test double precision (default: float)
+ --step=EPS Sampling step: advance by (1+EPS)*nextafter(x)
+ (default: 0 = exhaustive; useful for double, e.g. 1e-6)
+ --threads=N Number of threads (default: all cores)
+ --batch=N Batch size for Eigen eval (default: 4096)
+ --ref=MODE Reference: 'std' (default) or 'mpfr'
+ --hist_width=N Histogram half-width in ULPs (default: 10)
+ --list List available functions
+```
+
+## Examples
+
+List all supported functions:
+```bash
+./test/ulp_accuracy --list
+```
+
+Exhaustive float test of sin against std (tests all ~4.28 billion finite floats):
+```bash
+./test/ulp_accuracy --func=sin
+```
+
+Float test against MPFR (more accurate reference, but slower):
+```bash
+./test/ulp_accuracy --func=sin --ref=mpfr
+```
+
+Double precision test with geometric sampling (exhaustive is impractical for double):
+```bash
+./test/ulp_accuracy --func=exp --double --step=1e-6
+```
+
+Test a specific range:
+```bash
+./test/ulp_accuracy --func=sin --lo=0 --hi=6.2832
+```
+
+## Output
+
+The tool prints:
+
+- **Test configuration**: function, range, reference mode, thread count
+- **Max |ULP error|**: worst-case absolute ULP error with the offending input value
+- **Mean |ULP error|**: average absolute ULP error across all tested values
+- **Signed ULP histogram**: distribution of signed errors showing bias direction
+
+Example output:
+```
+Function: sin (float)
+Range: [-inf, inf]
+Representable values in range: 4278190082
+Reference: MPFR (128-bit)
+Threads: 32
+Batch size: 4096
+
+Results:
+ Values tested: 4278190081
+ Time: 529.04 seconds (8.1 Mvalues/s)
+ Max |ULP error|: 2
+ at x = -1.5413464e+38 (Eigen=-0.482218683, ref=-0.482218742)
+ Mean |ULP error|: 0.0874
+
+Signed ULP error histogram [-10, +10]:
+ -2 : 51988 ( 0.001%)
+ -1 : 186805349 ( 4.366%)
+ 0 : 3904475407 ( 91.265%)
+ 1 : 186805349 ( 4.366%)
+ 2 : 51988 ( 0.001%)
+```
+
+## How it works
+
+1. **Range splitting**: The input range is divided evenly across threads by
+ splitting the linear ULP space.
+
+2. **Batched evaluation**: Each thread fills batches of input values, evaluates
+ them through Eigen's vectorized path (using `Eigen::Array` operations), and
+ computes reference values one at a time.
+
+3. **ULP computation**: IEEE 754 bit patterns are mapped to a linear integer
+ scale where adjacent representable values are adjacent integers. The signed
+ ULP error is the difference between Eigen's result and the reference on this
+ scale. Special cases (NaN, infinity mismatches) report infinite error.
+
+4. **Result reduction**: Per-thread statistics (max error, mean error, histogram)
+ are merged after all threads complete.
+
+## Supported functions
+
+| Category | Functions |
+|----------|-----------|
+| Trigonometric | sin, cos, tan, asin, acos, atan |
+| Hyperbolic | sinh, cosh, tanh, asinh, acosh, atanh |
+| Exponential/Log | exp, exp2, expm1, log, log1p, log10, log2 |
+| Error/Gamma | erf, erfc, lgamma |
+| Other | logistic, sqrt, cbrt, rsqrt |
+
+## File organization
+
+- `ulp_accuracy.cpp` — Main tool: ULP computation, worker threads, CLI, result printing
+- `mpfr_reference.h` — MPFR reference function wrappers and scalar conversion helpers
+
+## Performance tips
+
+- Float exhaustive sweeps test ~4.28 billion values. With `--ref=std` this takes
+ ~50 seconds per function; with `--ref=mpfr` it takes ~500 seconds (10x slower).
+- For double precision, exhaustive testing is impractical. Use `--step=1e-6` to
+ sample ~2.88 billion values geometrically.
+- Thread count defaults to all available cores. MPFR is the bottleneck (single
+ MPFR call per value per thread), so more cores help significantly.
diff --git a/test/ulp_accuracy/mpfr_reference.h b/test/ulp_accuracy/mpfr_reference.h
new file mode 100644
index 000000000..37c691155
--- /dev/null
+++ b/test/ulp_accuracy/mpfr_reference.h
@@ -0,0 +1,78 @@
+// MPFR high-precision reference implementations for ULP accuracy testing.
+//
+// This header provides MPFR-based reference functions for all math operations
+// tested by the ulp_accuracy tool. It also includes scalar conversion helpers
+// between float/double and mpfr_t.
+//
+// Only compiled when EIGEN_HAS_MPFR is defined (i.e., MPFR and GMP are found).
+
+#ifndef EIGEN_ULP_ACCURACY_MPFR_REFERENCE_H
+#define EIGEN_ULP_ACCURACY_MPFR_REFERENCE_H
+
+#ifdef EIGEN_HAS_MPFR
+#include
+
+// ---------------------------------------------------------------------------
+// Scalar <-> mpfr_t conversion
+// ---------------------------------------------------------------------------
+
+template
+static void mpfr_set_scalar(mpfr_t rop, Scalar x, mpfr_rnd_t rnd);
+template <>
+void mpfr_set_scalar(mpfr_t rop, float x, mpfr_rnd_t rnd) {
+ mpfr_set_flt(rop, x, rnd);
+}
+template <>
+void mpfr_set_scalar(mpfr_t rop, double x, mpfr_rnd_t rnd) {
+ mpfr_set_d(rop, x, rnd);
+}
+
+template
+static Scalar mpfr_get_scalar(mpfr_t op, mpfr_rnd_t rnd);
+template <>
+float mpfr_get_scalar(mpfr_t op, mpfr_rnd_t rnd) {
+ return mpfr_get_flt(op, rnd);
+}
+template <>
+double mpfr_get_scalar(mpfr_t op, mpfr_rnd_t rnd) {
+ return mpfr_get_d(op, rnd);
+}
+
+// ---------------------------------------------------------------------------
+// MPFR wrappers for functions not directly provided by libmpfr
+// ---------------------------------------------------------------------------
+
+// logistic(x) = 1 / (1 + exp(-x))
+static int mpfr_logistic(mpfr_t rop, const mpfr_t op, mpfr_rnd_t rnd) {
+ mpfr_t tmp, one;
+ mpfr_init2(tmp, mpfr_get_prec(rop));
+ mpfr_init2(one, mpfr_get_prec(rop));
+ mpfr_set_ui(one, 1, rnd);
+ mpfr_neg(tmp, op, rnd);
+ mpfr_exp(tmp, tmp, rnd);
+ mpfr_add(tmp, tmp, one, rnd);
+ int ret = mpfr_div(rop, one, tmp, rnd);
+ mpfr_clear(tmp);
+ mpfr_clear(one);
+ return ret;
+}
+
+// rsqrt(x) = 1 / sqrt(x)
+static int mpfr_rsqrt(mpfr_t rop, const mpfr_t op, mpfr_rnd_t rnd) { return mpfr_rec_sqrt(rop, op, rnd); }
+
+// exp2(x) = 2^x
+static int mpfr_exp2_wrap(mpfr_t rop, const mpfr_t op, mpfr_rnd_t rnd) {
+ mpfr_t two;
+ mpfr_init2(two, mpfr_get_prec(rop));
+ mpfr_set_ui(two, 2, rnd);
+ int ret = mpfr_pow(rop, two, op, rnd);
+ mpfr_clear(two);
+ return ret;
+}
+
+// log2(x) — thin wrapper to match the function signature convention.
+static int mpfr_log2_wrap(mpfr_t rop, const mpfr_t op, mpfr_rnd_t rnd) { return mpfr_log2(rop, op, rnd); }
+
+#endif // EIGEN_HAS_MPFR
+
+#endif // EIGEN_ULP_ACCURACY_MPFR_REFERENCE_H
diff --git a/test/ulp_accuracy/ulp_accuracy.cpp b/test/ulp_accuracy/ulp_accuracy.cpp
new file mode 100644
index 000000000..658d2343d
--- /dev/null
+++ b/test/ulp_accuracy/ulp_accuracy.cpp
@@ -0,0 +1,611 @@
+// Standalone tool to measure ULP accuracy of Eigen's vectorized math functions
+// against either MPFR (high-precision reference) or std C++ math functions.
+//
+// See README.md in this directory for full documentation.
+//
+// Usage:
+// ./ulp_accuracy --func=sin --lo=0 --hi=6.2832 --threads=16
+// ./ulp_accuracy --func=exp --threads=16
+// ./ulp_accuracy --func=sin --ref=mpfr
+// ./ulp_accuracy --func=sin --double --step=1e-6
+// ./ulp_accuracy --list
+//
+// Build:
+// cd build && cmake .. && make ulp_accuracy
+
+#include
+#include
+
+#ifdef EIGEN_HAS_MPFR
+#include
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "mpfr_reference.h"
+
+// ============================================================================
+// ULP distance computation
+// ============================================================================
+
+// Maps IEEE 754 bits to a linear integer scale where adjacent representable
+// values are adjacent integers. The mapping is strictly monotonic:
+// -inf -> most negative, -0.0 -> -1, +0.0 -> 0, +inf -> most positive.
+static inline int64_t scalar_to_linear(float x) {
+ int32_t bits;
+ std::memcpy(&bits, &x, sizeof(bits));
+ if (bits < 0) {
+ bits = static_cast(INT32_MIN) - bits - 1;
+ }
+ return static_cast(bits);
+}
+
+static inline int64_t scalar_to_linear(double x) {
+ int64_t bits;
+ std::memcpy(&bits, &x, sizeof(bits));
+ if (bits < 0) {
+ bits = static_cast(INT64_MIN) - bits - 1;
+ }
+ return bits;
+}
+
+// Returns (eigen_val - ref_val) in ULP space.
+// Positive means Eigen overestimates, negative means it underestimates.
+// Returns INT64_MAX for incomparable values (NaN vs number, inf mismatch).
+template
+static inline int64_t signed_ulp_error(Scalar eigen_val, Scalar ref_val) {
+ if (eigen_val == ref_val) return 0; // also handles -0.0 == +0.0
+ bool e_nan = std::isnan(eigen_val), r_nan = std::isnan(ref_val);
+ if (e_nan && r_nan) return 0;
+ if (e_nan || r_nan) return INT64_MAX;
+ if (std::isinf(eigen_val) || std::isinf(ref_val)) return INT64_MAX;
+ int64_t a = scalar_to_linear(eigen_val);
+ int64_t b = scalar_to_linear(ref_val);
+ // Overflow check for a - b.
+ if (b > 0 && a < INT64_MIN + b) return INT64_MAX;
+ if (b < 0 && a > INT64_MAX + b) return INT64_MAX;
+ return a - b;
+}
+
+// ============================================================================
+// Per-thread accumulator with signed ULP histogram
+// ============================================================================
+
+template
+struct alignas(128) ThreadResult {
+ int64_t max_abs_ulp = 0;
+ Scalar max_ulp_at = Scalar(0);
+ Scalar max_ulp_eigen = Scalar(0);
+ Scalar max_ulp_ref = Scalar(0);
+ double abs_ulp_sum = 0.0;
+ uint64_t count = 0;
+
+ // Signed histogram: bins for errors in [-hist_width, +hist_width],
+ // plus two overflow bins for < -hist_width and > +hist_width.
+ // Layout: [<-W] [-W] [-W+1] ... [0] ... [W-1] [W] [>W]
+ // Total bins = 2*hist_width + 3
+ int hist_width = 0;
+ std::vector hist;
+
+ void init(int w) {
+ hist_width = w;
+ hist.assign(2 * w + 3, 0);
+ }
+
+ void record(int64_t signed_err, Scalar x, Scalar eigen_val, Scalar ref_val) {
+ int64_t abs_err = signed_err < 0 ? -signed_err : signed_err;
+ if (signed_err == INT64_MAX) abs_err = INT64_MAX;
+
+ if (abs_err > max_abs_ulp) {
+ max_abs_ulp = abs_err;
+ max_ulp_at = x;
+ max_ulp_eigen = eigen_val;
+ max_ulp_ref = ref_val;
+ }
+ if (abs_err != INT64_MAX) {
+ abs_ulp_sum += static_cast(abs_err);
+ }
+ count++;
+
+ // Histogram bin.
+ int bin;
+ if (signed_err == INT64_MAX || signed_err > hist_width) {
+ bin = 2 * hist_width + 2; // overflow high
+ } else if (signed_err < -hist_width) {
+ bin = 0; // overflow low
+ } else {
+ bin = static_cast(signed_err) + hist_width + 1;
+ }
+ hist[bin]++;
+ }
+};
+
+// ============================================================================
+// Function registry
+// ============================================================================
+
+template
+struct FuncEntry {
+ using ArrayType = Eigen::Array;
+ using EigenEval = std::function, const Eigen::Ref&)>;
+ using StdEval = std::function;
+
+#ifdef EIGEN_HAS_MPFR
+ using MpfrEval = std::function;
+#endif
+
+ std::string name;
+ EigenEval eigen_eval;
+ StdEval std_eval;
+#ifdef EIGEN_HAS_MPFR
+ MpfrEval mpfr_eval;
+#endif
+ Scalar default_lo;
+ Scalar default_hi;
+};
+
+// std::logistic is not part of the C++ standard library.
+template
+static Scalar std_logistic(Scalar x) {
+ if (x >= 0) {
+ Scalar e = std::exp(-x);
+ return Scalar(1) / (Scalar(1) + e);
+ } else {
+ Scalar e = std::exp(x);
+ return e / (Scalar(1) + e);
+ }
+}
+
+template
+static std::vector> build_func_table() {
+ using ArrayType = Eigen::Array;
+ std::vector> table;
+
+#ifdef EIGEN_HAS_MPFR
+#define ADD_FUNC(fname, eigen_expr, std_expr, mpfr_fn, lo, hi) \
+ table.push_back({#fname, [](Eigen::Ref out, const Eigen::Ref& a) { out = eigen_expr; }, \
+ [](Scalar x) -> Scalar { return std_expr; }, \
+ [](mpfr_t r, const mpfr_t o, mpfr_rnd_t d) { return mpfr_fn(r, o, d); }, lo, hi})
+#else
+#define ADD_FUNC(fname, eigen_expr, std_expr, mpfr_fn, lo, hi) \
+ table.push_back({#fname, [](Eigen::Ref out, const Eigen::Ref& a) { out = eigen_expr; }, \
+ [](Scalar x) -> Scalar { return std_expr; }, lo, hi})
+#endif
+
+ constexpr Scalar kInf = std::numeric_limits::infinity();
+
+ // Trigonometric
+ // clang-format off
+ ADD_FUNC(sin, a.sin(), std::sin(x), mpfr_sin, -kInf, kInf);
+ ADD_FUNC(cos, a.cos(), std::cos(x), mpfr_cos, -kInf, kInf);
+ ADD_FUNC(tan, a.tan(), std::tan(x), mpfr_tan, -kInf, kInf);
+ ADD_FUNC(asin, a.asin(), std::asin(x), mpfr_asin, -kInf, kInf);
+ ADD_FUNC(acos, a.acos(), std::acos(x), mpfr_acos, -kInf, kInf);
+ ADD_FUNC(atan, a.atan(), std::atan(x), mpfr_atan, -kInf, kInf);
+
+ // Hyperbolic
+ ADD_FUNC(sinh, a.sinh(), std::sinh(x), mpfr_sinh, -kInf, kInf);
+ ADD_FUNC(cosh, a.cosh(), std::cosh(x), mpfr_cosh, -kInf, kInf);
+ ADD_FUNC(tanh, a.tanh(), std::tanh(x), mpfr_tanh, -kInf, kInf);
+ ADD_FUNC(asinh, a.asinh(), std::asinh(x), mpfr_asinh, -kInf, kInf);
+ ADD_FUNC(acosh, a.acosh(), std::acosh(x), mpfr_acosh, -kInf, kInf);
+ ADD_FUNC(atanh, a.atanh(), std::atanh(x), mpfr_atanh, -kInf, kInf);
+
+ // Exponential / Logarithmic
+ ADD_FUNC(exp, a.exp(), std::exp(x), mpfr_exp, -kInf, kInf);
+ ADD_FUNC(exp2, a.exp2(), std::exp2(x), mpfr_exp2_wrap, -kInf, kInf);
+ ADD_FUNC(expm1, a.expm1(), std::expm1(x), mpfr_expm1, -kInf, kInf);
+ ADD_FUNC(log, a.log(), std::log(x), mpfr_log, -kInf, kInf);
+ ADD_FUNC(log1p, a.log1p(), std::log1p(x), mpfr_log1p, -kInf, kInf);
+ ADD_FUNC(log10, a.log10(), std::log10(x), mpfr_log10, -kInf, kInf);
+ ADD_FUNC(log2, a.log2(), std::log2(x), mpfr_log2_wrap, -kInf, kInf);
+
+ // Error / Gamma
+ ADD_FUNC(erf, a.erf(), std::erf(x), mpfr_erf, -kInf, kInf);
+ ADD_FUNC(erfc, a.erfc(), std::erfc(x), mpfr_erfc, -kInf, kInf);
+ ADD_FUNC(lgamma, a.lgamma(), std::lgamma(x), mpfr_lngamma, -kInf, kInf);
+
+ // Other
+ ADD_FUNC(logistic, a.logistic(), std_logistic(x), mpfr_logistic, -kInf, kInf);
+ ADD_FUNC(sqrt, a.sqrt(), std::sqrt(x), mpfr_sqrt, -kInf, kInf);
+ ADD_FUNC(cbrt, a.cbrt(), std::cbrt(x), mpfr_cbrt, -kInf, kInf);
+ ADD_FUNC(rsqrt, a.rsqrt(), Scalar(1)/std::sqrt(x), mpfr_rsqrt, -kInf, kInf);
+ // clang-format on
+
+#undef ADD_FUNC
+ return table;
+}
+
+// ============================================================================
+// Range iteration helpers
+// ============================================================================
+
+// Advances x toward +inf by at least 1 ULP. When step_eps > 0, additionally
+// jumps by a relative factor of (1 + step_eps) to sample the range sparsely.
+template
+static inline Scalar advance_by_step(Scalar x, double step_eps) {
+ Scalar next = std::nextafter(x, std::numeric_limits::infinity());
+ if (step_eps > 0.0 && std::isfinite(next)) {
+ // Try to jump further by a relative amount.
+ Scalar jumped = next > 0 ? next * static_cast(1.0 + step_eps) : next / static_cast(1.0 + step_eps);
+ // Use the jump only if it actually advances further (handles denormal stalling).
+ if (jumped > next) next = jumped;
+ }
+ return next;
+}
+
+// Counts the number of representable scalars in [lo, hi].
+template
+static uint64_t count_scalars_in_range(Scalar lo, Scalar hi) {
+ if (lo > hi) return 0;
+ uint64_t lo_u = static_cast(scalar_to_linear(lo));
+ uint64_t hi_u = static_cast(scalar_to_linear(hi));
+ uint64_t diff = hi_u - lo_u;
+ return diff == UINT64_MAX ? UINT64_MAX : diff + 1;
+}
+
+// Advances a scalar by n ULPs in the linear representation.
+static float advance_scalar(float x, uint64_t n) {
+ int64_t lin = scalar_to_linear(x);
+ lin += static_cast(n);
+ int32_t ibits;
+ if (lin < 0) {
+ ibits = static_cast(INT32_MIN) - static_cast(lin) - 1;
+ } else {
+ ibits = static_cast(lin);
+ }
+ float result;
+ std::memcpy(&result, &ibits, sizeof(result));
+ return result;
+}
+
+static double advance_scalar(double x, uint64_t n) {
+ int64_t lin = scalar_to_linear(x);
+ lin += static_cast(n);
+ int64_t ibits;
+ if (lin < 0) {
+ ibits = static_cast(INT64_MIN) - lin - 1;
+ } else {
+ ibits = lin;
+ }
+ double result;
+ std::memcpy(&result, &ibits, sizeof(result));
+ return result;
+}
+
+// ============================================================================
+// Worker thread: evaluates Eigen and reference over a subrange
+// ============================================================================
+
+template
+static void worker(const FuncEntry& func, Scalar lo, Scalar hi, int batch_size, bool use_mpfr, double step_eps,
+ ThreadResult& result) {
+ using ArrayType = Eigen::Array;
+ ArrayType input(batch_size);
+ ArrayType eigen_out(batch_size);
+ std::vector ref_out(batch_size);
+
+#ifdef EIGEN_HAS_MPFR
+ mpfr_t mp_in, mp_out;
+ if (use_mpfr) {
+ mpfr_init2(mp_in, 128);
+ mpfr_init2(mp_out, 128);
+ }
+#else
+ (void)use_mpfr;
+#endif
+
+ auto process_batch = [&](int n, const ArrayType& in, const ArrayType& eig) {
+ for (int i = 0; i < n; i++) {
+#ifdef EIGEN_HAS_MPFR
+ if (use_mpfr) {
+ mpfr_set_scalar(mp_in, in[i], MPFR_RNDN);
+ func.mpfr_eval(mp_out, mp_in, MPFR_RNDN);
+ ref_out[i] = mpfr_get_scalar(mp_out, MPFR_RNDN);
+ } else
+#endif
+ {
+ ref_out[i] = func.std_eval(in[i]);
+ }
+ }
+ for (int i = 0; i < n; i++) {
+ int64_t err = signed_ulp_error(eig[i], ref_out[i]);
+ result.record(err, in[i], eig[i], ref_out[i]);
+ }
+ };
+
+ int idx = 0;
+ Scalar x = lo;
+ for (;;) {
+ input[idx] = x;
+ idx++;
+
+ if (idx == batch_size) {
+ func.eigen_eval(eigen_out, input);
+ process_batch(batch_size, input, eigen_out);
+ idx = 0;
+ }
+
+ if (x >= hi) break;
+ Scalar next = advance_by_step(x, step_eps);
+ x = (next > hi) ? hi : next;
+ }
+
+ // Process remaining partial batch.
+ if (idx > 0) {
+ auto partial_in = input.head(idx);
+ auto partial_eigen = eigen_out.head(idx);
+ func.eigen_eval(partial_eigen, partial_in);
+ process_batch(idx, input, eigen_out);
+ }
+
+#ifdef EIGEN_HAS_MPFR
+ if (use_mpfr) {
+ mpfr_clear(mp_in);
+ mpfr_clear(mp_out);
+ }
+#endif
+}
+
+// ============================================================================
+// Test driver: splits range across threads and prints results
+// ============================================================================
+
+struct Options {
+ std::string func_name;
+ double lo = std::numeric_limits::quiet_NaN();
+ double hi = std::numeric_limits::quiet_NaN();
+ int num_threads;
+ int batch_size = 4096;
+ int hist_width = 10;
+ bool use_mpfr = false;
+ bool use_double = false;
+ double step_eps = 0.0;
+ bool list_funcs = false;
+};
+
+template
+static int run_test(const Options& opts) {
+ const int kDigits = std::is_same::value ? 9 : 17;
+ const char* kTypeName = std::is_same::value ? "float" : "double";
+
+ auto table = build_func_table();
+
+ if (opts.list_funcs) {
+ std::printf("Available functions:\n");
+ for (const auto& f : table) {
+ std::printf(" %s\n", f.name.c_str());
+ }
+ return 0;
+ }
+
+ // Look up the requested function.
+ const FuncEntry* entry = nullptr;
+ for (const auto& f : table) {
+ if (f.name == opts.func_name) {
+ entry = &f;
+ break;
+ }
+ }
+ if (!entry) {
+ std::fprintf(stderr, "Error: unknown function '%s' (use --list to see available functions)\n",
+ opts.func_name.c_str());
+ return 1;
+ }
+
+ Scalar lo = std::isnan(opts.lo) ? entry->default_lo : static_cast(opts.lo);
+ Scalar hi = std::isnan(opts.hi) ? entry->default_hi : static_cast(opts.hi);
+ uint64_t total_scalars = count_scalars_in_range(lo, hi);
+ int num_threads = opts.num_threads;
+
+ // Print test configuration.
+ std::printf("Function: %s (%s)\n", opts.func_name.c_str(), kTypeName);
+ std::printf("Range: [%.*g, %.*g]\n", kDigits, double(lo), kDigits, double(hi));
+ if (opts.step_eps > 0.0) {
+ std::printf("Sampling step: (1 + %g) * nextafter(x)\n", opts.step_eps);
+ } else {
+ std::printf("Representable values in range: %lu\n", static_cast(total_scalars));
+ }
+ std::printf("Reference: %s\n", opts.use_mpfr ? "MPFR (128-bit)" : "std C++ math");
+ std::printf("Threads: %d\n", num_threads);
+ std::printf("Batch size: %d\n", opts.batch_size);
+ std::printf("\n");
+ std::fflush(stdout);
+
+ // Split range across threads.
+ if (total_scalars > 0 && static_cast(num_threads) > total_scalars) {
+ num_threads = static_cast(total_scalars);
+ }
+ if (num_threads < 1) num_threads = 1;
+
+ // Heap-allocate each ThreadResult separately to avoid false sharing.
+ std::vector>> results;
+ results.reserve(num_threads);
+ for (int t = 0; t < num_threads; t++) {
+ results.push_back(std::make_unique>());
+ results.back()->init(opts.hist_width);
+ }
+
+ std::vector threads;
+ uint64_t scalars_per_thread = total_scalars / num_threads;
+ Scalar chunk_lo = lo;
+
+ auto start_time = std::chrono::steady_clock::now();
+
+ for (int t = 0; t < num_threads; t++) {
+ Scalar chunk_hi;
+ if (t == num_threads - 1) {
+ chunk_hi = hi;
+ } else {
+ chunk_hi = advance_scalar(chunk_lo, scalars_per_thread - 1);
+ }
+ threads.emplace_back(worker, std::cref(*entry), chunk_lo, chunk_hi, opts.batch_size, opts.use_mpfr,
+ opts.step_eps, std::ref(*results[t]));
+ chunk_lo = std::nextafter(chunk_hi, std::numeric_limits::infinity());
+ }
+
+ for (auto& t : threads) t.join();
+ auto end_time = std::chrono::steady_clock::now();
+ double elapsed = std::chrono::duration(end_time - start_time).count();
+
+ // Reduce per-thread results.
+ ThreadResult global;
+ global.init(opts.hist_width);
+ for (int t = 0; t < num_threads; t++) {
+ const auto& r = *results[t];
+ if (r.max_abs_ulp > global.max_abs_ulp) {
+ global.max_abs_ulp = r.max_abs_ulp;
+ global.max_ulp_at = r.max_ulp_at;
+ global.max_ulp_eigen = r.max_ulp_eigen;
+ global.max_ulp_ref = r.max_ulp_ref;
+ }
+ global.abs_ulp_sum += r.abs_ulp_sum;
+ global.count += r.count;
+ for (size_t b = 0; b < global.hist.size(); b++) {
+ global.hist[b] += r.hist[b];
+ }
+ }
+
+ double mean_ulp = global.count > 0 ? global.abs_ulp_sum / global.count : 0.0;
+
+ // Print results.
+ std::printf("Results:\n");
+ std::printf(" Values tested: %lu\n", static_cast(global.count));
+ std::printf(" Time: %.2f seconds (%.1f Mvalues/s)\n", elapsed, global.count / elapsed / 1e6);
+ if (global.max_abs_ulp == INT64_MAX) {
+ std::printf(" Max |ULP error|: inf\n");
+ } else {
+ std::printf(" Max |ULP error|: %ld\n", static_cast(global.max_abs_ulp));
+ }
+ std::printf(" at x = %.*g (Eigen=%.*g, ref=%.*g)\n", kDigits, double(global.max_ulp_at), kDigits,
+ double(global.max_ulp_eigen), kDigits, double(global.max_ulp_ref));
+ std::printf(" Mean |ULP error|: %.4f\n", mean_ulp);
+ std::printf("\n");
+
+ // Print signed error histogram.
+ std::printf("Signed ULP error histogram [-%d, +%d]:\n", opts.hist_width, opts.hist_width);
+ int nbins = 2 * opts.hist_width + 3;
+ for (int b = 0; b < nbins; b++) {
+ if (global.hist[b] == 0) continue;
+ double pct = 100.0 * global.hist[b] / global.count;
+ if (b == 0) {
+ std::printf(" <%-4d: %12lu (%7.3f%%)\n", -opts.hist_width, static_cast(global.hist[b]), pct);
+ } else if (b == nbins - 1) {
+ std::printf(" >%-4d: %12lu (%7.3f%%)\n", opts.hist_width, static_cast(global.hist[b]), pct);
+ } else {
+ int err = b - opts.hist_width - 1;
+ std::printf(" %-5d: %12lu (%7.3f%%)\n", err, static_cast(global.hist[b]), pct);
+ }
+ }
+
+ return 0;
+}
+
+// ============================================================================
+// Command-line parsing
+// ============================================================================
+
+static void print_usage() {
+ std::printf(
+ "Usage: ulp_accuracy [options]\n"
+ " --func=NAME Function to test (required unless --list)\n"
+ " --lo=VAL Start of range (default: -inf)\n"
+ " --hi=VAL End of range (default: +inf)\n"
+ " --double Test double precision (default: float)\n"
+ " --step=EPS Sampling step: advance by (1+EPS)*nextafter(x)\n"
+ " (default: 0 = exhaustive; useful for double, e.g. 1e-6)\n"
+ " --threads=N Number of threads (default: all cores)\n"
+ " --batch=N Batch size for Eigen eval (default: 4096)\n"
+ " --ref=MODE Reference: 'std' (default) or 'mpfr'\n"
+ " --hist_width=N Histogram half-width in ULPs (default: 10)\n"
+ " --list List available functions\n");
+}
+
+int main(int argc, char* argv[]) {
+ Options opts;
+ opts.num_threads = static_cast(std::thread::hardware_concurrency());
+ if (opts.num_threads == 0) opts.num_threads = 4;
+ std::string ref_mode;
+
+ for (int i = 1; i < argc; i++) {
+ std::string arg = argv[i];
+ if (arg.substr(0, 7) == "--func=") {
+ opts.func_name = arg.substr(7);
+ } else if (arg.substr(0, 5) == "--lo=") {
+ std::string val = arg.substr(5);
+ if (val == "inf")
+ opts.lo = std::numeric_limits::infinity();
+ else if (val == "-inf")
+ opts.lo = -std::numeric_limits::infinity();
+ else
+ opts.lo = std::stod(val);
+ } else if (arg.substr(0, 5) == "--hi=") {
+ std::string val = arg.substr(5);
+ if (val == "inf")
+ opts.hi = std::numeric_limits::infinity();
+ else if (val == "-inf")
+ opts.hi = -std::numeric_limits::infinity();
+ else
+ opts.hi = std::stod(val);
+ } else if (arg.substr(0, 10) == "--threads=") {
+ opts.num_threads = std::stoi(arg.substr(10));
+ } else if (arg.substr(0, 8) == "--batch=") {
+ opts.batch_size = std::stoi(arg.substr(8));
+ } else if (arg.substr(0, 6) == "--ref=") {
+ ref_mode = arg.substr(6);
+ } else if (arg.substr(0, 13) == "--hist_width=") {
+ opts.hist_width = std::stoi(arg.substr(13));
+ } else if (arg.substr(0, 7) == "--step=") {
+ opts.step_eps = std::stod(arg.substr(7));
+ } else if (arg == "--double") {
+ opts.use_double = true;
+ } else if (arg == "--list") {
+ opts.list_funcs = true;
+ } else if (arg == "--help" || arg == "-h") {
+ print_usage();
+ return 0;
+ } else {
+ std::fprintf(stderr, "Unknown option: %s\n", arg.c_str());
+ print_usage();
+ return 1;
+ }
+ }
+
+ // Determine reference mode (default: std).
+ if (ref_mode.empty() || ref_mode == "std") {
+ opts.use_mpfr = false;
+ } else if (ref_mode == "mpfr") {
+#ifdef EIGEN_HAS_MPFR
+ opts.use_mpfr = true;
+#else
+ std::fprintf(stderr, "Error: MPFR support not compiled in. Use --ref=std or rebuild with MPFR.\n");
+ return 1;
+#endif
+ } else {
+ std::fprintf(stderr, "Error: --ref must be 'std' or 'mpfr'\n");
+ return 1;
+ }
+
+ if (!opts.list_funcs && opts.func_name.empty()) {
+ std::fprintf(stderr, "Error: --func=NAME is required (use --list to see available functions)\n");
+ return 1;
+ }
+
+ if (opts.use_double) {
+ return run_test(opts);
+ } else {
+ return run_test(opts);
+ }
+}