From dd5d390daf3a3a561a772b64f1b602e5f240bf8b Mon Sep 17 00:00:00 2001 From: Till Hoffmann Date: Fri, 1 Apr 2016 13:32:29 +0100 Subject: [PATCH 1/7] Added zeta function. --- Eigen/src/Core/GenericPacketMath.h | 5 + Eigen/src/Core/GlobalFunctions.h | 1 + Eigen/src/Core/SpecialFunctions.h | 189 ++++++++++++++++++ Eigen/src/Core/arch/CUDA/MathFunctions.h | 14 ++ Eigen/src/Core/arch/CUDA/PacketMath.h | 1 + Eigen/src/Core/functors/UnaryFunctors.h | 22 ++ Eigen/src/plugins/ArrayCwiseUnaryOps.h | 9 + test/array.cpp | 9 + .../Eigen/CXX11/src/Tensor/TensorBase.h | 6 + 9 files changed, 256 insertions(+) diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 802def51d..988fc9c99 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -76,6 +76,7 @@ struct default_packet_traits HasTanh = 0, HasLGamma = 0, HasDiGamma = 0, + HasZeta = 0, HasErf = 0, HasErfc = 0, HasIGamma = 0, @@ -450,6 +451,10 @@ Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); } /** \internal \returns the derivative of lgamma, psi(\a a) (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pdigamma(const Packet& a) { using numext::digamma; return digamma(a); } + +/** \internal \returns the zeta function of two arguments (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pzeta(const Packet& x, const Packet& q) { using numext::zeta; return zeta(x, q); } /** \internal \returns the erf(\a a) (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 7df0fdda9..a013cca1f 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -51,6 +51,7 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(zeta,scalar_zeta_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op) diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index 37ebb5915..4240ebf2f 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h @@ -722,6 +722,189 @@ struct igamma_impl { #endif // EIGEN_HAS_C99_MATH +/**************************************************************************** + * Implementation of Riemann zeta function of two arguments * + ****************************************************************************/ + +template +struct zeta_retval { + typedef Scalar type; +}; + +#ifndef EIGEN_HAS_C99_MATH + +template +struct zeta_impl { + EIGEN_DEVICE_FUNC + static Scalar run(Scalar x) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +#else + +template +struct zeta_impl { + EIGEN_DEVICE_FUNC + static Scalar run(Scalar x, Scalar q) { + /* zeta.c + * + * Riemann zeta function of two arguments + * + * + * + * SYNOPSIS: + * + * double x, q, y, zeta(); + * + * y = zeta( x, q ); + * + * + * + * DESCRIPTION: + * + * + * + * inf. + * - -x + * zeta(x,q) = > (k+q) + * - + * k=0 + * + * where x > 1 and q is not a negative integer or zero. + * The Euler-Maclaurin summation formula is used to obtain + * the expansion + * + * n + * - -x + * zeta(x,q) = > (k+q) + * - + * k=1 + * + * 1-x inf. B x(x+1)...(x+2j) + * (n+q) 1 - 2j + * + --------- - ------- + > -------------------- + * x-1 x - x+2j+1 + * 2(n+q) j=1 (2j)! (n+q) + * + * where the B2j are Bernoulli numbers. Note that (see zetac.c) + * zeta(x,1) = zetac(x) + 1. + * + * + * + * ACCURACY: + * + * + * + * REFERENCE: + * + * Gradshteyn, I. S., and I. M. Ryzhik, Tables of Integrals, + * Series, and Products, p. 1073; Academic Press, 1980. + * + */ + + int i; + /*double a, b, k, s, t, w;*/ + Scalar p, r, a, b, k, s, t, w; + + const double A[] = { + 12.0, + -720.0, + 30240.0, + -1209600.0, + 47900160.0, + -1.8924375803183791606e9, /*1.307674368e12/691*/ + 7.47242496e10, + -2.950130727918164224e12, /*1.067062284288e16/3617*/ + 1.1646782814350067249e14, /*5.109094217170944e18/43867*/ + -4.5979787224074726105e15, /*8.028576626982912e20/174611*/ + 1.8152105401943546773e17, /*1.5511210043330985984e23/854513*/ + -7.1661652561756670113e18 /*1.6938241367317436694528e27/236364091*/ + }; + + const Scalar maxnum = NumTraits::infinity(); + const Scalar zero = 0.0, half = 0.5, one = 1.0; + const Scalar machep = igamma_helper::machep(); + + if( x == one ) + return maxnum; //goto retinf; + + if( x < one ) + { + // domerr: + // mtherr( "zeta", DOMAIN ); + return zero; + } + + if( q <= zero ) + { + if(q == numext::floor(q)) + { + // mtherr( "zeta", SING ); + // retinf: + return maxnum; + } + p = x; + r = numext::floor(p); + // if( x != floor(x) ) + // goto domerr; /* because q^-x not defined */ + if (p != r) + return zero; + } + + /* Euler-Maclaurin summation formula */ + /* + if( x < 25.0 ) + */ + { + /* Permit negative q but continue sum until n+q > +9 . + * This case should be handled by a reflection formula. + * If q<0 and x is an integer, there is a relation to + * the polygamma function. + */ + s = numext::pow( q, -x ); + a = q; + i = 0; + b = zero; + while( (i < 9) || (a <= Scalar(9.0)) ) + { + i += 1; + a += one; + b = numext::pow( a, -x ); + s += b; + if( numext::abs(b/s) < machep ) + return s; // goto done; + } + + w = a; + s += b*w/(x-one); + s -= half * b; + a = one; + k = zero; + for( i=0; i<12; i++ ) + { + a *= x + k; + b /= w; + t = a*b/A[i]; + s = s + t; + t = numext::abs(t/s); + if( t < machep ) + return s; // goto done; + k += one; + a *= x + k; + b /= w; + k += one; + } + // done: + return(s); + } + } +}; + +#endif // EIGEN_HAS_C99_MATH + } // end namespace internal namespace numext { @@ -737,6 +920,12 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(digamma, Scalar) digamma(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(digamma, Scalar)::run(x); } + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(zeta, Scalar) +zeta(const Scalar& x, const Scalar& q) { + return EIGEN_MATHFUNC_IMPL(zeta, Scalar)::run(x, q); +} template EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index 6822700f8..858775523 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -91,6 +91,20 @@ double2 pdigamma(const double2& a) using numext::digamma; return make_double2(digamma(a.x), digamma(a.y)); } + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pzeta(const float4& a) +{ + using numext::zeta; + return make_float4(zeta(a.x), zeta(a.y), zeta(a.z), zeta(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pzeta(const double2& a) +{ + using numext::zeta; + return make_double2(zeta(a.x), zeta(a.y)); +} template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 perf(const float4& a) diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index 56822838e..e0db18fbf 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -40,6 +40,7 @@ template<> struct packet_traits : default_packet_traits HasRsqrt = 1, HasLGamma = 1, HasDiGamma = 1, + HasZeta = 1, HasErf = 1, HasErfc = 1, HasIgamma = 1, diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 531beead6..e2fb8d8d6 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -448,6 +448,28 @@ struct functor_traits > PacketAccess = packet_traits::HasDiGamma }; }; + +/** \internal + * \brief Template functor to compute the Riemann Zeta function of two arguments. + * \sa class CwiseUnaryOp, Cwise::zeta() + */ +template struct scalar_zeta_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_zeta_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& x, const Scalar& q) const { + using numext::zeta; return zeta(x, q); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x, const Packet& q) const { return internal::pzeta(x, q); } +}; +template +struct functor_traits > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, + PacketAccess = packet_traits::HasZeta + }; +}; /** \internal * \brief Template functor to compute the Gauss error function of a diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 2ce7414a1..6c92a2f1b 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -23,6 +23,7 @@ typedef CwiseUnaryOp, const Derived> SinhReturn typedef CwiseUnaryOp, const Derived> CoshReturnType; typedef CwiseUnaryOp, const Derived> LgammaReturnType; typedef CwiseUnaryOp, const Derived> DigammaReturnType; +typedef CwiseUnaryOp, const Derived> ZetaReturnType; typedef CwiseUnaryOp, const Derived> ErfReturnType; typedef CwiseUnaryOp, const Derived> ErfcReturnType; typedef CwiseUnaryOp, const Derived> PowReturnType; @@ -329,6 +330,14 @@ digamma() const return DigammaReturnType(derived()); } +/** \returns an expression of the coefficient-wise zeta function. + */ +inline const ZetaReturnType +zeta() const +{ + return ZetaReturnType(derived()); +} + /** \returns an expression of the coefficient-wise Gauss error * function of *this. * diff --git a/test/array.cpp b/test/array.cpp index d05744c4a..2f0b0f1b6 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -322,6 +322,15 @@ template void array_real(const ArrayType& m) std::numeric_limits::infinity()); VERIFY_IS_EQUAL(numext::digamma(Scalar(-1)), std::numeric_limits::infinity()); + + // Check the zeta function against scipy.special.zeta + VERIFY_IS_APPROX(numext::zeta(Scalar(1.5), Scalar(2)), RealScalar(1.61237534869)); + VERIFY_IS_APPROX(numext::zeta(Scalar(4), Scalar(1.5)), RealScalar(0.234848505667)); + VERIFY_IS_APPROX(numext::zeta(Scalar(10.5), Scalar(3)), RealScalar(1.03086757337e-5)); + VERIFY_IS_APPROX(numext::zeta(Scalar(10000.5), Scalar(1.0001)), RealScalar(0.367879440865)); + VERIFY_IS_APPROX(numext::zeta(Scalar(3), Scalar(-2.5)), RealScalar(0.054102025820864097)); + VERIFY_IS_EQUAL(numext::zeta(Scalar(1), Scalar(1.2345)), // The second scalar does not matter + std::numeric_limits::infinity()); { // Test various propreties of igamma & igammac. These are normalized diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 6ee9c88b9..7c427d3c1 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -132,6 +132,12 @@ class TensorBase digamma() const { return unaryExpr(internal::scalar_digamma_op()); } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + zeta() const { + return unaryExpr(internal::scalar_zeta_op()); + } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> From 57239f4a8149dbd603ad376e90a0a4574b846710 Mon Sep 17 00:00:00 2001 From: Till Hoffmann Date: Fri, 1 Apr 2016 14:35:21 +0100 Subject: [PATCH 2/7] Added polygamma function. --- Eigen/src/Core/GenericPacketMath.h | 5 ++ Eigen/src/Core/GlobalFunctions.h | 1 + Eigen/src/Core/SpecialFunctions.h | 52 ++++++++++++++++++- Eigen/src/Core/arch/CUDA/MathFunctions.h | 22 ++++++-- Eigen/src/Core/arch/CUDA/PacketMath.h | 1 + Eigen/src/Core/functors/UnaryFunctors.h | 22 ++++++++ Eigen/src/plugins/ArrayCwiseUnaryOps.h | 9 ++++ test/array.cpp | 5 ++ .../Eigen/CXX11/src/Tensor/TensorBase.h | 6 +++ 9 files changed, 118 insertions(+), 5 deletions(-) diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 988fc9c99..6ff61c18a 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -77,6 +77,7 @@ struct default_packet_traits HasLGamma = 0, HasDiGamma = 0, HasZeta = 0, + HasPolygamma = 0, HasErf = 0, HasErfc = 0, HasIGamma = 0, @@ -456,6 +457,10 @@ Packet pdigamma(const Packet& a) { using numext::digamma; return digamma(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pzeta(const Packet& x, const Packet& q) { using numext::zeta; return zeta(x, q); } +/** \internal \returns the polygamma function (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet ppolygamma(const Packet& n, const Packet& x) { using numext::polygamma; return polygamma(n, x); } + /** \internal \returns the erf(\a a) (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet perf(const Packet& a) { using numext::erf; return erf(a); } diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index a013cca1f..05ba6ddb4 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -52,6 +52,7 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(zeta,scalar_zeta_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(polygamma,scalar_polygamma_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op) diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index 4240ebf2f..02ac7cf3f 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h @@ -736,7 +736,7 @@ struct zeta_retval { template struct zeta_impl { EIGEN_DEVICE_FUNC - static Scalar run(Scalar x) { + static Scalar run(Scalar x, Scalar q) { EIGEN_STATIC_ASSERT((internal::is_same::value == false), THIS_TYPE_IS_NOT_SUPPORTED); return Scalar(0); @@ -905,6 +905,50 @@ struct zeta_impl { #endif // EIGEN_HAS_C99_MATH +/**************************************************************************** + * Implementation of polygamma function * + ****************************************************************************/ + +template +struct polygamma_retval { + typedef Scalar type; +}; + +#ifndef EIGEN_HAS_C99_MATH + +template +struct polygamma_impl { + EIGEN_DEVICE_FUNC + static Scalar run(Scalar n, Scalar x) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +#else + +template +struct polygamma_impl { + EIGEN_DEVICE_FUNC + static Scalar run(Scalar n, Scalar x) { + Scalar zero = 0.0, one = 1.0; + Scalar nplus = n + one; + + // Just return the digamma function for n = 1 + if (n == zero) { + return digamma_impl::run(x); + } + // Use the same implementation as scipy + else { + Scalar factorial = numext::exp(lgamma_impl::run(nplus)); + return numext::pow(-one, nplus) * factorial * zeta_impl::run(nplus, x); + } + } +}; + +#endif // EIGEN_HAS_C99_MATH + } // end namespace internal namespace numext { @@ -927,6 +971,12 @@ zeta(const Scalar& x, const Scalar& q) { return EIGEN_MATHFUNC_IMPL(zeta, Scalar)::run(x, q); } +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(polygamma, Scalar) +polygamma(const Scalar& n, const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(polygamma, Scalar)::run(n, x); +} + template EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) erf(const Scalar& x) { diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index 858775523..317499b29 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -93,17 +93,31 @@ double2 pdigamma(const double2& a) } template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 pzeta(const float4& a) +float4 pzeta(const float4& x, const float4& q) { using numext::zeta; - return make_float4(zeta(a.x), zeta(a.y), zeta(a.z), zeta(a.w)); + return make_float4(zeta(x.x, q.x), zeta(x.y, q.y), zeta(x.z, q.z), zeta(x.w, q.w)); } template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 pzeta(const double2& a) +double2 pzeta(const double2& x, const double2& q) { using numext::zeta; - return make_double2(zeta(a.x), zeta(a.y)); + return make_double2(zeta(x.x, q.x), zeta(x.y, q.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 ppolygamma(const float4& n, const float4& x) +{ + using numext::polygamma; + return make_float4(polygamma(n.x, x.x), polygamma(n.y, x.y), polygamma(n.z, x.z), polygamma(n.w, x.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 ppolygamma(const double2& n, const double2& x) +{ + using numext::polygamma; + return make_double2(polygamma(n.x, x.x), polygamma(n.y, x.y)); } template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index e0db18fbf..932df1092 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -41,6 +41,7 @@ template<> struct packet_traits : default_packet_traits HasLGamma = 1, HasDiGamma = 1, HasZeta = 1, + HasPolygamma = 1, HasErf = 1, HasErfc = 1, HasIgamma = 1, diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index e2fb8d8d6..826d84f69 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -471,6 +471,28 @@ struct functor_traits > }; }; +/** \internal + * \brief Template functor to compute the polygamma function. + * \sa class CwiseUnaryOp, Cwise::polygamma() + */ +template struct scalar_polygamma_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_polygamma_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& n, const Scalar& x) const { + using numext::polygamma; return polygamma(n, x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& n, const Packet& x) const { return internal::ppolygamma(n, x); } +}; +template +struct functor_traits > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, + PacketAccess = packet_traits::HasPolygamma + }; +}; + /** \internal * \brief Template functor to compute the Gauss error function of a * scalar diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 6c92a2f1b..56c71172c 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -24,6 +24,7 @@ typedef CwiseUnaryOp, const Derived> CoshReturn typedef CwiseUnaryOp, const Derived> LgammaReturnType; typedef CwiseUnaryOp, const Derived> DigammaReturnType; typedef CwiseUnaryOp, const Derived> ZetaReturnType; +typedef CwiseUnaryOp, const Derived> PolygammaReturnType; typedef CwiseUnaryOp, const Derived> ErfReturnType; typedef CwiseUnaryOp, const Derived> ErfcReturnType; typedef CwiseUnaryOp, const Derived> PowReturnType; @@ -338,6 +339,14 @@ zeta() const return ZetaReturnType(derived()); } +/** \returns an expression of the coefficient-wise polygamma function. + */ +inline const PolygammaReturnType +polygamma() const +{ + return PolygammaReturnType(derived()); +} + /** \returns an expression of the coefficient-wise Gauss error * function of *this. * diff --git a/test/array.cpp b/test/array.cpp index 2f0b0f1b6..56d196923 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -331,6 +331,11 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(numext::zeta(Scalar(3), Scalar(-2.5)), RealScalar(0.054102025820864097)); VERIFY_IS_EQUAL(numext::zeta(Scalar(1), Scalar(1.2345)), // The second scalar does not matter std::numeric_limits::infinity()); + + // Check the polygamma against scipy.special.polygamma + VERIFY_IS_APPROX(numext::polygamma(Scalar(1), Scalar(2)), RealScalar(0.644934066848)); + VERIFY_IS_APPROX(numext::polygamma(Scalar(1), Scalar(3)), RealScalar(0.394934066848)); + VERIFY_IS_APPROX(numext::polygamma(Scalar(1), Scalar(25.5)), RealScalar(0.0399946696496)); { // Test various propreties of igamma & igammac. These are normalized diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 7c427d3c1..65b969aaf 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -138,6 +138,12 @@ class TensorBase zeta() const { return unaryExpr(internal::scalar_zeta_op()); } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + polygamma() const { + return unaryExpr(internal::scalar_polygamma_op()); + } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> From 3cb0a237c195dd470ff5cd7a6e793b74d2d6815d Mon Sep 17 00:00:00 2001 From: Till Hoffmann Date: Fri, 1 Apr 2016 17:51:39 +0100 Subject: [PATCH 3/7] Fixed suggestions by Eugene Brevdo. --- Eigen/src/Core/SpecialFunctions.h | 92 +++++++++++++------------------ test/array.cpp | 14 ++++- 2 files changed, 52 insertions(+), 54 deletions(-) diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index 02ac7cf3f..26b92607c 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h @@ -806,10 +806,9 @@ struct zeta_impl { */ int i; - /*double a, b, k, s, t, w;*/ Scalar p, r, a, b, k, s, t, w; - const double A[] = { + const Scalar A[] = { 12.0, -720.0, 30240.0, @@ -829,12 +828,10 @@ struct zeta_impl { const Scalar machep = igamma_helper::machep(); if( x == one ) - return maxnum; //goto retinf; + return maxnum; if( x < one ) { - // domerr: - // mtherr( "zeta", DOMAIN ); return zero; } @@ -842,64 +839,53 @@ struct zeta_impl { { if(q == numext::floor(q)) { - // mtherr( "zeta", SING ); - // retinf: return maxnum; } p = x; r = numext::floor(p); - // if( x != floor(x) ) - // goto domerr; /* because q^-x not defined */ if (p != r) return zero; } - /* Euler-Maclaurin summation formula */ - /* - if( x < 25.0 ) + /* Permit negative q but continue sum until n+q > +9 . + * This case should be handled by a reflection formula. + * If q<0 and x is an integer, there is a relation to + * the polygamma function. */ + s = numext::pow( q, -x ); + a = q; + i = 0; + b = zero; + while( (i < 9) || (a <= Scalar(9)) ) { - /* Permit negative q but continue sum until n+q > +9 . - * This case should be handled by a reflection formula. - * If q<0 and x is an integer, there is a relation to - * the polygamma function. - */ - s = numext::pow( q, -x ); - a = q; - i = 0; - b = zero; - while( (i < 9) || (a <= Scalar(9.0)) ) - { - i += 1; - a += one; - b = numext::pow( a, -x ); - s += b; - if( numext::abs(b/s) < machep ) - return s; // goto done; - } - - w = a; - s += b*w/(x-one); - s -= half * b; - a = one; - k = zero; - for( i=0; i<12; i++ ) - { - a *= x + k; - b /= w; - t = a*b/A[i]; - s = s + t; - t = numext::abs(t/s); - if( t < machep ) - return s; // goto done; - k += one; - a *= x + k; - b /= w; - k += one; - } - // done: - return(s); - } + i += 1; + a += one; + b = numext::pow( a, -x ); + s += b; + if( numext::abs(b/s) < machep ) + return s; + } + + w = a; + s += b*w/(x-one); + s -= half * b; + a = one; + k = zero; + for( i=0; i<12; i++ ) + { + a *= x + k; + b /= w; + t = a*b/A[i]; + s = s + t; + t = numext::abs(t/s); + if( t < machep ) + return s; + k += one; + a *= x + k; + b /= w; + k += one; + } + return s; } }; diff --git a/test/array.cpp b/test/array.cpp index 56d196923..8b0a34722 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -332,10 +332,22 @@ template void array_real(const ArrayType& m) VERIFY_IS_EQUAL(numext::zeta(Scalar(1), Scalar(1.2345)), // The second scalar does not matter std::numeric_limits::infinity()); - // Check the polygamma against scipy.special.polygamma + // Check the polygamma against scipy.special.polygamma examples VERIFY_IS_APPROX(numext::polygamma(Scalar(1), Scalar(2)), RealScalar(0.644934066848)); VERIFY_IS_APPROX(numext::polygamma(Scalar(1), Scalar(3)), RealScalar(0.394934066848)); VERIFY_IS_APPROX(numext::polygamma(Scalar(1), Scalar(25.5)), RealScalar(0.0399946696496)); + + // Check the polygamma function over a larger range of values + VERIFY_IS_APPROX(numext::polygamma(Scalar(17), Scalar(4.7)), RealScalar(293.334565435)); + VERIFY_IS_APPROX(numext::polygamma(Scalar(31), Scalar(11.8)), RealScalar(0.445487887616)); + VERIFY_IS_APPROX(numext::polygamma(Scalar(28), Scalar(17.7)), RealScalar(-2.47810300902e-07)); + VERIFY_IS_APPROX(numext::polygamma(Scalar(8), Scalar(30.2)), RealScalar(-8.29668781082e-09)); + /* The following tests only pass for doubles because floats cannot handle the large values of + the gamma function. + VERIFY_IS_APPROX(numext::polygamma(Scalar(42), Scalar(15.8)), RealScalar(-0.434562276666)); + VERIFY_IS_APPROX(numext::polygamma(Scalar(147), Scalar(54.1)), RealScalar(0.567742190178)); + VERIFY_IS_APPROX(numext::polygamma(Scalar(170), Scalar(64)), RealScalar(-0.0108615497927)); + */ { // Test various propreties of igamma & igammac. These are normalized From ffd770ce94b75202187635bf0e1e4d0006f4a015 Mon Sep 17 00:00:00 2001 From: Till Hoffmann Date: Fri, 1 Apr 2016 17:58:24 +0100 Subject: [PATCH 4/7] Fixed CUDA signature. --- .../Eigen/CXX11/src/Tensor/TensorBase.h | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 65b969aaf..77b509f61 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -132,18 +132,6 @@ class TensorBase digamma() const { return unaryExpr(internal::scalar_digamma_op()); } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> - zeta() const { - return unaryExpr(internal::scalar_zeta_op()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> - polygamma() const { - return unaryExpr(internal::scalar_polygamma_op()); - } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> @@ -365,6 +353,20 @@ class TensorBase igammac(const OtherDerived& other) const { return binaryExpr(other.derived(), internal::scalar_igammac_op()); } + + // zeta(x = this, q = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + igammac(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_igammac_op()); + } + + // polygamma(n = this, x = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + igammac(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_igammac_op()); + } // comparisons and tests for Scalars EIGEN_DEVICE_FUNC From eb0ae602bd97866b13524bf3c2593f1dd0b261bf Mon Sep 17 00:00:00 2001 From: Till Hoffmann Date: Fri, 1 Apr 2016 18:17:45 +0100 Subject: [PATCH 5/7] Added CUDA tests. --- unsupported/test/cxx11_tensor_cuda.cu | 121 ++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index 4d8465756..fc56ae71d 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu @@ -626,6 +626,127 @@ void test_cuda_digamma() } } +template +void test_cuda_zeta() +{ + Tensor in_x(6); + Tensor in_q(6); + Tensor out(6); + Tensor expected_out(6); + out.setZero(); + + in_x(0) = Scalar(1); + in_x(1) = Scalar(1.5); + in_x(2) = Scalar(4); + in_x(3) = Scalar(-10.5); + in_x(4) = Scalar(10000.5); + in_x(5) = Scalar(3); + + in_q(0) = Scalar(1.2345); + in_q(1) = Scalar(2); + in_q(2) = Scalar(1.5); + in_q(3) = Scalar(3); + in_q(4) = Scalar(1.0001); + in_q(5) = Scalar(-2.5); + + expected_out(0) = std::numeric_limits::infinity(); + expected_out(1) = Scalar(1.61237534869); + expected_out(2) = Scalar(0.234848505667); + expected_out(3) = Scalar(1.03086757337e-5); + expected_out(4) = Scalar(0.367879440865); + expected_out(5) = Scalar(0.054102025820864097); + + std::size_t bytes = in_x.size() * sizeof(Scalar); + + Scalar* d_in_x, d_in_q; + Scalar* d_out; + cudaMalloc((void**)(&d_in_x), bytes); + cudaMalloc((void**)(&d_in_q), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in_q, in_q.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_in_x(d_in_x, 6); + Eigen::TensorMap > gpu_in_q(d_in_q, 6); + Eigen::TensorMap > gpu_out(d_out, 6); + + gpu_out.device(gpu_device) = gpu_in_x.zeta(gpu_in_q); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + VERIFY_IS_EQUAL(out(0), expected_out(0)); + + for (int i = 1; i < 6; ++i) { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } +} + +template +void test_cuda_polygamma() +{ + Tensor in_x(7); + Tensor in_n(7); + Tensor out(7); + Tensor expected_out(7); + out.setZero(); + + in_n(0) = Scalar(1); + in_n(1) = Scalar(1); + in_n(2) = Scalar(1); + in_n(3) = Scalar(17); + in_n(4) = Scalar(31); + in_n(5) = Scalar(28); + in_n(6) = Scalar(8); + + in_x(0) = Scalar(2); + in_x(1) = Scalar(3); + in_x(2) = Scalar(25.5); + in_x(3) = Scalar(4.7); + in_x(4) = Scalar(11.8); + in_x(5) = Scalar(17.7); + in_x(6) = Scalar(30.2); + + expected_out(0) = Scalar(0.644934066848); + expected_out(1) = Scalar(0.394934066848); + expected_out(2) = Scalar(0.0399946696496); + expected_out(3) = Scalar(293.334565435); + expected_out(4) = Scalar(0.445487887616); + expected_out(5) = Scalar(-2.47810300902e-07); + expected_out(6) = Scalar(-8.29668781082e-09); + + std::size_t bytes = in_x.size() * sizeof(Scalar); + + Scalar* d_in_x, d_in_n; + Scalar* d_out; + cudaMalloc((void**)(&d_in_x), bytes); + cudaMalloc((void**)(&d_in_n), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in_n, in_n.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_in_x(d_in_x, 7); + Eigen::TensorMap > gpu_in_n(d_in_n, 7); + Eigen::TensorMap > gpu_out(d_out, 7); + + gpu_out.device(gpu_device) = gpu_in_n.zeta(gpu_in_x); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 7; ++i) { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } +} + template void test_cuda_igamma() { From b97911dd189c0377df6ba4ef1a710105b9437a3c Mon Sep 17 00:00:00 2001 From: Till Hoffmann Date: Mon, 4 Apr 2016 19:16:03 +0100 Subject: [PATCH 6/7] Refactored code into type-specific helper functions. --- Eigen/src/Core/SpecialFunctions.h | 86 +++++++++++++++++++++++-------- 1 file changed, 65 insertions(+), 21 deletions(-) diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index 26b92607c..772449bc7 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h @@ -744,6 +744,56 @@ struct zeta_impl { }; #else + +template +struct zeta_impl_series { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +template <> +struct zeta_impl_series { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + static bool run(float& a, float& b, float& s, const float x, const float machep) { + int i = 0; + while(i < 9) + { + i += 1; + a += 1.0f; + b = numext::pow( a, -x ); + s += b; + if( numext::abs(b/s) < machep ) + return true; + } + + //Return whether we are done + return false; + } +}; + +template <> +struct zeta_impl_series { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + static bool run(double& a, double& b, double& s, const double x, const double machep) { + int i = 0; + while( (i < 9) || (a <= 9.0) ) + { + i += 1; + a += 1.0; + b = numext::pow( a, -x ); + s += b; + if( numext::abs(b/s) < machep ) + return true; + } + + //Return whether we are done + return false; + } +}; template struct zeta_impl { @@ -809,18 +859,18 @@ struct zeta_impl { Scalar p, r, a, b, k, s, t, w; const Scalar A[] = { - 12.0, - -720.0, - 30240.0, - -1209600.0, - 47900160.0, - -1.8924375803183791606e9, /*1.307674368e12/691*/ - 7.47242496e10, - -2.950130727918164224e12, /*1.067062284288e16/3617*/ - 1.1646782814350067249e14, /*5.109094217170944e18/43867*/ - -4.5979787224074726105e15, /*8.028576626982912e20/174611*/ - 1.8152105401943546773e17, /*1.5511210043330985984e23/854513*/ - -7.1661652561756670113e18 /*1.6938241367317436694528e27/236364091*/ + Scalar(12.0), + Scalar(-720.0), + Scalar(30240.0), + Scalar(-1209600.0), + Scalar(47900160.0), + Scalar(-1.8924375803183791606e9), /*1.307674368e12/691*/ + Scalar(7.47242496e10), + Scalar(-2.950130727918164224e12), /*1.067062284288e16/3617*/ + Scalar(1.1646782814350067249e14), /*5.109094217170944e18/43867*/ + Scalar(-4.5979787224074726105e15), /*8.028576626982912e20/174611*/ + Scalar(1.8152105401943546773e17), /*1.5511210043330985984e23/854513*/ + Scalar(-7.1661652561756670113e18) /*1.6938241367317436694528e27/236364091*/ }; const Scalar maxnum = NumTraits::infinity(); @@ -854,16 +904,10 @@ struct zeta_impl { */ s = numext::pow( q, -x ); a = q; - i = 0; b = zero; - while( (i < 9) || (a <= Scalar(9)) ) - { - i += 1; - a += one; - b = numext::pow( a, -x ); - s += b; - if( numext::abs(b/s) < machep ) - return s; + // Run the summation in a helper function that is specific to the floating precision + if (zeta_impl_series::run(a, b, s, x, machep)) { + return s; } w = a; From a350c25a396aa4fdef4878d165bb3dbaedf0a4bb Mon Sep 17 00:00:00 2001 From: Till Hoffmann Date: Tue, 5 Apr 2016 18:20:40 +0100 Subject: [PATCH 7/7] Added accuracy comments. --- Eigen/src/Core/SpecialFunctions.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index 772449bc7..2a0a6ff15 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h @@ -846,7 +846,12 @@ struct zeta_impl { * * ACCURACY: * + * Relative error for single precision: + * arithmetic domain # trials peak rms + * IEEE 0,25 10000 6.9e-7 1.0e-7 * + * Large arguments may produce underflow in powf(), in which + * case the results are inaccurate. * * REFERENCE: *