Files
eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
Rasmus Munk Larsen 125cc9a5df Implement vectorized complex square root.
Closes #1905

Measured speedup for sqrt of `complex<float>` on Skylake:

SSE:
```
name                      old time/op             new time/op  delta
BM_eigen_sqrt_ctype/1     49.4ns ± 0%             54.3ns ± 0%  +10.01%
BM_eigen_sqrt_ctype/8      332ns ± 0%               50ns ± 1%  -84.97%
BM_eigen_sqrt_ctype/64    2.81µs ± 1%             0.38µs ± 0%  -86.49%
BM_eigen_sqrt_ctype/512   23.8µs ± 0%              3.0µs ± 0%  -87.32%
BM_eigen_sqrt_ctype/4k     202µs ± 0%               24µs ± 2%  -88.03%
BM_eigen_sqrt_ctype/32k   1.63ms ± 0%             0.19ms ± 0%  -88.18%
BM_eigen_sqrt_ctype/256k  13.0ms ± 0%              1.5ms ± 1%  -88.20%
BM_eigen_sqrt_ctype/1M    52.1ms ± 0%              6.2ms ± 0%  -88.18%
```

AVX2:
```
name                      old cpu/op  new cpu/op  delta
BM_eigen_sqrt_ctype/1     53.6ns ± 0%  55.6ns ± 0%   +3.71%
BM_eigen_sqrt_ctype/8      334ns ± 0%    27ns ± 0%  -91.86%
BM_eigen_sqrt_ctype/64    2.79µs ± 0%  0.22µs ± 2%  -92.28%
BM_eigen_sqrt_ctype/512   23.8µs ± 1%   1.7µs ± 1%  -92.81%
BM_eigen_sqrt_ctype/4k     201µs ± 0%    14µs ± 1%  -93.24%
BM_eigen_sqrt_ctype/32k   1.62ms ± 0%  0.11ms ± 1%  -93.29%
BM_eigen_sqrt_ctype/256k  13.0ms ± 0%   0.9ms ± 1%  -93.31%
BM_eigen_sqrt_ctype/1M    52.0ms ± 0%   3.5ms ± 1%  -93.31%
```

AVX512:
```
name                      old cpu/op  new cpu/op  delta
BM_eigen_sqrt_ctype/1     53.7ns ± 0%  56.2ns ± 1%   +4.75%
BM_eigen_sqrt_ctype/8      334ns ± 0%    18ns ± 2%  -94.63%
BM_eigen_sqrt_ctype/64    2.79µs ± 0%  0.12µs ± 1%  -95.54%
BM_eigen_sqrt_ctype/512   23.9µs ± 1%   1.0µs ± 1%  -95.89%
BM_eigen_sqrt_ctype/4k     202µs ± 0%     8µs ± 1%  -96.13%
BM_eigen_sqrt_ctype/32k   1.63ms ± 0%  0.06ms ± 1%  -96.15%
BM_eigen_sqrt_ctype/256k  13.0ms ± 0%   0.5ms ± 4%  -96.11%
BM_eigen_sqrt_ctype/1M    52.1ms ± 0%   2.0ms ± 1%  -96.13%
```
2020-12-08 18:13:35 -08:00

98 lines
3.0 KiB
C++

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2019 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_FWD_H
#define EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_FWD_H
namespace Eigen {
namespace internal {
// Forward declarations of the generic math functions
// implemented in GenericPacketMathFunctions.h
// This is needed to workaround a circular dependency.
template<typename Packet> EIGEN_STRONG_INLINE Packet
pfrexp_float(const Packet& a, Packet& exponent);
template<typename Packet> EIGEN_STRONG_INLINE Packet
pfrexp_double(const Packet& a, Packet& exponent);
template<typename Packet> EIGEN_STRONG_INLINE Packet
pldexp_float(Packet a, Packet exponent);
/** \internal \returns log(x) for single precision float */
template <typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet plog_float(const Packet _x);
/** \internal \returns log2(x) for single precision float */
template <typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet plog2_float(const Packet _x);
/** \internal \returns log(x) for single precision float */
template <typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet plog_double(const Packet _x);
/** \internal \returns log2(x) for single precision float */
template <typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet plog2_double(const Packet _x);
/** \internal \returns log(1 + x) */
template<typename Packet>
Packet generic_plog1p(const Packet& x);
/** \internal \returns exp(x)-1 */
template<typename Packet>
Packet generic_expm1(const Packet& x);
/** \internal \returns exp(x) for single precision float */
template <typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet pexp_float(const Packet _x);
/** \internal \returns exp(x) for double precision real numbers */
template <typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet pexp_double(const Packet _x);
/** \internal \returns sin(x) for single precision float */
template<typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet psin_float(const Packet& x);
/** \internal \returns cos(x) for single precision float */
template<typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet pcos_float(const Packet& x);
/** \internal \returns sqrt(x) for complex types */
template<typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet psqrt_complex(const Packet& a);
template <typename Packet, int N> struct ppolevl;
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_FWD_H