mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
@@ -288,6 +288,9 @@ using std::ptrdiff_t;
|
||||
#if defined EIGEN_VECTORIZE_RVV10FP16
|
||||
#include "src/Core/arch/RVV10/PacketMathFP16.h"
|
||||
#endif
|
||||
#if defined EIGEN_VECTORIZE_RVV10BF16
|
||||
#include "src/Core/arch/RVV10/PacketMathBF16.h"
|
||||
#endif
|
||||
#elif defined EIGEN_VECTORIZE_ZVECTOR
|
||||
#include "src/Core/arch/ZVector/PacketMath.h"
|
||||
#include "src/Core/arch/ZVector/MathFunctions.h"
|
||||
|
||||
754
Eigen/src/Core/arch/RVV10/PacketMathBF16.h
Normal file
754
Eigen/src/Core/arch/RVV10/PacketMathBF16.h
Normal file
@@ -0,0 +1,754 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2025 Chip Kerchner <ckerchner@tenstorrent.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_PACKET_MATH_BF16_RVV10_H
|
||||
#define EIGEN_PACKET_MATH_BF16_RVV10_H
|
||||
|
||||
// IWYU pragma: private
|
||||
#include "../../InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
typedef eigen_packet_wrapper<vbfloat16m1_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL))), 26>
|
||||
Packet1Xbf;
|
||||
typedef eigen_packet_wrapper<vbfloat16m2_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 2))), 27>
|
||||
Packet2Xbf;
|
||||
|
||||
#if EIGEN_RISCV64_DEFAULT_LMUL == 1
|
||||
typedef Packet1Xbf PacketXbf;
|
||||
|
||||
template <>
|
||||
struct packet_traits<bfloat16> : default_packet_traits {
|
||||
typedef Packet1Xbf type;
|
||||
typedef Packet1Xbf half;
|
||||
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = rvv_packet_size_selector<bfloat16, EIGEN_RISCV64_RVV_VL, 1>::size,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasShift = 1,
|
||||
HasMul = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 1,
|
||||
HasArg = 0,
|
||||
HasAbs2 = 1,
|
||||
HasMin = 1,
|
||||
HasMax = 1,
|
||||
HasConj = 1,
|
||||
HasSetLinear = 0,
|
||||
HasBlend = 0,
|
||||
HasReduxp = 0,
|
||||
HasSign = 0,
|
||||
|
||||
HasCmp = 1,
|
||||
HasDiv = 1,
|
||||
HasRound = 0,
|
||||
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
HasExp = 0,
|
||||
HasSqrt = 1,
|
||||
HasTanh = 0,
|
||||
HasErf = 0
|
||||
};
|
||||
};
|
||||
|
||||
#else
|
||||
typedef Packet2Xbf PacketXbf;
|
||||
|
||||
template <>
|
||||
struct packet_traits<bfloat16> : default_packet_traits {
|
||||
typedef Packet2Xbf type;
|
||||
typedef Packet1Xbf half;
|
||||
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = rvv_packet_size_selector<bfloat16, EIGEN_RISCV64_RVV_VL, 2>::size,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasShift = 1,
|
||||
HasMul = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 1,
|
||||
HasArg = 0,
|
||||
HasAbs2 = 1,
|
||||
HasMin = 1,
|
||||
HasMax = 1,
|
||||
HasConj = 1,
|
||||
HasSetLinear = 0,
|
||||
HasBlend = 0,
|
||||
HasReduxp = 0,
|
||||
HasSign = 0,
|
||||
|
||||
HasCmp = 1,
|
||||
HasDiv = 1,
|
||||
HasRound = 0,
|
||||
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
HasExp = 0,
|
||||
HasSqrt = 1,
|
||||
HasTanh = 0,
|
||||
HasErf = 0
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
template <>
|
||||
struct unpacket_traits<Packet1Xbf> : default_unpacket_traits {
|
||||
typedef bfloat16 type;
|
||||
typedef Packet1Xbf half; // Half not yet implemented
|
||||
typedef Packet1Xs integer_packet;
|
||||
typedef numext::uint8_t mask_t;
|
||||
|
||||
enum {
|
||||
size = rvv_packet_size_selector<bfloat16, EIGEN_RISCV64_RVV_VL, 1>::size,
|
||||
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 1>::alignment,
|
||||
vectorizable = true
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct unpacket_traits<Packet2Xbf> : default_unpacket_traits {
|
||||
typedef bfloat16 type;
|
||||
typedef Packet1Xbf half;
|
||||
typedef Packet2Xs integer_packet;
|
||||
typedef numext::uint8_t mask_t;
|
||||
|
||||
enum {
|
||||
size = rvv_packet_size_selector<bfloat16, EIGEN_RISCV64_RVV_VL, 2>::size,
|
||||
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 2>::alignment,
|
||||
vectorizable = true
|
||||
};
|
||||
};
|
||||
|
||||
/********************************* Packet1Xbf ************************************/
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2Xf Bf16ToF32(const Packet1Xbf& a) {
|
||||
return __riscv_vfwcvtbf16_f_f_v_f32m2(a, unpacket_traits<Packet1Xbf>::size);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1Xbf F32ToBf16(const Packet2Xf& a) {
|
||||
return __riscv_vfncvtbf16_f_f_w_bf16m1(a, unpacket_traits<Packet2Xf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf ptrue<Packet1Xbf>(const Packet1Xbf& /*a*/) {
|
||||
return __riscv_vreinterpret_bf16m1(__riscv_vmv_v_x_u16m1(static_cast<numext::uint16_t>(0xffffu), unpacket_traits<Packet1Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pzero<Packet1Xbf>(const Packet1Xbf& /*a*/) {
|
||||
return __riscv_vreinterpret_bf16m1(
|
||||
__riscv_vmv_v_x_i16m1(numext::bit_cast<int16_t>(static_cast<__bf16>(0.0)), unpacket_traits<Packet1Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pabs(const Packet1Xbf& a) {
|
||||
return __riscv_vreinterpret_v_u16m1_bf16m1(__riscv_vand_vx_u16m1(
|
||||
__riscv_vreinterpret_v_bf16m1_u16m1(a), static_cast<numext::uint16_t>(0x7fffu), unpacket_traits<Packet1Xs>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pset1<Packet1Xbf>(const bfloat16& from) {
|
||||
return __riscv_vreinterpret_bf16m1(
|
||||
__riscv_vmv_v_x_i16m1(numext::bit_cast<int16_t>(from), unpacket_traits<Packet1Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pset1frombits<Packet1Xbf>(numext::uint16_t from) {
|
||||
return __riscv_vreinterpret_bf16m1(__riscv_vmv_v_x_u16m1(from, unpacket_traits<Packet1Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf plset<Packet1Xbf>(const bfloat16& a) {
|
||||
return F32ToBf16(plset<Packet2Xf>(static_cast<float>(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf padd<Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
return F32ToBf16(padd<Packet2Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf psub<Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
return F32ToBf16(psub<Packet2Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pnegate(const Packet1Xbf& a) {
|
||||
return __riscv_vreinterpret_v_u16m1_bf16m1(__riscv_vxor_vx_u16m1(
|
||||
__riscv_vreinterpret_v_bf16m1_u16m1(a), static_cast<numext::uint16_t>(0x8000u), unpacket_traits<Packet1Xs>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf psignbit(const Packet1Xbf& a) {
|
||||
return __riscv_vreinterpret_v_i16m1_bf16m1(__riscv_vsra_vx_i16m1(
|
||||
__riscv_vreinterpret_v_bf16m1_i16m1(a), 15, unpacket_traits<Packet1Xs>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pconj(const Packet1Xbf& a) {
|
||||
return a;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pmul<Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
Packet2Xf c;
|
||||
return F32ToBf16(__riscv_vfwmaccbf16_vv_f32m2(pzero<Packet2Xf>(c), a, b, unpacket_traits<Packet1Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pdiv<Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
return F32ToBf16(pdiv<Packet2Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pmadd(const Packet1Xbf& a, const Packet1Xbf& b, const Packet1Xbf& c) {
|
||||
return F32ToBf16(__riscv_vfwmaccbf16_vv_f32m2(Bf16ToF32(c), a, b, unpacket_traits<Packet1Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pmsub(const Packet1Xbf& a, const Packet1Xbf& b, const Packet1Xbf& c) {
|
||||
return F32ToBf16(__riscv_vfwmaccbf16_vv_f32m2(Bf16ToF32(pnegate<Packet1Xbf>(c)), a, b, unpacket_traits<Packet1Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pnmadd(const Packet1Xbf& a, const Packet1Xbf& b, const Packet1Xbf& c) {
|
||||
return F32ToBf16(__riscv_vfwmaccbf16_vv_f32m2(Bf16ToF32(c), pnegate<Packet1Xbf>(a), b, unpacket_traits<Packet1Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pnmsub(const Packet1Xbf& a, const Packet1Xbf& b, const Packet1Xbf& c) {
|
||||
return pnegate<Packet1Xbf>(F32ToBf16(__riscv_vfwmaccbf16_vv_f32m2(Bf16ToF32(c), a, b, unpacket_traits<Packet1Xbf>::size)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pmin<Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
return F32ToBf16(pmin<Packet2Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pmin<PropagateNaN, Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
return F32ToBf16(pmin<PropagateNaN, Packet2Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pmin<PropagateNumbers, Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
return F32ToBf16(pmin<PropagateNumbers, Packet2Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pmax<Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
return F32ToBf16(pmax<Packet2Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pmax<PropagateNaN, Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
return F32ToBf16(pmax<PropagateNaN, Packet2Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pmax<PropagateNumbers, Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
return F32ToBf16(pmax<PropagateNumbers, Packet2Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pcmp_le<Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
return F32ToBf16(pcmp_le<Packet2Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pcmp_lt<Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
return F32ToBf16(pcmp_lt<Packet2Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pcmp_eq<Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
return F32ToBf16(pcmp_eq<Packet2Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pcmp_lt_or_nan<Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
return F32ToBf16(pcmp_lt_or_nan<Packet2Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
// Logical Operations are not supported for bfloat16, so reinterpret casts
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pand<Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
return __riscv_vreinterpret_v_u16m1_bf16m1(__riscv_vand_vv_u16m1(
|
||||
__riscv_vreinterpret_v_bf16m1_u16m1(a), __riscv_vreinterpret_v_bf16m1_u16m1(b), unpacket_traits<Packet1Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf por<Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
return __riscv_vreinterpret_v_u16m1_bf16m1(__riscv_vor_vv_u16m1(
|
||||
__riscv_vreinterpret_v_bf16m1_u16m1(a), __riscv_vreinterpret_v_bf16m1_u16m1(b), unpacket_traits<Packet1Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pxor<Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
return __riscv_vreinterpret_v_u16m1_bf16m1(__riscv_vxor_vv_u16m1(
|
||||
__riscv_vreinterpret_v_bf16m1_u16m1(a), __riscv_vreinterpret_v_bf16m1_u16m1(b), unpacket_traits<Packet1Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pandnot<Packet1Xbf>(const Packet1Xbf& a, const Packet1Xbf& b) {
|
||||
return __riscv_vreinterpret_v_u16m1_bf16m1(__riscv_vand_vv_u16m1(
|
||||
__riscv_vreinterpret_v_bf16m1_u16m1(a),
|
||||
__riscv_vnot_v_u16m1(__riscv_vreinterpret_v_bf16m1_u16m1(b), unpacket_traits<Packet1Xbf>::size),
|
||||
unpacket_traits<Packet1Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pload<Packet1Xbf>(const bfloat16* from) {
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return __riscv_vle16_v_bf16m1(reinterpret_cast<const __bf16*>(from),
|
||||
unpacket_traits<Packet1Xbf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf ploadu<Packet1Xbf>(const bfloat16* from) {
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return __riscv_vle16_v_bf16m1(reinterpret_cast<const __bf16*>(from),
|
||||
unpacket_traits<Packet1Xbf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf ploaddup<Packet1Xbf>(const bfloat16* from) {
|
||||
Packet1Xsu idx = __riscv_vid_v_u16m1(unpacket_traits<Packet1Xbf>::size);
|
||||
idx = __riscv_vand_vx_u16m1(idx, static_cast<numext::uint16_t>(0xfffeu), unpacket_traits<Packet1Xbf>::size);
|
||||
return __riscv_vloxei16_v_bf16m1(reinterpret_cast<const __bf16*>(from), idx, unpacket_traits<Packet1Xbf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf ploadquad<Packet1Xbf>(const bfloat16* from) {
|
||||
Packet1Xsu idx = __riscv_vid_v_u16m1(unpacket_traits<Packet1Xbf>::size);
|
||||
idx = __riscv_vsrl_vx_u16m1(__riscv_vand_vx_u16m1(idx, static_cast<numext::uint16_t>(0xfffcu), unpacket_traits<Packet1Xbf>::size), 1,
|
||||
unpacket_traits<Packet1Xbf>::size);
|
||||
return __riscv_vloxei16_v_bf16m1(reinterpret_cast<const __bf16*>(from), idx, unpacket_traits<Packet1Xbf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<bfloat16>(bfloat16* to, const Packet1Xbf& from) {
|
||||
EIGEN_DEBUG_ALIGNED_STORE __riscv_vse16_v_bf16m1(reinterpret_cast<__bf16*>(to), from,
|
||||
unpacket_traits<Packet1Xbf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<bfloat16>(bfloat16* to, const Packet1Xbf& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE __riscv_vse16_v_bf16m1(reinterpret_cast<__bf16*>(to), from,
|
||||
unpacket_traits<Packet1Xbf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline Packet1Xbf pgather<bfloat16, Packet1Xbf>(const bfloat16* from, Index stride) {
|
||||
return __riscv_vlse16_v_bf16m1(reinterpret_cast<const __bf16*>(from), stride * sizeof(bfloat16),
|
||||
unpacket_traits<Packet1Xbf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline void pscatter<bfloat16, Packet1Xbf>(bfloat16* to, const Packet1Xbf& from, Index stride) {
|
||||
__riscv_vsse16(reinterpret_cast<__bf16*>(to), stride * sizeof(bfloat16), from, unpacket_traits<Packet1Xbf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE bfloat16 pfirst<Packet1Xbf>(const Packet1Xbf& a) {
|
||||
return numext::bit_cast<bfloat16>(__riscv_vmv_x_s_i16m1_i16(__riscv_vreinterpret_v_bf16m1_i16m1(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf psqrt(const Packet1Xbf& a) {
|
||||
return F32ToBf16(psqrt<Packet2Xf>(Bf16ToF32(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf print<Packet1Xbf>(const Packet1Xbf& a) {
|
||||
return F32ToBf16(print<Packet2Xf>(Bf16ToF32(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pfloor<Packet1Xbf>(const Packet1Xbf& a) {
|
||||
return F32ToBf16(pfloor<Packet2Xf>(Bf16ToF32(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf preverse(const Packet1Xbf& a) {
|
||||
return __riscv_vreinterpret_v_i16m1_bf16m1(preverse<Packet1Xs>(__riscv_vreinterpret_v_bf16m1_i16m1(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE bfloat16 predux<Packet1Xbf>(const Packet1Xbf& a) {
|
||||
return static_cast<bfloat16>(predux<Packet2Xf>(Bf16ToF32(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE bfloat16 predux_mul<Packet1Xbf>(const Packet1Xbf& a) {
|
||||
return static_cast<bfloat16>(predux_mul<Packet2Xf>(Bf16ToF32(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE bfloat16 predux_min<Packet1Xbf>(const Packet1Xbf& a) {
|
||||
return static_cast<bfloat16>(predux_min<Packet2Xf>(Bf16ToF32(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE bfloat16 predux_max<Packet1Xbf>(const Packet1Xbf& a) {
|
||||
return static_cast<bfloat16>(predux_max<Packet2Xf>(Bf16ToF32(a)));
|
||||
}
|
||||
|
||||
template <int N>
|
||||
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet1Xbf, N>& kernel) {
|
||||
bfloat16 buffer[unpacket_traits<Packet1Xbf>::size * N];
|
||||
int i = 0;
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
__riscv_vsse16(reinterpret_cast<__bf16*>(&buffer[i]), N * sizeof(bfloat16), kernel.packet[i],
|
||||
unpacket_traits<Packet1Xbf>::size);
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
kernel.packet[i] = __riscv_vle16_v_bf16m1(reinterpret_cast<__bf16*>(&buffer[i * unpacket_traits<Packet1Xbf>::size]),
|
||||
unpacket_traits<Packet1Xbf>::size);
|
||||
}
|
||||
}
|
||||
|
||||
/********************************* Packet2Xbf ************************************/
|
||||
|
||||
EIGEN_STRONG_INLINE Packet4Xf Bf16ToF32(const Packet2Xbf& a) {
|
||||
return __riscv_vfwcvtbf16_f_f_v_f32m4(a, unpacket_traits<Packet2Xbf>::size);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2Xbf F32ToBf16(const Packet4Xf& a) {
|
||||
return __riscv_vfncvtbf16_f_f_w_bf16m2(a, unpacket_traits<Packet4Xf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf ptrue<Packet2Xbf>(const Packet2Xbf& /*a*/) {
|
||||
return __riscv_vreinterpret_bf16m2(__riscv_vmv_v_x_u16m2(static_cast<numext::uint16_t>(0xffffu), unpacket_traits<Packet2Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pzero<Packet2Xbf>(const Packet2Xbf& /*a*/) {
|
||||
return __riscv_vreinterpret_bf16m2(
|
||||
__riscv_vmv_v_x_i16m2(numext::bit_cast<int16_t>(static_cast<__bf16>(0.0)), unpacket_traits<Packet2Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pabs(const Packet2Xbf& a) {
|
||||
return __riscv_vreinterpret_v_u16m2_bf16m2(__riscv_vand_vx_u16m2(
|
||||
__riscv_vreinterpret_v_bf16m2_u16m2(a), static_cast<numext::uint16_t>(0x7fffu), unpacket_traits<Packet2Xs>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pset1<Packet2Xbf>(const bfloat16& from) {
|
||||
return __riscv_vreinterpret_bf16m2(
|
||||
__riscv_vmv_v_x_i16m2(numext::bit_cast<int16_t>(from), unpacket_traits<Packet2Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pset1frombits<Packet2Xbf>(numext::uint16_t from) {
|
||||
return __riscv_vreinterpret_bf16m2(__riscv_vmv_v_x_u16m2(from, unpacket_traits<Packet2Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf plset<Packet2Xbf>(const bfloat16& a) {
|
||||
return F32ToBf16(plset<Packet4Xf>(static_cast<float>(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf padd<Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
return F32ToBf16(padd<Packet4Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf psub<Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
return F32ToBf16(psub<Packet4Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pnegate(const Packet2Xbf& a) {
|
||||
return __riscv_vreinterpret_v_u16m2_bf16m2(__riscv_vxor_vx_u16m2(
|
||||
__riscv_vreinterpret_v_bf16m2_u16m2(a), static_cast<numext::uint16_t>(0x8000u), unpacket_traits<Packet2Xs>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf psignbit(const Packet2Xbf& a) {
|
||||
return __riscv_vreinterpret_v_i16m2_bf16m2(__riscv_vsra_vx_i16m2(
|
||||
__riscv_vreinterpret_v_bf16m2_i16m2(a), 15, unpacket_traits<Packet2Xs>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pconj(const Packet2Xbf& a) {
|
||||
return a;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pmul<Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
Packet4Xf c;
|
||||
return F32ToBf16(__riscv_vfwmaccbf16_vv_f32m4(pzero<Packet4Xf>(c), a, b, unpacket_traits<Packet2Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pdiv<Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
return F32ToBf16(pdiv<Packet4Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pmadd(const Packet2Xbf& a, const Packet2Xbf& b, const Packet2Xbf& c) {
|
||||
return F32ToBf16(__riscv_vfwmaccbf16_vv_f32m4(Bf16ToF32(c), a, b, unpacket_traits<Packet2Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pmsub(const Packet2Xbf& a, const Packet2Xbf& b, const Packet2Xbf& c) {
|
||||
return F32ToBf16(__riscv_vfwmaccbf16_vv_f32m4(Bf16ToF32(pnegate<Packet2Xbf>(c)), a, b, unpacket_traits<Packet2Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pnmadd(const Packet2Xbf& a, const Packet2Xbf& b, const Packet2Xbf& c) {
|
||||
return F32ToBf16(__riscv_vfwmaccbf16_vv_f32m4(Bf16ToF32(c), pnegate<Packet2Xbf>(a), b, unpacket_traits<Packet2Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pnmsub(const Packet2Xbf& a, const Packet2Xbf& b, const Packet2Xbf& c) {
|
||||
return pnegate<Packet2Xbf>(F32ToBf16(__riscv_vfwmaccbf16_vv_f32m4(Bf16ToF32(c), a, b, unpacket_traits<Packet2Xbf>::size)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pmin<Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
return F32ToBf16(pmin<Packet4Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pmin<PropagateNaN, Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
return F32ToBf16(pmin<PropagateNaN, Packet4Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pmin<PropagateNumbers, Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
return F32ToBf16(pmin<PropagateNumbers, Packet4Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pmax<Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
return F32ToBf16(pmax<Packet4Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pmax<PropagateNaN, Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
return F32ToBf16(pmax<PropagateNaN, Packet4Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pmax<PropagateNumbers, Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
return F32ToBf16(pmax<PropagateNumbers, Packet4Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pcmp_le<Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
return F32ToBf16(pcmp_le<Packet4Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pcmp_lt<Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
return F32ToBf16(pcmp_lt<Packet4Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pcmp_eq<Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
return F32ToBf16(pcmp_eq<Packet4Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pcmp_lt_or_nan<Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
return F32ToBf16(pcmp_lt_or_nan<Packet4Xf>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
// Logical Operations are not supported for bflaot16, so reinterpret casts
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pand<Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
return __riscv_vreinterpret_v_u16m2_bf16m2(__riscv_vand_vv_u16m2(__riscv_vreinterpret_v_bf16m2_u16m2(a),
|
||||
__riscv_vreinterpret_v_bf16m2_u16m2(b),
|
||||
unpacket_traits<Packet2Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf por<Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
return __riscv_vreinterpret_v_u16m2_bf16m2(__riscv_vor_vv_u16m2(__riscv_vreinterpret_v_bf16m2_u16m2(a),
|
||||
__riscv_vreinterpret_v_bf16m2_u16m2(b),
|
||||
unpacket_traits<Packet2Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pxor<Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
return __riscv_vreinterpret_v_u16m2_bf16m2(__riscv_vxor_vv_u16m2(__riscv_vreinterpret_v_bf16m2_u16m2(a),
|
||||
__riscv_vreinterpret_v_bf16m2_u16m2(b),
|
||||
unpacket_traits<Packet2Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pandnot<Packet2Xbf>(const Packet2Xbf& a, const Packet2Xbf& b) {
|
||||
return __riscv_vreinterpret_v_u16m2_bf16m2(__riscv_vand_vv_u16m2(
|
||||
__riscv_vreinterpret_v_bf16m2_u16m2(a),
|
||||
__riscv_vnot_v_u16m2(__riscv_vreinterpret_v_bf16m2_u16m2(b), unpacket_traits<Packet2Xbf>::size),
|
||||
unpacket_traits<Packet2Xbf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pload<Packet2Xbf>(const bfloat16* from) {
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return __riscv_vle16_v_bf16m2(reinterpret_cast<const __bf16*>(from),
|
||||
unpacket_traits<Packet2Xbf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf ploadu<Packet2Xbf>(const bfloat16* from) {
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return __riscv_vle16_v_bf16m2(reinterpret_cast<const __bf16*>(from),
|
||||
unpacket_traits<Packet2Xbf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf ploaddup<Packet2Xbf>(const bfloat16* from) {
|
||||
Packet2Xsu idx = __riscv_vid_v_u16m2(unpacket_traits<Packet2Xbf>::size);
|
||||
idx = __riscv_vand_vx_u16m2(idx, static_cast<numext::uint16_t>(0xfffeu), unpacket_traits<Packet2Xbf>::size);
|
||||
return __riscv_vloxei16_v_bf16m2(reinterpret_cast<const __bf16*>(from), idx, unpacket_traits<Packet2Xbf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf ploadquad<Packet2Xbf>(const bfloat16* from) {
|
||||
Packet2Xsu idx = __riscv_vid_v_u16m2(unpacket_traits<Packet2Xbf>::size);
|
||||
idx = __riscv_vsrl_vx_u16m2(__riscv_vand_vx_u16m2(idx, static_cast<numext::uint16_t>(0xfffcu), unpacket_traits<Packet2Xbf>::size), 1,
|
||||
unpacket_traits<Packet2Xs>::size);
|
||||
return __riscv_vloxei16_v_bf16m2(reinterpret_cast<const __bf16*>(from), idx, unpacket_traits<Packet2Xbf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<bfloat16>(bfloat16* to, const Packet2Xbf& from) {
|
||||
EIGEN_DEBUG_ALIGNED_STORE __riscv_vse16_v_bf16m2(reinterpret_cast<__bf16*>(to), from,
|
||||
unpacket_traits<Packet2Xbf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<bfloat16>(bfloat16* to, const Packet2Xbf& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE __riscv_vse16_v_bf16m2(reinterpret_cast<__bf16*>(to), from,
|
||||
unpacket_traits<Packet2Xbf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline Packet2Xbf pgather<bfloat16, Packet2Xbf>(const bfloat16* from, Index stride) {
|
||||
return __riscv_vlse16_v_bf16m2(reinterpret_cast<const __bf16*>(from), stride * sizeof(bfloat16),
|
||||
unpacket_traits<Packet2Xbf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline void pscatter<bfloat16, Packet2Xbf>(bfloat16* to, const Packet2Xbf& from,
|
||||
Index stride) {
|
||||
__riscv_vsse16(reinterpret_cast<__bf16*>(to), stride * sizeof(bfloat16), from,
|
||||
unpacket_traits<Packet2Xbf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE bfloat16 pfirst<Packet2Xbf>(const Packet2Xbf& a) {
|
||||
return static_cast<bfloat16>(__riscv_vmv_x_s_i16m2_i16(__riscv_vreinterpret_v_bf16m2_i16m2(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf psqrt(const Packet2Xbf& a) {
|
||||
return F32ToBf16(psqrt<Packet4Xf>(Bf16ToF32(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf print<Packet2Xbf>(const Packet2Xbf& a) {
|
||||
return F32ToBf16(print<Packet4Xf>(Bf16ToF32(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pfloor<Packet2Xbf>(const Packet2Xbf& a) {
|
||||
return F32ToBf16(pfloor<Packet4Xf>(Bf16ToF32(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf preverse(const Packet2Xbf& a) {
|
||||
return __riscv_vreinterpret_v_i16m2_bf16m2(preverse<Packet2Xs>(__riscv_vreinterpret_v_bf16m2_i16m2(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE bfloat16 predux<Packet2Xbf>(const Packet2Xbf& a) {
|
||||
return static_cast<bfloat16>(predux<Packet4Xf>(Bf16ToF32(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE bfloat16 predux_mul<Packet2Xbf>(const Packet2Xbf& a) {
|
||||
return static_cast<bfloat16>(predux_mul<Packet4Xf>(Bf16ToF32(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE bfloat16 predux_min<Packet2Xbf>(const Packet2Xbf& a) {
|
||||
return static_cast<bfloat16>(predux_min<Packet4Xf>(Bf16ToF32(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE bfloat16 predux_max<Packet2Xbf>(const Packet2Xbf& a) {
|
||||
return static_cast<bfloat16>(predux_max<Packet4Xf>(Bf16ToF32(a)));
|
||||
}
|
||||
|
||||
template <int N>
|
||||
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2Xbf, N>& kernel) {
|
||||
bfloat16 buffer[unpacket_traits<Packet2Xbf>::size * N];
|
||||
int i = 0;
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
__riscv_vsse16(reinterpret_cast<__bf16*>(&buffer[i]), N * sizeof(bfloat16), kernel.packet[i],
|
||||
unpacket_traits<Packet2Xbf>::size);
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
kernel.packet[i] =
|
||||
__riscv_vle16_v_bf16m2(reinterpret_cast<__bf16*>(&buffer[i * unpacket_traits<Packet2Xbf>::size]),
|
||||
unpacket_traits<Packet2Xbf>::size);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Packet = Packet2Xbf>
|
||||
EIGEN_STRONG_INLINE
|
||||
typename std::enable_if<std::is_same<Packet, Packet2Xbf>::value && (unpacket_traits<Packet2Xbf>::size % 8) == 0,
|
||||
Packet1Xbf>::type
|
||||
predux_half(const Packet2Xbf& a) {
|
||||
return padd<Packet1Xbf>(__riscv_vget_v_bf16m2_bf16m1(a, 0), __riscv_vget_v_bf16m2_bf16m1(a, 1));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xbf pcast<Packet1Xs, Packet1Xbf>(const Packet1Xs& a) {
|
||||
return __riscv_vreinterpret_v_i16m1_bf16m1(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xbf pcast<Packet2Xs, Packet2Xbf>(const Packet2Xs& a) {
|
||||
return __riscv_vreinterpret_v_i16m2_bf16m2(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1Xs pcast<Packet1Xbf, Packet1Xs>(const Packet1Xbf& a) {
|
||||
return __riscv_vreinterpret_v_bf16m1_i16m1(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xs pcast<Packet2Xbf, Packet2Xs>(const Packet2Xbf& a) {
|
||||
return __riscv_vreinterpret_v_bf16m2_i16m2(a);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace Eigen
|
||||
|
||||
#endif // EIGEN_PACKET_MATH_BF16_RVV10_H
|
||||
@@ -110,7 +110,7 @@ template <>
|
||||
struct unpacket_traits<Packet1Xh> {
|
||||
typedef Eigen::half type;
|
||||
typedef Packet1Xh half; // Half not yet implemented
|
||||
typedef PacketXs integer_packet;
|
||||
typedef Packet1Xs integer_packet;
|
||||
typedef numext::uint8_t mask_t;
|
||||
|
||||
enum {
|
||||
@@ -138,351 +138,351 @@ struct unpacket_traits<Packet2Xh> {
|
||||
};
|
||||
};
|
||||
|
||||
/********************************* PacketXh ************************************/
|
||||
/********************************* Packet1Xh ************************************/
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh ptrue<PacketXh>(const PacketXh& /*a*/) {
|
||||
return __riscv_vreinterpret_f16m1(__riscv_vmv_v_x_u16m1(0xffffu, unpacket_traits<PacketXh>::size));
|
||||
EIGEN_STRONG_INLINE Packet1Xh ptrue<Packet1Xh>(const Packet1Xh& /*a*/) {
|
||||
return __riscv_vreinterpret_f16m1(__riscv_vmv_v_x_u16m1(0xffffu, unpacket_traits<Packet1Xh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pzero<PacketXh>(const PacketXh& /*a*/) {
|
||||
return __riscv_vfmv_v_f_f16m1(static_cast<_Float16>(0.0), unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pzero<Packet1Xh>(const Packet1Xh& /*a*/) {
|
||||
return __riscv_vfmv_v_f_f16m1(static_cast<_Float16>(0.0), unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pabs(const PacketXh& a) {
|
||||
return __riscv_vfabs_v_f16m1(a, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pabs(const Packet1Xh& a) {
|
||||
return __riscv_vfabs_v_f16m1(a, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pset1<PacketXh>(const Eigen::half& from) {
|
||||
return __riscv_vfmv_v_f_f16m1(numext::bit_cast<_Float16>(from), unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pset1<Packet1Xh>(const Eigen::half& from) {
|
||||
return __riscv_vfmv_v_f_f16m1(numext::bit_cast<_Float16>(from), unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pset1frombits<PacketXh>(numext::uint16_t from) {
|
||||
return __riscv_vreinterpret_f16m1(__riscv_vmv_v_x_u16m1(from, unpacket_traits<PacketXh>::size));
|
||||
EIGEN_STRONG_INLINE Packet1Xh pset1frombits<Packet1Xh>(numext::uint16_t from) {
|
||||
return __riscv_vreinterpret_f16m1(__riscv_vmv_v_x_u16m1(from, unpacket_traits<Packet1Xh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh plset<PacketXh>(const Eigen::half& a) {
|
||||
PacketXh idx =
|
||||
__riscv_vfcvt_f_x_v_f16m1(__riscv_vreinterpret_v_u16m1_i16m1(__riscv_vid_v_u16m1(unpacket_traits<PacketXs>::size)),
|
||||
unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vfadd_vf_f16m1(idx, numext::bit_cast<_Float16>(a), unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh plset<Packet1Xh>(const Eigen::half& a) {
|
||||
Packet1Xh idx =
|
||||
__riscv_vfcvt_f_x_v_f16m1(__riscv_vreinterpret_v_u16m1_i16m1(__riscv_vid_v_u16m1(unpacket_traits<Packet1Xs>::size)),
|
||||
unpacket_traits<Packet1Xh>::size);
|
||||
return __riscv_vfadd_vf_f16m1(idx, numext::bit_cast<_Float16>(a), unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh padd<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vfadd_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh padd<Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
return __riscv_vfadd_vv_f16m1(a, b, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh psub<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vfsub_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh psub<Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
return __riscv_vfsub_vv_f16m1(a, b, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pnegate(const PacketXh& a) {
|
||||
return __riscv_vfneg_v_f16m1(a, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pnegate(const Packet1Xh& a) {
|
||||
return __riscv_vfneg_v_f16m1(a, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pconj(const PacketXh& a) {
|
||||
EIGEN_STRONG_INLINE Packet1Xh pconj(const Packet1Xh& a) {
|
||||
return a;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmul<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vfmul_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pmul<Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
return __riscv_vfmul_vv_f16m1(a, b, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pdiv<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vfdiv_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pdiv<Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
return __riscv_vfdiv_vv_f16m1(a, b, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmadd(const PacketXh& a, const PacketXh& b, const PacketXh& c) {
|
||||
return __riscv_vfmadd_vv_f16m1(a, b, c, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pmadd(const Packet1Xh& a, const Packet1Xh& b, const Packet1Xh& c) {
|
||||
return __riscv_vfmadd_vv_f16m1(a, b, c, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmsub(const PacketXh& a, const PacketXh& b, const PacketXh& c) {
|
||||
return __riscv_vfmsub_vv_f16m1(a, b, c, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pmsub(const Packet1Xh& a, const Packet1Xh& b, const Packet1Xh& c) {
|
||||
return __riscv_vfmsub_vv_f16m1(a, b, c, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pnmadd(const PacketXh& a, const PacketXh& b, const PacketXh& c) {
|
||||
return __riscv_vfnmsub_vv_f16m1(a, b, c, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pnmadd(const Packet1Xh& a, const Packet1Xh& b, const Packet1Xh& c) {
|
||||
return __riscv_vfnmsub_vv_f16m1(a, b, c, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pnmsub(const PacketXh& a, const PacketXh& b, const PacketXh& c) {
|
||||
return __riscv_vfnmadd_vv_f16m1(a, b, c, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pnmsub(const Packet1Xh& a, const Packet1Xh& b, const Packet1Xh& c) {
|
||||
return __riscv_vfnmadd_vv_f16m1(a, b, c, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmin<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
EIGEN_STRONG_INLINE Packet1Xh pmin<Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
const Eigen::half nan = (std::numeric_limits<Eigen::half>::quiet_NaN)();
|
||||
PacketXh nans =
|
||||
__riscv_vfmv_v_f_f16m1(numext::bit_cast<_Float16>(nan), unpacket_traits<PacketXh>::size);
|
||||
PacketMask16 mask = __riscv_vmfeq_vv_f16m1_b16(a, a, unpacket_traits<PacketXh>::size);
|
||||
PacketMask16 mask2 = __riscv_vmfeq_vv_f16m1_b16(b, b, unpacket_traits<PacketXh>::size);
|
||||
mask = __riscv_vmand_mm_b16(mask, mask2, unpacket_traits<PacketXh>::size);
|
||||
Packet1Xh nans =
|
||||
__riscv_vfmv_v_f_f16m1(numext::bit_cast<_Float16>(nan), unpacket_traits<Packet1Xh>::size);
|
||||
PacketMask16 mask = __riscv_vmfeq_vv_f16m1_b16(a, a, unpacket_traits<Packet1Xh>::size);
|
||||
PacketMask16 mask2 = __riscv_vmfeq_vv_f16m1_b16(b, b, unpacket_traits<Packet1Xh>::size);
|
||||
mask = __riscv_vmand_mm_b16(mask, mask2, unpacket_traits<Packet1Xh>::size);
|
||||
|
||||
return __riscv_vfmin_vv_f16m1_tumu(mask, nans, a, b, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vfmin_vv_f16m1_tumu(mask, nans, a, b, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmin<PropagateNaN, PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return pmin<PacketXh>(a, b);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pmin<PropagateNaN, Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
return pmin<Packet1Xh>(a, b);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmin<PropagateNumbers, PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vfmin_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pmin<PropagateNumbers, Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
return __riscv_vfmin_vv_f16m1(a, b, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmax<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
EIGEN_STRONG_INLINE Packet1Xh pmax<Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
const Eigen::half nan = (std::numeric_limits<Eigen::half>::quiet_NaN)();
|
||||
PacketXh nans =
|
||||
__riscv_vfmv_v_f_f16m1(numext::bit_cast<_Float16>(nan), unpacket_traits<PacketXh>::size);
|
||||
PacketMask16 mask = __riscv_vmfeq_vv_f16m1_b16(a, a, unpacket_traits<PacketXh>::size);
|
||||
PacketMask16 mask2 = __riscv_vmfeq_vv_f16m1_b16(b, b, unpacket_traits<PacketXh>::size);
|
||||
mask = __riscv_vmand_mm_b16(mask, mask2, unpacket_traits<PacketXh>::size);
|
||||
Packet1Xh nans =
|
||||
__riscv_vfmv_v_f_f16m1(numext::bit_cast<_Float16>(nan), unpacket_traits<Packet1Xh>::size);
|
||||
PacketMask16 mask = __riscv_vmfeq_vv_f16m1_b16(a, a, unpacket_traits<Packet1Xh>::size);
|
||||
PacketMask16 mask2 = __riscv_vmfeq_vv_f16m1_b16(b, b, unpacket_traits<Packet1Xh>::size);
|
||||
mask = __riscv_vmand_mm_b16(mask, mask2, unpacket_traits<Packet1Xh>::size);
|
||||
|
||||
return __riscv_vfmax_vv_f16m1_tumu(mask, nans, a, b, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vfmax_vv_f16m1_tumu(mask, nans, a, b, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmax<PropagateNaN, PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return pmax<PacketXh>(a, b);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pmax<PropagateNaN, Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
return pmax<Packet1Xh>(a, b);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmax<PropagateNumbers, PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vfmax_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pmax<PropagateNumbers, Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
return __riscv_vfmax_vv_f16m1(a, b, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pcmp_le<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
PacketMask16 mask = __riscv_vmfle_vv_f16m1_b16(a, b, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vmerge_vvm_f16m1(pzero<PacketXh>(a), ptrue<PacketXh>(a), mask, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pcmp_le<Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
PacketMask16 mask = __riscv_vmfle_vv_f16m1_b16(a, b, unpacket_traits<Packet1Xh>::size);
|
||||
return __riscv_vmerge_vvm_f16m1(pzero<Packet1Xh>(a), ptrue<Packet1Xh>(a), mask, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pcmp_lt<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
PacketMask16 mask = __riscv_vmflt_vv_f16m1_b16(a, b, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vmerge_vvm_f16m1(pzero<PacketXh>(a), ptrue<PacketXh>(a), mask, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pcmp_lt<Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
PacketMask16 mask = __riscv_vmflt_vv_f16m1_b16(a, b, unpacket_traits<Packet1Xh>::size);
|
||||
return __riscv_vmerge_vvm_f16m1(pzero<Packet1Xh>(a), ptrue<Packet1Xh>(a), mask, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pcmp_eq<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
PacketMask16 mask = __riscv_vmfeq_vv_f16m1_b16(a, b, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vmerge_vvm_f16m1(pzero<PacketXh>(a), ptrue<PacketXh>(a), mask, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pcmp_eq<Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
PacketMask16 mask = __riscv_vmfeq_vv_f16m1_b16(a, b, unpacket_traits<Packet1Xh>::size);
|
||||
return __riscv_vmerge_vvm_f16m1(pzero<Packet1Xh>(a), ptrue<Packet1Xh>(a), mask, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pcmp_lt_or_nan<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
PacketMask16 mask = __riscv_vmfge_vv_f16m1_b16(a, b, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vfmerge_vfm_f16m1(ptrue<PacketXh>(a), static_cast<_Float16>(0.0), mask,
|
||||
unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pcmp_lt_or_nan<Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
PacketMask16 mask = __riscv_vmfge_vv_f16m1_b16(a, b, unpacket_traits<Packet1Xh>::size);
|
||||
return __riscv_vfmerge_vfm_f16m1(ptrue<Packet1Xh>(a), static_cast<_Float16>(0.0), mask,
|
||||
unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
// Logical Operations are not supported for half, so reinterpret casts
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pand<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
EIGEN_STRONG_INLINE Packet1Xh pand<Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
return __riscv_vreinterpret_v_u16m1_f16m1(__riscv_vand_vv_u16m1(
|
||||
__riscv_vreinterpret_v_f16m1_u16m1(a), __riscv_vreinterpret_v_f16m1_u16m1(b), unpacket_traits<PacketXh>::size));
|
||||
__riscv_vreinterpret_v_f16m1_u16m1(a), __riscv_vreinterpret_v_f16m1_u16m1(b), unpacket_traits<Packet1Xh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh por<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
EIGEN_STRONG_INLINE Packet1Xh por<Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
return __riscv_vreinterpret_v_u16m1_f16m1(__riscv_vor_vv_u16m1(
|
||||
__riscv_vreinterpret_v_f16m1_u16m1(a), __riscv_vreinterpret_v_f16m1_u16m1(b), unpacket_traits<PacketXh>::size));
|
||||
__riscv_vreinterpret_v_f16m1_u16m1(a), __riscv_vreinterpret_v_f16m1_u16m1(b), unpacket_traits<Packet1Xh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pxor<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
EIGEN_STRONG_INLINE Packet1Xh pxor<Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
return __riscv_vreinterpret_v_u16m1_f16m1(__riscv_vxor_vv_u16m1(
|
||||
__riscv_vreinterpret_v_f16m1_u16m1(a), __riscv_vreinterpret_v_f16m1_u16m1(b), unpacket_traits<PacketXh>::size));
|
||||
__riscv_vreinterpret_v_f16m1_u16m1(a), __riscv_vreinterpret_v_f16m1_u16m1(b), unpacket_traits<Packet1Xh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pandnot<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
EIGEN_STRONG_INLINE Packet1Xh pandnot<Packet1Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
return __riscv_vreinterpret_v_u16m1_f16m1(__riscv_vand_vv_u16m1(
|
||||
__riscv_vreinterpret_v_f16m1_u16m1(a),
|
||||
__riscv_vnot_v_u16m1(__riscv_vreinterpret_v_f16m1_u16m1(b), unpacket_traits<PacketXh>::size),
|
||||
unpacket_traits<PacketXh>::size));
|
||||
__riscv_vnot_v_u16m1(__riscv_vreinterpret_v_f16m1_u16m1(b), unpacket_traits<Packet1Xh>::size),
|
||||
unpacket_traits<Packet1Xh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pload<PacketXh>(const Eigen::half* from) {
|
||||
EIGEN_STRONG_INLINE Packet1Xh pload<Packet1Xh>(const Eigen::half* from) {
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return __riscv_vle16_v_f16m1(reinterpret_cast<const _Float16*>(from),
|
||||
unpacket_traits<PacketXh>::size);
|
||||
unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh ploadu<PacketXh>(const Eigen::half* from) {
|
||||
EIGEN_STRONG_INLINE Packet1Xh ploadu<Packet1Xh>(const Eigen::half* from) {
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return __riscv_vle16_v_f16m1(reinterpret_cast<const _Float16*>(from),
|
||||
unpacket_traits<PacketXh>::size);
|
||||
unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh ploaddup<PacketXh>(const Eigen::half* from) {
|
||||
PacketXsu idx = __riscv_vid_v_u16m1(unpacket_traits<PacketXh>::size);
|
||||
idx = __riscv_vand_vx_u16m1(idx, 0xfffeu, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vloxei16_v_f16m1(reinterpret_cast<const _Float16*>(from), idx, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh ploaddup<Packet1Xh>(const Eigen::half* from) {
|
||||
Packet1Xsu idx = __riscv_vid_v_u16m1(unpacket_traits<Packet1Xh>::size);
|
||||
idx = __riscv_vand_vx_u16m1(idx, 0xfffeu, unpacket_traits<Packet1Xh>::size);
|
||||
return __riscv_vloxei16_v_f16m1(reinterpret_cast<const _Float16*>(from), idx, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh ploadquad<PacketXh>(const Eigen::half* from) {
|
||||
PacketXsu idx = __riscv_vid_v_u16m1(unpacket_traits<PacketXh>::size);
|
||||
idx = __riscv_vsrl_vx_u16m1(__riscv_vand_vx_u16m1(idx, 0xfffcu, unpacket_traits<PacketXh>::size), 1,
|
||||
unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vloxei16_v_f16m1(reinterpret_cast<const _Float16*>(from), idx, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh ploadquad<Packet1Xh>(const Eigen::half* from) {
|
||||
Packet1Xsu idx = __riscv_vid_v_u16m1(unpacket_traits<Packet1Xh>::size);
|
||||
idx = __riscv_vsrl_vx_u16m1(__riscv_vand_vx_u16m1(idx, 0xfffcu, unpacket_traits<Packet1Xh>::size), 1,
|
||||
unpacket_traits<Packet1Xh>::size);
|
||||
return __riscv_vloxei16_v_f16m1(reinterpret_cast<const _Float16*>(from), idx, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const PacketXh& from) {
|
||||
EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const Packet1Xh& from) {
|
||||
EIGEN_DEBUG_ALIGNED_STORE __riscv_vse16_v_f16m1(reinterpret_cast<_Float16*>(to), from,
|
||||
unpacket_traits<PacketXh>::size);
|
||||
unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const PacketXh& from) {
|
||||
EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const Packet1Xh& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE __riscv_vse16_v_f16m1(reinterpret_cast<_Float16*>(to), from,
|
||||
unpacket_traits<PacketXh>::size);
|
||||
unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline PacketXh pgather<Eigen::half, PacketXh>(const Eigen::half* from, Index stride) {
|
||||
EIGEN_DEVICE_FUNC inline Packet1Xh pgather<Eigen::half, Packet1Xh>(const Eigen::half* from, Index stride) {
|
||||
return __riscv_vlse16_v_f16m1(reinterpret_cast<const _Float16*>(from), stride * sizeof(Eigen::half),
|
||||
unpacket_traits<PacketXh>::size);
|
||||
unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline void pscatter<Eigen::half, PacketXh>(Eigen::half* to, const PacketXh& from, Index stride) {
|
||||
__riscv_vsse16(reinterpret_cast<_Float16*>(to), stride * sizeof(Eigen::half), from, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_DEVICE_FUNC inline void pscatter<Eigen::half, Packet1Xh>(Eigen::half* to, const Packet1Xh& from, Index stride) {
|
||||
__riscv_vsse16(reinterpret_cast<_Float16*>(to), stride * sizeof(Eigen::half), from, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Eigen::half pfirst<PacketXh>(const PacketXh& a) {
|
||||
EIGEN_STRONG_INLINE Eigen::half pfirst<Packet1Xh>(const Packet1Xh& a) {
|
||||
return static_cast<Eigen::half>(__riscv_vfmv_f_s_f16m1_f16(a));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh psqrt(const PacketXh& a) {
|
||||
return __riscv_vfsqrt_v_f16m1(a, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh psqrt(const Packet1Xh& a) {
|
||||
return __riscv_vfsqrt_v_f16m1(a, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh print<PacketXh>(const PacketXh& a) {
|
||||
const PacketXh limit = pset1<PacketXh>(static_cast<Eigen::half>(1 << 10));
|
||||
const PacketXh abs_a = pabs(a);
|
||||
EIGEN_STRONG_INLINE Packet1Xh print<Packet1Xh>(const Packet1Xh& a) {
|
||||
const Packet1Xh limit = pset1<Packet1Xh>(static_cast<Eigen::half>(1 << 10));
|
||||
const Packet1Xh abs_a = pabs(a);
|
||||
|
||||
PacketMask16 mask = __riscv_vmfne_vv_f16m1_b16(a, a, unpacket_traits<PacketXh>::size);
|
||||
const PacketXh x = __riscv_vfadd_vv_f16m1_tumu(mask, a, a, a, unpacket_traits<PacketXh>::size);
|
||||
const PacketXh new_x = __riscv_vfcvt_f_x_v_f16m1(__riscv_vfcvt_x_f_v_i16m1(a, unpacket_traits<PacketXh>::size),
|
||||
unpacket_traits<PacketXh>::size);
|
||||
PacketMask16 mask = __riscv_vmfne_vv_f16m1_b16(a, a, unpacket_traits<Packet1Xh>::size);
|
||||
const Packet1Xh x = __riscv_vfadd_vv_f16m1_tumu(mask, a, a, a, unpacket_traits<Packet1Xh>::size);
|
||||
const Packet1Xh new_x = __riscv_vfcvt_f_x_v_f16m1(__riscv_vfcvt_x_f_v_i16m1(a, unpacket_traits<Packet1Xh>::size),
|
||||
unpacket_traits<Packet1Xh>::size);
|
||||
|
||||
mask = __riscv_vmflt_vv_f16m1_b16(abs_a, limit, unpacket_traits<PacketXh>::size);
|
||||
PacketXh signed_x = __riscv_vfsgnj_vv_f16m1(new_x, x, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vmerge_vvm_f16m1(x, signed_x, mask, unpacket_traits<PacketXh>::size);
|
||||
mask = __riscv_vmflt_vv_f16m1_b16(abs_a, limit, unpacket_traits<Packet1Xh>::size);
|
||||
Packet1Xh signed_x = __riscv_vfsgnj_vv_f16m1(new_x, x, unpacket_traits<Packet1Xh>::size);
|
||||
return __riscv_vmerge_vvm_f16m1(x, signed_x, mask, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pfloor<PacketXh>(const PacketXh& a) {
|
||||
PacketXh tmp = print<PacketXh>(a);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pfloor<Packet1Xh>(const Packet1Xh& a) {
|
||||
Packet1Xh tmp = print<Packet1Xh>(a);
|
||||
// If greater, subtract one.
|
||||
PacketMask16 mask = __riscv_vmflt_vv_f16m1_b16(a, tmp, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vfsub_vf_f16m1_tumu(mask, tmp, tmp, static_cast<_Float16>(1.0), unpacket_traits<PacketXh>::size);
|
||||
PacketMask16 mask = __riscv_vmflt_vv_f16m1_b16(a, tmp, unpacket_traits<Packet1Xh>::size);
|
||||
return __riscv_vfsub_vf_f16m1_tumu(mask, tmp, tmp, static_cast<_Float16>(1.0), unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh preverse(const PacketXh& a) {
|
||||
PacketXsu idx = __riscv_vrsub_vx_u16m1(__riscv_vid_v_u16m1(unpacket_traits<PacketXh>::size),
|
||||
unpacket_traits<PacketXh>::size - 1, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vrgather_vv_f16m1(a, idx, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh preverse(const Packet1Xh& a) {
|
||||
Packet1Xsu idx = __riscv_vrsub_vx_u16m1(__riscv_vid_v_u16m1(unpacket_traits<Packet1Xh>::size),
|
||||
unpacket_traits<Packet1Xh>::size - 1, unpacket_traits<Packet1Xh>::size);
|
||||
return __riscv_vrgather_vv_f16m1(a, idx, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Eigen::half predux<PacketXh>(const PacketXh& a) {
|
||||
EIGEN_STRONG_INLINE Eigen::half predux<Packet1Xh>(const Packet1Xh& a) {
|
||||
return static_cast<Eigen::half>(__riscv_vfmv_f(__riscv_vfredusum_vs_f16m1_f16m1(
|
||||
a, __riscv_vfmv_v_f_f16m1(static_cast<_Float16>(0.0), unpacket_traits<PacketXh>::size),
|
||||
unpacket_traits<PacketXh>::size)));
|
||||
a, __riscv_vfmv_v_f_f16m1(static_cast<_Float16>(0.0), unpacket_traits<Packet1Xh>::size),
|
||||
unpacket_traits<Packet1Xh>::size)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Eigen::half predux_mul<PacketXh>(const PacketXh& a) {
|
||||
EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet1Xh>(const Packet1Xh& a) {
|
||||
// Multiply the vector by its reverse
|
||||
PacketXh prod = __riscv_vfmul_vv_f16m1(preverse(a), a, unpacket_traits<PacketXh>::size);
|
||||
PacketXh half_prod;
|
||||
Packet1Xh prod = __riscv_vfmul_vv_f16m1(preverse(a), a, unpacket_traits<Packet1Xh>::size);
|
||||
Packet1Xh half_prod;
|
||||
|
||||
if (EIGEN_RISCV64_RVV_VL >= 1024) {
|
||||
half_prod = __riscv_vslidedown_vx_f16m1(prod, 16, unpacket_traits<PacketXh>::size);
|
||||
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<PacketXh>::size);
|
||||
half_prod = __riscv_vslidedown_vx_f16m1(prod, 16, unpacket_traits<Packet1Xh>::size);
|
||||
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
if (EIGEN_RISCV64_RVV_VL >= 512) {
|
||||
half_prod = __riscv_vslidedown_vx_f16m1(prod, 8, unpacket_traits<PacketXh>::size);
|
||||
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<PacketXh>::size);
|
||||
half_prod = __riscv_vslidedown_vx_f16m1(prod, 8, unpacket_traits<Packet1Xh>::size);
|
||||
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
if (EIGEN_RISCV64_RVV_VL >= 256) {
|
||||
half_prod = __riscv_vslidedown_vx_f16m1(prod, 4, unpacket_traits<PacketXh>::size);
|
||||
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<PacketXh>::size);
|
||||
half_prod = __riscv_vslidedown_vx_f16m1(prod, 4, unpacket_traits<Packet1Xh>::size);
|
||||
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
// Last reduction
|
||||
half_prod = __riscv_vslidedown_vx_f16m1(prod, 2, unpacket_traits<PacketXh>::size);
|
||||
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<PacketXh>::size);
|
||||
half_prod = __riscv_vslidedown_vx_f16m1(prod, 2, unpacket_traits<Packet1Xh>::size);
|
||||
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<Packet1Xh>::size);
|
||||
|
||||
half_prod = __riscv_vslidedown_vx_f16m1(prod, 1, unpacket_traits<PacketXh>::size);
|
||||
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<PacketXh>::size);
|
||||
half_prod = __riscv_vslidedown_vx_f16m1(prod, 1, unpacket_traits<Packet1Xh>::size);
|
||||
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<Packet1Xh>::size);
|
||||
|
||||
// The reduction is done to the first element.
|
||||
return pfirst(prod);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Eigen::half predux_min<PacketXh>(const PacketXh& a) {
|
||||
EIGEN_STRONG_INLINE Eigen::half predux_min<Packet1Xh>(const Packet1Xh& a) {
|
||||
const Eigen::half max = (std::numeric_limits<Eigen::half>::max)();
|
||||
return static_cast<Eigen::half>(__riscv_vfmv_f(__riscv_vfredmin_vs_f16m1_f16m1(
|
||||
a, __riscv_vfmv_v_f_f16m1(numext::bit_cast<_Float16>(max), unpacket_traits<PacketXh>::size),
|
||||
unpacket_traits<PacketXh>::size)));
|
||||
a, __riscv_vfmv_v_f_f16m1(numext::bit_cast<_Float16>(max), unpacket_traits<Packet1Xh>::size),
|
||||
unpacket_traits<Packet1Xh>::size)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Eigen::half predux_max<PacketXh>(const PacketXh& a) {
|
||||
EIGEN_STRONG_INLINE Eigen::half predux_max<Packet1Xh>(const Packet1Xh& a) {
|
||||
const Eigen::half min = (std::numeric_limits<Eigen::half>::min)();
|
||||
return static_cast<Eigen::half>(__riscv_vfmv_f(__riscv_vfredmax_vs_f16m1_f16m1(
|
||||
a, __riscv_vfmv_v_f_f16m1(numext::bit_cast<_Float16>(min), unpacket_traits<PacketXh>::size),
|
||||
unpacket_traits<PacketXh>::size)));
|
||||
a, __riscv_vfmv_v_f_f16m1(numext::bit_cast<_Float16>(min), unpacket_traits<Packet1Xh>::size),
|
||||
unpacket_traits<Packet1Xh>::size)));
|
||||
}
|
||||
|
||||
template <int N>
|
||||
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketXh, N>& kernel) {
|
||||
Eigen::half buffer[unpacket_traits<PacketXh>::size * N];
|
||||
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet1Xh, N>& kernel) {
|
||||
Eigen::half buffer[unpacket_traits<Packet1Xh>::size * N];
|
||||
int i = 0;
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
__riscv_vsse16(reinterpret_cast<_Float16*>(&buffer[i]), N * sizeof(Eigen::half), kernel.packet[i],
|
||||
unpacket_traits<PacketXh>::size);
|
||||
unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
kernel.packet[i] = __riscv_vle16_v_f16m1(reinterpret_cast<_Float16*>(&buffer[i * unpacket_traits<PacketXh>::size]),
|
||||
unpacket_traits<PacketXh>::size);
|
||||
kernel.packet[i] = __riscv_vle16_v_f16m1(reinterpret_cast<_Float16*>(&buffer[i * unpacket_traits<Packet1Xh>::size]),
|
||||
unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2Xf half2float(const PacketXh& a) {
|
||||
EIGEN_STRONG_INLINE Packet2Xf half2float(const Packet1Xh& a) {
|
||||
return __riscv_vfwcvt_f_f_v_f32m2(a, unpacket_traits<Packet2Xf>::size);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE PacketXh float2half(const Packet2Xf& a) {
|
||||
return __riscv_vfncvt_f_f_w_f16m1(a, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh float2half(const Packet2Xf& a) {
|
||||
return __riscv_vfncvt_f_f_w_f16m1(a, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
/********************************* Packet2Xh ************************************/
|
||||
@@ -774,8 +774,8 @@ EIGEN_STRONG_INLINE Eigen::half predux<Packet2Xh>(const Packet2Xh& a) {
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet2Xh>(const Packet2Xh& a) {
|
||||
return predux_mul<PacketXh>(__riscv_vfmul_vv_f16m1(__riscv_vget_v_f16m2_f16m1(a, 0), __riscv_vget_v_f16m2_f16m1(a, 1),
|
||||
unpacket_traits<PacketXh>::size));
|
||||
return predux_mul<Packet1Xh>(__riscv_vfmul_vv_f16m1(__riscv_vget_v_f16m2_f16m1(a, 0), __riscv_vget_v_f16m2_f16m1(a, 1),
|
||||
unpacket_traits<Packet1Xh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -822,22 +822,22 @@ EIGEN_STRONG_INLINE Packet2Xh float2half(const Packet4Xf& a) {
|
||||
template <typename Packet = Packet2Xh>
|
||||
EIGEN_STRONG_INLINE
|
||||
typename std::enable_if<std::is_same<Packet, Packet2Xh>::value && (unpacket_traits<Packet2Xh>::size % 8) == 0,
|
||||
PacketXh>::type
|
||||
Packet1Xh>::type
|
||||
predux_half(const Packet2Xh& a) {
|
||||
return __riscv_vfadd_vv_f16m1(__riscv_vget_v_f16m2_f16m1(a, 0), __riscv_vget_v_f16m2_f16m1(a, 1),
|
||||
unpacket_traits<PacketXh>::size);
|
||||
unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, pcos)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, pexp)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, pexpm1)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, plog)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, plog1p)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, plog2)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, preciprocal)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, prsqrt)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, psin)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, ptanh)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, Packet1Xh, pcos)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, Packet1Xh, pexp)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, Packet1Xh, pexpm1)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, Packet1Xh, plog)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, Packet1Xh, plog1p)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, Packet1Xh, plog2)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, Packet1Xh, preciprocal)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, Packet1Xh, prsqrt)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, Packet1Xh, psin)
|
||||
F16_PACKET_FUNCTION(Packet2Xf, Packet1Xh, ptanh)
|
||||
|
||||
F16_PACKET_FUNCTION(Packet4Xf, Packet2Xh, pcos)
|
||||
F16_PACKET_FUNCTION(Packet4Xf, Packet2Xh, pexp)
|
||||
@@ -863,22 +863,22 @@ struct type_casting_traits<numext::int16_t, _Float16> {
|
||||
};
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pcast<PacketXs, PacketXh>(const PacketXs& a) {
|
||||
return __riscv_vfcvt_f_x_v_f16m1(a, unpacket_traits<PacketXs>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xh pcast<Packet1Xs, Packet1Xh>(const Packet1Xs& a) {
|
||||
return __riscv_vfcvt_f_x_v_f16m1(a, unpacket_traits<Packet1Xs>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXs pcast<PacketXh, PacketXs>(const PacketXh& a) {
|
||||
return __riscv_vfcvt_rtz_x_f_v_i16m1(a, unpacket_traits<PacketXh>::size);
|
||||
EIGEN_STRONG_INLINE Packet1Xs pcast<Packet1Xh, Packet1Xs>(const Packet1Xh& a) {
|
||||
return __riscv_vfcvt_rtz_x_f_v_i16m1(a, unpacket_traits<Packet1Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh preinterpret<PacketXh, PacketXs>(const PacketXs& a) {
|
||||
EIGEN_STRONG_INLINE Packet1Xh preinterpret<Packet1Xh, Packet1Xs>(const Packet1Xs& a) {
|
||||
return __riscv_vreinterpret_v_i16m1_f16m1(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXs preinterpret<PacketXs, PacketXh>(const PacketXh& a) {
|
||||
EIGEN_STRONG_INLINE Packet1Xs preinterpret<Packet1Xs, Packet1Xh>(const Packet1Xh& a) {
|
||||
return __riscv_vreinterpret_v_f16m1_i16m1(a);
|
||||
}
|
||||
|
||||
@@ -903,29 +903,29 @@ EIGEN_STRONG_INLINE Packet2Xs preinterpret<Packet2Xs, Packet2Xh>(const Packet2Xh
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4Xs pcast<PacketXh, Packet4Xs>(const PacketXh& a, const PacketXh& b, const PacketXh& c,
|
||||
const PacketXh& d) {
|
||||
return __riscv_vcreate_v_i16m1_i16m4(__riscv_vfcvt_rtz_x_f_v_i16m1(a, unpacket_traits<PacketXh>::size),
|
||||
__riscv_vfcvt_rtz_x_f_v_i16m1(b, unpacket_traits<PacketXh>::size),
|
||||
__riscv_vfcvt_rtz_x_f_v_i16m1(c, unpacket_traits<PacketXh>::size),
|
||||
__riscv_vfcvt_rtz_x_f_v_i16m1(d, unpacket_traits<PacketXh>::size));
|
||||
EIGEN_STRONG_INLINE Packet4Xs pcast<Packet1Xh, Packet4Xs>(const Packet1Xh& a, const Packet1Xh& b, const Packet1Xh& c,
|
||||
const Packet1Xh& d) {
|
||||
return __riscv_vcreate_v_i16m1_i16m4(__riscv_vfcvt_rtz_x_f_v_i16m1(a, unpacket_traits<Packet1Xh>::size),
|
||||
__riscv_vfcvt_rtz_x_f_v_i16m1(b, unpacket_traits<Packet1Xh>::size),
|
||||
__riscv_vfcvt_rtz_x_f_v_i16m1(c, unpacket_traits<Packet1Xh>::size),
|
||||
__riscv_vfcvt_rtz_x_f_v_i16m1(d, unpacket_traits<Packet1Xh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xh pcast<PacketXs, Packet2Xh>(const PacketXs& a, const PacketXs& b) {
|
||||
return __riscv_vcreate_v_f16m1_f16m2(__riscv_vfcvt_f_x_v_f16m1(a, unpacket_traits<PacketXs>::size),
|
||||
__riscv_vfcvt_f_x_v_f16m1(b, unpacket_traits<PacketXs>::size));
|
||||
EIGEN_STRONG_INLINE Packet2Xh pcast<Packet1Xs, Packet2Xh>(const Packet1Xs& a, const Packet1Xs& b) {
|
||||
return __riscv_vcreate_v_f16m1_f16m2(__riscv_vfcvt_f_x_v_f16m1(a, unpacket_traits<Packet1Xs>::size),
|
||||
__riscv_vfcvt_f_x_v_f16m1(b, unpacket_traits<Packet1Xs>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xh pcast<PacketXh, Packet2Xh>(const PacketXh& a, const PacketXh& b) {
|
||||
EIGEN_STRONG_INLINE Packet2Xh pcast<Packet1Xh, Packet2Xh>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
return __riscv_vcreate_v_f16m1_f16m2(a, b);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2Xs pcast<PacketXh, Packet2Xs>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vcreate_v_i16m1_i16m2(__riscv_vfcvt_rtz_x_f_v_i16m1(a, unpacket_traits<PacketXh>::size),
|
||||
__riscv_vfcvt_rtz_x_f_v_i16m1(b, unpacket_traits<PacketXh>::size));
|
||||
EIGEN_STRONG_INLINE Packet2Xs pcast<Packet1Xh, Packet2Xs>(const Packet1Xh& a, const Packet1Xh& b) {
|
||||
return __riscv_vcreate_v_i16m1_i16m2(__riscv_vfcvt_rtz_x_f_v_i16m1(a, unpacket_traits<Packet1Xh>::size),
|
||||
__riscv_vfcvt_rtz_x_f_v_i16m1(b, unpacket_traits<Packet1Xh>::size));
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
|
||||
@@ -467,6 +467,10 @@ extern "C" {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__riscv_zvfbfwma)
|
||||
#define EIGEN_VECTORIZE_RVV10BF16
|
||||
#endif
|
||||
|
||||
#endif // defined(EIGEN_ARCH_RISCV)
|
||||
|
||||
#elif (defined __s390x__ && defined __VEC__)
|
||||
|
||||
Reference in New Issue
Block a user