mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
2397 lines
82 KiB
C++
2397 lines
82 KiB
C++
// This file is part of Eigen, a lightweight C++ template library
|
|
// for linear algebra.
|
|
//
|
|
// Copyright (C) 2024 Kseniya Zaytseva <kseniya.zaytseva@syntacore.com>
|
|
//
|
|
// This Source Code Form is subject to the terms of the Mozilla
|
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
#ifndef EIGEN_PACKET_MATH_RVV10_H
|
|
#define EIGEN_PACKET_MATH_RVV10_H
|
|
|
|
// IWYU pragma: private
|
|
#include "../../InternalHeaderCheck.h"
|
|
|
|
namespace Eigen {
|
|
namespace internal {
|
|
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
|
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
|
|
#endif
|
|
|
|
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
|
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
|
#endif
|
|
|
|
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
|
|
|
template <typename Scalar, std::size_t VectorLength, std::size_t VectorLMul>
|
|
struct rvv_packet_size_selector {
|
|
enum { size = VectorLength * VectorLMul / (sizeof(Scalar) * CHAR_BIT) };
|
|
};
|
|
|
|
template <std::size_t VectorLength, std::size_t VectorLMul>
|
|
struct rvv_packet_alignment_selector {
|
|
enum {
|
|
alignment =
|
|
(VectorLength * VectorLMul) >= 1024
|
|
? Aligned128
|
|
: ((VectorLength * VectorLMul) >= 512 ? Aligned64
|
|
: ((VectorLength * VectorLMul) >= 256 ? Aligned32 : Aligned16))
|
|
};
|
|
};
|
|
|
|
typedef vbool64_t PacketMask64;
|
|
typedef vbool32_t PacketMask32;
|
|
typedef vbool16_t PacketMask16;
|
|
typedef vbool8_t PacketMask8;
|
|
typedef vbool4_t PacketMask4;
|
|
|
|
/********************************* int32 **************************************/
|
|
typedef eigen_packet_wrapper<vint32m1_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL))), 0> Packet1Xi;
|
|
typedef eigen_packet_wrapper<vuint32m1_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL))), 1> Packet1Xu;
|
|
|
|
typedef eigen_packet_wrapper<vint32m2_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 2))), 2>
|
|
Packet2Xi;
|
|
typedef eigen_packet_wrapper<vuint32m2_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 2))), 3>
|
|
Packet2Xu;
|
|
|
|
typedef eigen_packet_wrapper<vint32m4_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 4))), 4>
|
|
Packet4Xi;
|
|
typedef eigen_packet_wrapper<vuint32m4_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 4))), 5>
|
|
Packet4Xu;
|
|
|
|
#if EIGEN_RISCV64_DEFAULT_LMUL == 1
|
|
typedef Packet1Xi PacketXi;
|
|
typedef Packet1Xu PacketXu;
|
|
|
|
template <>
|
|
struct packet_traits<numext::int32_t> : default_packet_traits {
|
|
typedef Packet1Xi type;
|
|
typedef Packet1Xi half; // Half not implemented yet
|
|
enum {
|
|
Vectorizable = 1,
|
|
AlignedOnScalar = 1,
|
|
size = rvv_packet_size_selector<numext::int32_t, EIGEN_RISCV64_RVV_VL, 1>::size,
|
|
|
|
HasAdd = 1,
|
|
HasSub = 1,
|
|
HasShift = 1,
|
|
HasMul = 1,
|
|
HasNegate = 1,
|
|
HasAbs = 1,
|
|
HasArg = 0,
|
|
HasAbs2 = 1,
|
|
HasMin = 1,
|
|
HasMax = 1,
|
|
HasConj = 1,
|
|
HasSetLinear = 0,
|
|
HasBlend = 0,
|
|
HasReduxp = 0
|
|
};
|
|
};
|
|
|
|
#elif EIGEN_RISCV64_DEFAULT_LMUL == 2
|
|
typedef Packet2Xi PacketXi;
|
|
typedef Packet2Xu PacketXu;
|
|
|
|
template <>
|
|
struct packet_traits<numext::int32_t> : default_packet_traits {
|
|
typedef Packet2Xi type;
|
|
typedef Packet1Xi half;
|
|
enum {
|
|
Vectorizable = 1,
|
|
AlignedOnScalar = 1,
|
|
size = rvv_packet_size_selector<numext::int32_t, EIGEN_RISCV64_RVV_VL, 2>::size,
|
|
|
|
HasAdd = 1,
|
|
HasSub = 1,
|
|
HasShift = 1,
|
|
HasMul = 1,
|
|
HasNegate = 1,
|
|
HasAbs = 1,
|
|
HasArg = 0,
|
|
HasAbs2 = 1,
|
|
HasMin = 1,
|
|
HasMax = 1,
|
|
HasConj = 1,
|
|
HasSetLinear = 0,
|
|
HasBlend = 0,
|
|
HasReduxp = 0
|
|
};
|
|
};
|
|
|
|
#elif EIGEN_RISCV64_DEFAULT_LMUL == 4
|
|
typedef Packet4Xi PacketXi;
|
|
typedef Packet4Xu PacketXu;
|
|
|
|
template <>
|
|
struct packet_traits<numext::int32_t> : default_packet_traits {
|
|
typedef Packet4Xi type;
|
|
typedef Packet2Xi half;
|
|
enum {
|
|
Vectorizable = 1,
|
|
AlignedOnScalar = 1,
|
|
size = rvv_packet_size_selector<numext::int32_t, EIGEN_RISCV64_RVV_VL, 4>::size,
|
|
|
|
HasAdd = 1,
|
|
HasSub = 1,
|
|
HasShift = 1,
|
|
HasMul = 1,
|
|
HasNegate = 1,
|
|
HasAbs = 1,
|
|
HasArg = 0,
|
|
HasAbs2 = 1,
|
|
HasMin = 1,
|
|
HasMax = 1,
|
|
HasConj = 1,
|
|
HasSetLinear = 0,
|
|
HasBlend = 0,
|
|
HasReduxp = 0
|
|
};
|
|
};
|
|
#endif
|
|
|
|
template <>
|
|
struct unpacket_traits<Packet1Xi> {
|
|
typedef numext::int32_t type;
|
|
typedef Packet1Xi half; // Half not yet implemented
|
|
typedef numext::uint8_t mask_t;
|
|
enum {
|
|
size = rvv_packet_size_selector<numext::int32_t, EIGEN_RISCV64_RVV_VL, 1>::size,
|
|
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 1>::alignment,
|
|
vectorizable = true,
|
|
masked_load_available = false,
|
|
masked_store_available = false
|
|
};
|
|
};
|
|
|
|
template <>
|
|
struct unpacket_traits<Packet2Xi> {
|
|
typedef numext::int32_t type;
|
|
typedef Packet1Xi half;
|
|
typedef numext::uint8_t mask_t;
|
|
enum {
|
|
size = rvv_packet_size_selector<numext::int32_t, EIGEN_RISCV64_RVV_VL, 2>::size,
|
|
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 2>::alignment,
|
|
vectorizable = true,
|
|
masked_load_available = false,
|
|
masked_store_available = false
|
|
};
|
|
};
|
|
|
|
template <>
|
|
struct unpacket_traits<Packet4Xi> {
|
|
typedef numext::int32_t type;
|
|
typedef Packet2Xi half;
|
|
typedef numext::uint8_t mask_t;
|
|
enum {
|
|
size = rvv_packet_size_selector<numext::int32_t, EIGEN_RISCV64_RVV_VL, 4>::size,
|
|
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 4>::alignment,
|
|
vectorizable = true,
|
|
masked_load_available = false,
|
|
masked_store_available = false
|
|
};
|
|
};
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE void prefetch<numext::int32_t>(const numext::int32_t* addr) {
|
|
#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
|
|
__builtin_prefetch(addr);
|
|
#endif
|
|
}
|
|
|
|
/********************************* Packet1Xi ************************************/
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pset1<Packet1Xi>(const numext::int32_t& from) {
|
|
return __riscv_vmv_v_x_i32m1(from, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi plset<Packet1Xi>(const numext::int32_t& a) {
|
|
Packet1Xi idx = __riscv_vreinterpret_v_u32m1_i32m1(__riscv_vid_v_u32m1(unpacket_traits<Packet1Xi>::size));
|
|
return __riscv_vadd_vx_i32m1(idx, a, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pzero<Packet1Xi>(const Packet1Xi& /*a*/) {
|
|
return __riscv_vmv_v_x_i32m1(0, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi padd<Packet1Xi>(const Packet1Xi& a, const Packet1Xi& b) {
|
|
return __riscv_vadd_vv_i32m1(a, b, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi psub<Packet1Xi>(const Packet1Xi& a, const Packet1Xi& b) {
|
|
return __riscv_vsub(a, b, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pnegate(const Packet1Xi& a) {
|
|
return __riscv_vneg(a, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pconj(const Packet1Xi& a) {
|
|
return a;
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pmul<Packet1Xi>(const Packet1Xi& a, const Packet1Xi& b) {
|
|
return __riscv_vmul(a, b, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pdiv<Packet1Xi>(const Packet1Xi& a, const Packet1Xi& b) {
|
|
return __riscv_vdiv(a, b, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pmadd(const Packet1Xi& a, const Packet1Xi& b, const Packet1Xi& c) {
|
|
return __riscv_vmadd(a, b, c, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pmsub(const Packet1Xi& a, const Packet1Xi& b, const Packet1Xi& c) {
|
|
return __riscv_vmadd(a, b, pnegate(c), unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pnmadd(const Packet1Xi& a, const Packet1Xi& b, const Packet1Xi& c) {
|
|
return __riscv_vnmsub_vv_i32m1(a, b, c, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pnmsub(const Packet1Xi& a, const Packet1Xi& b, const Packet1Xi& c) {
|
|
return __riscv_vnmsub_vv_i32m1(a, b, pnegate(c), unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pmin<Packet1Xi>(const Packet1Xi& a, const Packet1Xi& b) {
|
|
return __riscv_vmin(a, b, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pmax<Packet1Xi>(const Packet1Xi& a, const Packet1Xi& b) {
|
|
return __riscv_vmax(a, b, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pcmp_le<Packet1Xi>(const Packet1Xi& a, const Packet1Xi& b) {
|
|
PacketMask32 mask = __riscv_vmsle_vv_i32m1_b32(a, b, unpacket_traits<Packet1Xi>::size);
|
|
return __riscv_vmerge_vxm_i32m1(pzero(a), 0xffffffff, mask, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pcmp_lt<Packet1Xi>(const Packet1Xi& a, const Packet1Xi& b) {
|
|
PacketMask32 mask = __riscv_vmslt_vv_i32m1_b32(a, b, unpacket_traits<Packet1Xi>::size);
|
|
return __riscv_vmerge_vxm_i32m1(pzero(a), 0xffffffff, mask, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pcmp_eq<Packet1Xi>(const Packet1Xi& a, const Packet1Xi& b) {
|
|
PacketMask32 mask = __riscv_vmseq_vv_i32m1_b32(a, b, unpacket_traits<Packet1Xi>::size);
|
|
return __riscv_vmerge_vxm_i32m1(pzero(a), 0xffffffff, mask, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi ptrue<Packet1Xi>(const Packet1Xi& /*a*/) {
|
|
return __riscv_vmv_v_x_i32m1(0xffffffffu, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pand<Packet1Xi>(const Packet1Xi& a, const Packet1Xi& b) {
|
|
return __riscv_vand_vv_i32m1(a, b, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi por<Packet1Xi>(const Packet1Xi& a, const Packet1Xi& b) {
|
|
return __riscv_vor_vv_i32m1(a, b, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pxor<Packet1Xi>(const Packet1Xi& a, const Packet1Xi& b) {
|
|
return __riscv_vxor_vv_i32m1(a, b, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pandnot<Packet1Xi>(const Packet1Xi& a, const Packet1Xi& b) {
|
|
return __riscv_vand_vv_i32m1(a, __riscv_vnot_v_i32m1(b, unpacket_traits<Packet1Xi>::size),
|
|
unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <int N>
|
|
EIGEN_STRONG_INLINE Packet1Xi parithmetic_shift_right(Packet1Xi a) {
|
|
return __riscv_vsra_vx_i32m1(a, N, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <int N>
|
|
EIGEN_STRONG_INLINE Packet1Xi plogical_shift_right(Packet1Xi a) {
|
|
return __riscv_vreinterpret_i32m1(
|
|
__riscv_vsrl_vx_u32m1(__riscv_vreinterpret_u32m1(a), N, unpacket_traits<Packet1Xi>::size));
|
|
}
|
|
|
|
template <int N>
|
|
EIGEN_STRONG_INLINE Packet1Xi plogical_shift_left(Packet1Xi a) {
|
|
return __riscv_vsll_vx_i32m1(a, N, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pload<Packet1Xi>(const numext::int32_t* from) {
|
|
EIGEN_DEBUG_ALIGNED_LOAD return __riscv_vle32_v_i32m1(from, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi ploadu<Packet1Xi>(const numext::int32_t* from) {
|
|
EIGEN_DEBUG_UNALIGNED_LOAD return __riscv_vle32_v_i32m1(from, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi ploaddup<Packet1Xi>(const numext::int32_t* from) {
|
|
Packet1Xu idx = __riscv_vid_v_u32m1(unpacket_traits<Packet1Xi>::size);
|
|
idx = __riscv_vsll_vx_u32m1(__riscv_vand_vx_u32m1(idx, 0xfffffffeu, unpacket_traits<Packet1Xi>::size), 1,
|
|
unpacket_traits<Packet1Xi>::size);
|
|
// idx = 0 0 sizeof(int32_t) sizeof(int32_t) 2*sizeof(int32_t) 2*sizeof(int32_t) ...
|
|
return __riscv_vloxei32_v_i32m1(from, idx, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi ploadquad<Packet1Xi>(const numext::int32_t* from) {
|
|
Packet1Xu idx = __riscv_vid_v_u32m1(unpacket_traits<Packet1Xi>::size);
|
|
idx = __riscv_vand_vx_u32m1(idx, 0xfffffffcu, unpacket_traits<Packet1Xi>::size);
|
|
return __riscv_vloxei32_v_i32m1(from, idx, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE void pstore<numext::int32_t>(numext::int32_t* to, const Packet1Xi& from) {
|
|
EIGEN_DEBUG_ALIGNED_STORE __riscv_vse32_v_i32m1(to, from, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE void pstoreu<numext::int32_t>(numext::int32_t* to, const Packet1Xi& from) {
|
|
EIGEN_DEBUG_UNALIGNED_STORE __riscv_vse32_v_i32m1(to, from, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_DEVICE_FUNC inline Packet1Xi pgather<numext::int32_t, Packet1Xi>(const numext::int32_t* from, Index stride) {
|
|
return __riscv_vlse32_v_i32m1(from, stride * sizeof(numext::int32_t), unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_DEVICE_FUNC inline void pscatter<numext::int32_t, Packet1Xi>(numext::int32_t* to, const Packet1Xi& from,
|
|
Index stride) {
|
|
__riscv_vsse32(to, stride * sizeof(numext::int32_t), from, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE numext::int32_t pfirst<Packet1Xi>(const Packet1Xi& a) {
|
|
return __riscv_vmv_x_s_i32m1_i32(a);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi preverse(const Packet1Xi& a) {
|
|
Packet1Xu idx = __riscv_vrsub_vx_u32m1(__riscv_vid_v_u32m1(unpacket_traits<Packet1Xi>::size),
|
|
unpacket_traits<Packet1Xi>::size - 1, unpacket_traits<Packet1Xi>::size);
|
|
return __riscv_vrgather_vv_i32m1(a, idx, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xi pabs(const Packet1Xi& a) {
|
|
Packet1Xi mask = __riscv_vsra_vx_i32m1(a, 31, unpacket_traits<Packet1Xi>::size);
|
|
return __riscv_vsub_vv_i32m1(__riscv_vxor_vv_i32m1(a, mask, unpacket_traits<Packet1Xi>::size), mask,
|
|
unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE numext::int32_t predux<Packet1Xi>(const Packet1Xi& a) {
|
|
return __riscv_vmv_x(__riscv_vredsum_vs_i32m1_i32m1(a, __riscv_vmv_v_x_i32m1(0, unpacket_traits<Packet1Xi>::size),
|
|
unpacket_traits<Packet1Xi>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE numext::int32_t predux_mul<Packet1Xi>(const Packet1Xi& a) {
|
|
// Multiply the vector by its reverse
|
|
Packet1Xi prod = __riscv_vmul_vv_i32m1(preverse(a), a, unpacket_traits<Packet1Xi>::size);
|
|
Packet1Xi half_prod;
|
|
|
|
if (EIGEN_RISCV64_RVV_VL >= 1024) {
|
|
half_prod = __riscv_vslidedown_vx_i32m1(prod, 8, unpacket_traits<Packet1Xi>::size);
|
|
prod = __riscv_vmul_vv_i32m1(prod, half_prod, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
if (EIGEN_RISCV64_RVV_VL >= 512) {
|
|
half_prod = __riscv_vslidedown_vx_i32m1(prod, 4, unpacket_traits<Packet1Xi>::size);
|
|
prod = __riscv_vmul_vv_i32m1(prod, half_prod, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
if (EIGEN_RISCV64_RVV_VL >= 256) {
|
|
half_prod = __riscv_vslidedown_vx_i32m1(prod, 2, unpacket_traits<Packet1Xi>::size);
|
|
prod = __riscv_vmul_vv_i32m1(prod, half_prod, unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
// Last reduction
|
|
half_prod = __riscv_vslidedown_vx_i32m1(prod, 1, unpacket_traits<Packet1Xi>::size);
|
|
prod = __riscv_vmul_vv_i32m1(prod, half_prod, unpacket_traits<Packet1Xi>::size);
|
|
|
|
// The reduction is done to the first element.
|
|
return pfirst(prod);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE numext::int32_t predux_min<Packet1Xi>(const Packet1Xi& a) {
|
|
return __riscv_vmv_x(__riscv_vredmin_vs_i32m1_i32m1(
|
|
a, __riscv_vmv_v_x_i32m1((std::numeric_limits<numext::int32_t>::max)(), unpacket_traits<Packet1Xi>::size),
|
|
unpacket_traits<Packet1Xi>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE numext::int32_t predux_max<Packet1Xi>(const Packet1Xi& a) {
|
|
return __riscv_vmv_x(__riscv_vredmax_vs_i32m1_i32m1(
|
|
a, __riscv_vmv_v_x_i32m1((std::numeric_limits<numext::int32_t>::min)(), unpacket_traits<Packet1Xi>::size),
|
|
unpacket_traits<Packet1Xi>::size));
|
|
}
|
|
|
|
template <int N>
|
|
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet1Xi, N>& kernel) {
|
|
numext::int32_t buffer[unpacket_traits<Packet1Xi>::size * N] = {0};
|
|
int i = 0;
|
|
|
|
for (i = 0; i < N; i++) {
|
|
__riscv_vsse32(&buffer[i], N * sizeof(numext::int32_t), kernel.packet[i], unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
for (i = 0; i < N; i++) {
|
|
kernel.packet[i] =
|
|
__riscv_vle32_v_i32m1(&buffer[i * unpacket_traits<Packet1Xi>::size], unpacket_traits<Packet1Xi>::size);
|
|
}
|
|
}
|
|
|
|
/********************************* float32 ************************************/
|
|
|
|
typedef eigen_packet_wrapper<vfloat32m1_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL))), 6> Packet1Xf;
|
|
typedef eigen_packet_wrapper<vfloat32m2_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 2))), 7>
|
|
Packet2Xf;
|
|
typedef eigen_packet_wrapper<vfloat32m4_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 4))), 8>
|
|
Packet4Xf;
|
|
|
|
#if EIGEN_RISCV64_DEFAULT_LMUL == 1
|
|
typedef Packet1Xf PacketXf;
|
|
|
|
template <>
|
|
struct packet_traits<float> : default_packet_traits {
|
|
typedef Packet1Xf type;
|
|
typedef Packet1Xf half;
|
|
|
|
enum {
|
|
Vectorizable = 1,
|
|
AlignedOnScalar = 1,
|
|
size = rvv_packet_size_selector<float, EIGEN_RISCV64_RVV_VL, 1>::size,
|
|
|
|
HasAdd = 1,
|
|
HasSub = 1,
|
|
HasShift = 1,
|
|
HasMul = 1,
|
|
HasNegate = 1,
|
|
HasAbs = 1,
|
|
HasArg = 0,
|
|
HasAbs2 = 1,
|
|
HasMin = 1,
|
|
HasMax = 1,
|
|
HasConj = 1,
|
|
HasSetLinear = 0,
|
|
HasBlend = 0,
|
|
HasReduxp = 0,
|
|
|
|
HasCmp = 1,
|
|
HasDiv = 1,
|
|
HasRound = 1,
|
|
|
|
HasSin = EIGEN_FAST_MATH,
|
|
HasCos = EIGEN_FAST_MATH,
|
|
HasTan = EIGEN_FAST_MATH,
|
|
HasLog = 1,
|
|
HasExp = 1,
|
|
HasSqrt = 1,
|
|
HasTanh = EIGEN_FAST_MATH,
|
|
HasErf = EIGEN_FAST_MATH
|
|
};
|
|
};
|
|
|
|
#elif EIGEN_RISCV64_DEFAULT_LMUL == 2
|
|
typedef Packet2Xf PacketXf;
|
|
|
|
template <>
|
|
struct packet_traits<float> : default_packet_traits {
|
|
typedef Packet2Xf type;
|
|
typedef Packet1Xf half;
|
|
|
|
enum {
|
|
Vectorizable = 1,
|
|
AlignedOnScalar = 1,
|
|
size = rvv_packet_size_selector<float, EIGEN_RISCV64_RVV_VL, 2>::size,
|
|
|
|
HasAdd = 1,
|
|
HasSub = 1,
|
|
HasShift = 1,
|
|
HasMul = 1,
|
|
HasNegate = 1,
|
|
HasAbs = 1,
|
|
HasArg = 0,
|
|
HasAbs2 = 1,
|
|
HasMin = 1,
|
|
HasMax = 1,
|
|
HasConj = 1,
|
|
HasSetLinear = 0,
|
|
HasBlend = 0,
|
|
HasReduxp = 0,
|
|
|
|
HasCmp = 1,
|
|
HasDiv = 1,
|
|
HasRound = 1,
|
|
|
|
HasSin = EIGEN_FAST_MATH,
|
|
HasCos = EIGEN_FAST_MATH,
|
|
HasLog = 1,
|
|
HasExp = 1,
|
|
HasSqrt = 1,
|
|
HasTanh = EIGEN_FAST_MATH,
|
|
HasErf = EIGEN_FAST_MATH
|
|
};
|
|
};
|
|
|
|
#elif EIGEN_RISCV64_DEFAULT_LMUL == 4
|
|
typedef Packet4Xf PacketXf;
|
|
|
|
template <>
|
|
struct packet_traits<float> : default_packet_traits {
|
|
typedef Packet4Xf type;
|
|
typedef Packet2Xf half;
|
|
|
|
enum {
|
|
Vectorizable = 1,
|
|
AlignedOnScalar = 1,
|
|
size = rvv_packet_size_selector<float, EIGEN_RISCV64_RVV_VL, 4>::size,
|
|
|
|
HasAdd = 1,
|
|
HasSub = 1,
|
|
HasShift = 1,
|
|
HasMul = 1,
|
|
HasNegate = 1,
|
|
HasAbs = 1,
|
|
HasArg = 0,
|
|
HasAbs2 = 1,
|
|
HasMin = 1,
|
|
HasMax = 1,
|
|
HasConj = 1,
|
|
HasSetLinear = 0,
|
|
HasBlend = 0,
|
|
HasReduxp = 0,
|
|
|
|
HasCmp = 1,
|
|
HasDiv = 1,
|
|
HasRound = 1,
|
|
|
|
HasSin = EIGEN_FAST_MATH,
|
|
HasCos = EIGEN_FAST_MATH,
|
|
HasLog = 1,
|
|
HasExp = 1,
|
|
HasSqrt = 1,
|
|
HasTanh = EIGEN_FAST_MATH,
|
|
HasErf = EIGEN_FAST_MATH
|
|
};
|
|
};
|
|
#endif
|
|
|
|
template <>
|
|
struct unpacket_traits<Packet1Xf> {
|
|
typedef float type;
|
|
typedef Packet1Xf half; // Half not yet implemented
|
|
typedef Packet1Xi integer_packet;
|
|
typedef numext::uint8_t mask_t;
|
|
typedef PacketMask32 packet_mask;
|
|
|
|
enum {
|
|
size = rvv_packet_size_selector<float, EIGEN_RISCV64_RVV_VL, 1>::size,
|
|
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 1>::alignment,
|
|
vectorizable = true,
|
|
masked_load_available = false,
|
|
masked_store_available = false
|
|
};
|
|
};
|
|
|
|
template <>
|
|
struct unpacket_traits<Packet2Xf> {
|
|
typedef float type;
|
|
typedef Packet1Xf half;
|
|
typedef Packet2Xi integer_packet;
|
|
typedef numext::uint8_t mask_t;
|
|
typedef PacketMask16 packet_mask;
|
|
|
|
enum {
|
|
size = rvv_packet_size_selector<float, EIGEN_RISCV64_RVV_VL, 2>::size,
|
|
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 2>::alignment,
|
|
vectorizable = true,
|
|
masked_load_available = false,
|
|
masked_store_available = false
|
|
};
|
|
};
|
|
|
|
template <>
|
|
struct unpacket_traits<Packet4Xf> {
|
|
typedef float type;
|
|
typedef Packet2Xf half;
|
|
typedef Packet4Xi integer_packet;
|
|
typedef numext::uint8_t mask_t;
|
|
typedef PacketMask8 packet_mask;
|
|
|
|
enum {
|
|
size = rvv_packet_size_selector<float, EIGEN_RISCV64_RVV_VL, 4>::size,
|
|
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 4>::alignment,
|
|
vectorizable = true,
|
|
masked_load_available = false,
|
|
masked_store_available = false
|
|
};
|
|
};
|
|
|
|
/********************************* Packet1Xf ************************************/
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf ptrue<Packet1Xf>(const Packet1Xf& /*a*/) {
|
|
return __riscv_vreinterpret_f32m1(__riscv_vmv_v_x_u32m1(0xffffffffu, unpacket_traits<Packet1Xf>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pzero<Packet1Xf>(const Packet1Xf& /*a*/) {
|
|
return __riscv_vfmv_v_f_f32m1(0.0f, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pabs(const Packet1Xf& a) {
|
|
return __riscv_vfabs_v_f32m1(a, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pset1<Packet1Xf>(const float& from) {
|
|
return __riscv_vfmv_v_f_f32m1(from, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pset1frombits<Packet1Xf>(numext::uint32_t from) {
|
|
return __riscv_vreinterpret_f32m1(__riscv_vmv_v_x_u32m1(from, unpacket_traits<Packet1Xf>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf plset<Packet1Xf>(const float& a) {
|
|
Packet1Xf idx = __riscv_vfcvt_f_x_v_f32m1(
|
|
__riscv_vreinterpret_v_u32m1_i32m1(__riscv_vid_v_u32m1(unpacket_traits<Packet1Xi>::size)),
|
|
unpacket_traits<Packet1Xf>::size);
|
|
return __riscv_vfadd_vf_f32m1(idx, a, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf padd<Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
return __riscv_vfadd_vv_f32m1(a, b, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf psub<Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
return __riscv_vfsub_vv_f32m1(a, b, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pnegate(const Packet1Xf& a) {
|
|
return __riscv_vfneg_v_f32m1(a, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pconj(const Packet1Xf& a) {
|
|
return a;
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pmul<Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
return __riscv_vfmul_vv_f32m1(a, b, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pdiv<Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
return __riscv_vfdiv_vv_f32m1(a, b, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pmadd(const Packet1Xf& a, const Packet1Xf& b, const Packet1Xf& c) {
|
|
return __riscv_vfmadd_vv_f32m1(a, b, c, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pmsub(const Packet1Xf& a, const Packet1Xf& b, const Packet1Xf& c) {
|
|
return __riscv_vfmsub_vv_f32m1(a, b, c, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pnmadd(const Packet1Xf& a, const Packet1Xf& b, const Packet1Xf& c) {
|
|
return __riscv_vfnmsub_vv_f32m1(a, b, c, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pnmsub(const Packet1Xf& a, const Packet1Xf& b, const Packet1Xf& c) {
|
|
return __riscv_vfnmadd_vv_f32m1(a, b, c, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pmin<Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
Packet1Xf nans = __riscv_vfmv_v_f_f32m1((std::numeric_limits<float>::quiet_NaN)(), unpacket_traits<Packet1Xf>::size);
|
|
PacketMask32 mask = __riscv_vmfeq_vv_f32m1_b32(a, a, unpacket_traits<Packet1Xf>::size);
|
|
PacketMask32 mask2 = __riscv_vmfeq_vv_f32m1_b32(b, b, unpacket_traits<Packet1Xf>::size);
|
|
mask = __riscv_vmand_mm_b32(mask, mask2, unpacket_traits<Packet1Xf>::size);
|
|
|
|
return __riscv_vfmin_vv_f32m1_tumu(mask, nans, a, b, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pmin<PropagateNaN, Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
return pmin<Packet1Xf>(a, b);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pmin<PropagateNumbers, Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
return __riscv_vfmin_vv_f32m1(a, b, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pmax<Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
Packet1Xf nans = __riscv_vfmv_v_f_f32m1((std::numeric_limits<float>::quiet_NaN)(), unpacket_traits<Packet1Xf>::size);
|
|
PacketMask32 mask = __riscv_vmfeq_vv_f32m1_b32(a, a, unpacket_traits<Packet1Xf>::size);
|
|
PacketMask32 mask2 = __riscv_vmfeq_vv_f32m1_b32(b, b, unpacket_traits<Packet1Xf>::size);
|
|
mask = __riscv_vmand_mm_b32(mask, mask2, unpacket_traits<Packet1Xf>::size);
|
|
|
|
return __riscv_vfmax_vv_f32m1_tumu(mask, nans, a, b, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pmax<PropagateNaN, Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
return pmax<Packet1Xf>(a, b);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pmax<PropagateNumbers, Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
return __riscv_vfmax_vv_f32m1(a, b, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pcmp_le<Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
PacketMask32 mask = __riscv_vmfle_vv_f32m1_b32(a, b, unpacket_traits<Packet1Xf>::size);
|
|
return __riscv_vmerge_vvm_f32m1(pzero<Packet1Xf>(a), ptrue<Packet1Xf>(a), mask, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pcmp_lt<Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
PacketMask32 mask = __riscv_vmflt_vv_f32m1_b32(a, b, unpacket_traits<Packet1Xf>::size);
|
|
return __riscv_vmerge_vvm_f32m1(pzero<Packet1Xf>(a), ptrue<Packet1Xf>(a), mask, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pcmp_eq<Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
PacketMask32 mask = __riscv_vmfeq_vv_f32m1_b32(a, b, unpacket_traits<Packet1Xf>::size);
|
|
return __riscv_vmerge_vvm_f32m1(pzero<Packet1Xf>(a), ptrue<Packet1Xf>(a), mask, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pcmp_lt_or_nan<Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
PacketMask32 mask = __riscv_vmfge_vv_f32m1_b32(a, b, unpacket_traits<Packet1Xf>::size);
|
|
return __riscv_vfmerge_vfm_f32m1(ptrue<Packet1Xf>(a), 0.0f, mask, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
// Logical Operations are not supported for float, so reinterpret casts
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pand<Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
return __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vand_vv_u32m1(
|
|
__riscv_vreinterpret_v_f32m1_u32m1(a), __riscv_vreinterpret_v_f32m1_u32m1(b), unpacket_traits<Packet1Xf>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf por<Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
return __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vor_vv_u32m1(
|
|
__riscv_vreinterpret_v_f32m1_u32m1(a), __riscv_vreinterpret_v_f32m1_u32m1(b), unpacket_traits<Packet1Xf>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pxor<Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
return __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vxor_vv_u32m1(
|
|
__riscv_vreinterpret_v_f32m1_u32m1(a), __riscv_vreinterpret_v_f32m1_u32m1(b), unpacket_traits<Packet1Xf>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pandnot<Packet1Xf>(const Packet1Xf& a, const Packet1Xf& b) {
|
|
return __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vand_vv_u32m1(
|
|
__riscv_vreinterpret_v_f32m1_u32m1(a),
|
|
__riscv_vnot_v_u32m1(__riscv_vreinterpret_v_f32m1_u32m1(b), unpacket_traits<Packet1Xf>::size),
|
|
unpacket_traits<Packet1Xf>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pload<Packet1Xf>(const float* from) {
|
|
EIGEN_DEBUG_ALIGNED_LOAD return __riscv_vle32_v_f32m1(from, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf ploadu<Packet1Xf>(const float* from) {
|
|
EIGEN_DEBUG_UNALIGNED_LOAD return __riscv_vle32_v_f32m1(from, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf ploaddup<Packet1Xf>(const float* from) {
|
|
Packet1Xu idx = __riscv_vid_v_u32m1(unpacket_traits<Packet1Xf>::size);
|
|
idx = __riscv_vsll_vx_u32m1(__riscv_vand_vx_u32m1(idx, 0xfffffffeu, unpacket_traits<Packet1Xf>::size), 1,
|
|
unpacket_traits<Packet1Xf>::size);
|
|
return __riscv_vloxei32_v_f32m1(from, idx, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf ploadquad<Packet1Xf>(const float* from) {
|
|
Packet1Xu idx = __riscv_vid_v_u32m1(unpacket_traits<Packet1Xf>::size);
|
|
idx = __riscv_vand_vx_u32m1(idx, 0xfffffffcu, unpacket_traits<Packet1Xf>::size);
|
|
return __riscv_vloxei32_v_f32m1(from, idx, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet1Xf& from) {
|
|
EIGEN_DEBUG_ALIGNED_STORE __riscv_vse32_v_f32m1(to, from, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet1Xf& from) {
|
|
EIGEN_DEBUG_UNALIGNED_STORE __riscv_vse32_v_f32m1(to, from, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_DEVICE_FUNC inline Packet1Xf pgather<float, Packet1Xf>(const float* from, Index stride) {
|
|
return __riscv_vlse32_v_f32m1(from, stride * sizeof(float), unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_DEVICE_FUNC inline void pscatter<float, Packet1Xf>(float* to, const Packet1Xf& from, Index stride) {
|
|
__riscv_vsse32(to, stride * sizeof(float), from, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE float pfirst<Packet1Xf>(const Packet1Xf& a) {
|
|
return __riscv_vfmv_f_s_f32m1_f32(a);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf psqrt(const Packet1Xf& a) {
|
|
return __riscv_vfsqrt_v_f32m1(a, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf print<Packet1Xf>(const Packet1Xf& a) {
|
|
const Packet1Xf limit = pset1<Packet1Xf>(static_cast<float>(1 << 23));
|
|
const Packet1Xf abs_a = pabs(a);
|
|
|
|
PacketMask32 mask = __riscv_vmfne_vv_f32m1_b32(a, a, unpacket_traits<Packet1Xf>::size);
|
|
const Packet1Xf x = __riscv_vfadd_vv_f32m1_tumu(mask, a, a, a, unpacket_traits<Packet1Xf>::size);
|
|
const Packet1Xf new_x = __riscv_vfcvt_f_x_v_f32m1(__riscv_vfcvt_x_f_v_i32m1(a, unpacket_traits<Packet1Xf>::size),
|
|
unpacket_traits<Packet1Xf>::size);
|
|
|
|
mask = __riscv_vmflt_vv_f32m1_b32(abs_a, limit, unpacket_traits<Packet1Xf>::size);
|
|
Packet1Xf signed_x = __riscv_vfsgnj_vv_f32m1(new_x, x, unpacket_traits<Packet1Xf>::size);
|
|
return __riscv_vmerge_vvm_f32m1(x, signed_x, mask, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pfloor<Packet1Xf>(const Packet1Xf& a) {
|
|
Packet1Xf tmp = print<Packet1Xf>(a);
|
|
// If greater, subtract one.
|
|
PacketMask32 mask = __riscv_vmflt_vv_f32m1_b32(a, tmp, unpacket_traits<Packet1Xf>::size);
|
|
return __riscv_vfsub_vf_f32m1_tumu(mask, tmp, tmp, 1.0f, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf preverse(const Packet1Xf& a) {
|
|
Packet1Xu idx = __riscv_vrsub_vx_u32m1(__riscv_vid_v_u32m1(unpacket_traits<Packet1Xf>::size),
|
|
unpacket_traits<Packet1Xf>::size - 1, unpacket_traits<Packet1Xf>::size);
|
|
return __riscv_vrgather_vv_f32m1(a, idx, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pfrexp<Packet1Xf>(const Packet1Xf& a, Packet1Xf& exponent) {
|
|
return pfrexp_generic(a, exponent);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE float predux<Packet1Xf>(const Packet1Xf& a) {
|
|
return __riscv_vfmv_f(__riscv_vfredusum_vs_f32m1_f32m1(
|
|
a, __riscv_vfmv_v_f_f32m1(0.0, unpacket_traits<Packet1Xf>::size), unpacket_traits<Packet1Xf>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE float predux_mul<Packet1Xf>(const Packet1Xf& a) {
|
|
// Multiply the vector by its reverse
|
|
Packet1Xf prod = __riscv_vfmul_vv_f32m1(preverse(a), a, unpacket_traits<Packet1Xf>::size);
|
|
Packet1Xf half_prod;
|
|
|
|
if (EIGEN_RISCV64_RVV_VL >= 1024) {
|
|
half_prod = __riscv_vslidedown_vx_f32m1(prod, 8, unpacket_traits<Packet1Xf>::size);
|
|
prod = __riscv_vfmul_vv_f32m1(prod, half_prod, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
if (EIGEN_RISCV64_RVV_VL >= 512) {
|
|
half_prod = __riscv_vslidedown_vx_f32m1(prod, 4, unpacket_traits<Packet1Xf>::size);
|
|
prod = __riscv_vfmul_vv_f32m1(prod, half_prod, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
if (EIGEN_RISCV64_RVV_VL >= 256) {
|
|
half_prod = __riscv_vslidedown_vx_f32m1(prod, 2, unpacket_traits<Packet1Xf>::size);
|
|
prod = __riscv_vfmul_vv_f32m1(prod, half_prod, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
// Last reduction
|
|
half_prod = __riscv_vslidedown_vx_f32m1(prod, 1, unpacket_traits<Packet1Xf>::size);
|
|
prod = __riscv_vfmul_vv_f32m1(prod, half_prod, unpacket_traits<Packet1Xf>::size);
|
|
|
|
// The reduction is done to the first element.
|
|
return pfirst(prod);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE float predux_min<Packet1Xf>(const Packet1Xf& a) {
|
|
return (
|
|
std::min)(__riscv_vfmv_f(__riscv_vfredmin_vs_f32m1_f32m1(
|
|
a,
|
|
__riscv_vfmv_v_f_f32m1((std::numeric_limits<float>::quiet_NaN)(), unpacket_traits<Packet1Xf>::size),
|
|
unpacket_traits<Packet1Xf>::size)),
|
|
(std::numeric_limits<float>::max)());
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE float predux_max<Packet1Xf>(const Packet1Xf& a) {
|
|
return (
|
|
std::max)(__riscv_vfmv_f(__riscv_vfredmax_vs_f32m1_f32m1(
|
|
a,
|
|
__riscv_vfmv_v_f_f32m1((std::numeric_limits<float>::quiet_NaN)(), unpacket_traits<Packet1Xf>::size),
|
|
unpacket_traits<Packet1Xf>::size)),
|
|
-(std::numeric_limits<float>::max)());
|
|
}
|
|
|
|
template <int N>
|
|
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet1Xf, N>& kernel) {
|
|
float buffer[unpacket_traits<Packet1Xf>::size * N];
|
|
int i = 0;
|
|
|
|
for (i = 0; i < N; i++) {
|
|
__riscv_vsse32(&buffer[i], N * sizeof(float), kernel.packet[i], unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
for (i = 0; i < N; i++) {
|
|
kernel.packet[i] =
|
|
__riscv_vle32_v_f32m1(&buffer[i * unpacket_traits<Packet1Xf>::size], unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xf pldexp<Packet1Xf>(const Packet1Xf& a, const Packet1Xf& exponent) {
|
|
return pldexp_generic(a, exponent);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE PacketMask32 por(const PacketMask32& a, const PacketMask32& b) {
|
|
return __riscv_vmor_mm_b32(a, b, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE PacketMask32 pand(const PacketMask32& a, const PacketMask32& b) {
|
|
return __riscv_vmand_mm_b32(a, b, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
EIGEN_STRONG_INLINE PacketMask32 pcmp_eq_mask(const Packet1Xf& a, const Packet1Xf& b) {
|
|
return __riscv_vmfeq_vv_f32m1_b32(a, b, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
EIGEN_STRONG_INLINE PacketMask32 pcmp_lt_mask(const Packet1Xf& a, const Packet1Xf& b) {
|
|
return __riscv_vmflt_vv_f32m1_b32(a, b, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
EIGEN_STRONG_INLINE Packet1Xf pselect(const PacketMask32& mask, const Packet1Xf& a, const Packet1Xf& b) {
|
|
return __riscv_vmerge_vvm_f32m1(b, a, mask, unpacket_traits<Packet1Xf>::size);
|
|
}
|
|
|
|
/********************************* int64 **************************************/
|
|
|
|
typedef eigen_packet_wrapper<vint64m1_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL))), 9> Packet1Xl;
|
|
typedef eigen_packet_wrapper<vuint64m1_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL))), 10> Packet1Xul;
|
|
|
|
typedef eigen_packet_wrapper<vint64m2_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 2))), 11>
|
|
Packet2Xl;
|
|
typedef eigen_packet_wrapper<vuint64m2_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 2))), 12>
|
|
Packet2Xul;
|
|
|
|
typedef eigen_packet_wrapper<vint64m4_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 4))), 13>
|
|
Packet4Xl;
|
|
typedef eigen_packet_wrapper<vuint64m4_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 4))), 14>
|
|
Packet4Xul;
|
|
|
|
#if EIGEN_RISCV64_DEFAULT_LMUL == 1
|
|
typedef Packet1Xl PacketXl;
|
|
typedef Packet1Xul PacketXul;
|
|
|
|
template <>
|
|
struct packet_traits<numext::int64_t> : default_packet_traits {
|
|
typedef Packet1Xl type;
|
|
typedef Packet1Xl half; // Half not implemented yet
|
|
enum {
|
|
Vectorizable = 1,
|
|
AlignedOnScalar = 1,
|
|
size = rvv_packet_size_selector<numext::int64_t, EIGEN_RISCV64_RVV_VL, 1>::size,
|
|
|
|
HasAdd = 1,
|
|
HasSub = 1,
|
|
HasShift = 1,
|
|
HasMul = 1,
|
|
HasNegate = 1,
|
|
HasAbs = 1,
|
|
HasArg = 0,
|
|
HasAbs2 = 1,
|
|
HasMin = 1,
|
|
HasMax = 1,
|
|
HasConj = 1,
|
|
HasSetLinear = 0,
|
|
HasBlend = 0,
|
|
HasReduxp = 0
|
|
};
|
|
};
|
|
|
|
#elif EIGEN_RISCV64_DEFAULT_LMUL == 2
|
|
typedef Packet2Xl PacketXl;
|
|
typedef Packet2Xul PacketXul;
|
|
|
|
template <>
|
|
struct packet_traits<numext::int64_t> : default_packet_traits {
|
|
typedef Packet2Xl type;
|
|
typedef Packet1Xl half;
|
|
enum {
|
|
Vectorizable = 1,
|
|
AlignedOnScalar = 1,
|
|
size = rvv_packet_size_selector<numext::int64_t, EIGEN_RISCV64_RVV_VL, 2>::size,
|
|
|
|
HasAdd = 1,
|
|
HasSub = 1,
|
|
HasShift = 1,
|
|
HasMul = 1,
|
|
HasNegate = 1,
|
|
HasAbs = 1,
|
|
HasArg = 0,
|
|
HasAbs2 = 1,
|
|
HasMin = 1,
|
|
HasMax = 1,
|
|
HasConj = 1,
|
|
HasSetLinear = 0,
|
|
HasBlend = 0,
|
|
HasReduxp = 0
|
|
};
|
|
};
|
|
|
|
#elif EIGEN_RISCV64_DEFAULT_LMUL == 4
|
|
typedef Packet4Xl PacketXl;
|
|
typedef Packet4Xul PacketXul;
|
|
|
|
template <>
|
|
struct packet_traits<numext::int64_t> : default_packet_traits {
|
|
typedef Packet4Xl type;
|
|
typedef Packet2Xl half;
|
|
enum {
|
|
Vectorizable = 1,
|
|
AlignedOnScalar = 1,
|
|
size = rvv_packet_size_selector<numext::int64_t, EIGEN_RISCV64_RVV_VL, 4>::size,
|
|
|
|
HasAdd = 1,
|
|
HasSub = 1,
|
|
HasShift = 1,
|
|
HasMul = 1,
|
|
HasNegate = 1,
|
|
HasAbs = 1,
|
|
HasArg = 0,
|
|
HasAbs2 = 1,
|
|
HasMin = 1,
|
|
HasMax = 1,
|
|
HasConj = 1,
|
|
HasSetLinear = 0,
|
|
HasBlend = 0,
|
|
HasReduxp = 0
|
|
};
|
|
};
|
|
#endif
|
|
|
|
template <>
|
|
struct unpacket_traits<Packet1Xl> {
|
|
typedef numext::int64_t type;
|
|
typedef Packet1Xl half; // Half not yet implemented
|
|
typedef numext::uint8_t mask_t;
|
|
enum {
|
|
size = rvv_packet_size_selector<numext::int64_t, EIGEN_RISCV64_RVV_VL, 1>::size,
|
|
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 1>::alignment,
|
|
vectorizable = true,
|
|
masked_load_available = false,
|
|
masked_store_available = false
|
|
};
|
|
};
|
|
|
|
template <>
|
|
struct unpacket_traits<Packet2Xl> {
|
|
typedef numext::int64_t type;
|
|
typedef Packet1Xl half;
|
|
typedef numext::uint8_t mask_t;
|
|
enum {
|
|
size = rvv_packet_size_selector<numext::int64_t, EIGEN_RISCV64_RVV_VL, 2>::size,
|
|
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 2>::alignment,
|
|
vectorizable = true,
|
|
masked_load_available = false,
|
|
masked_store_available = false
|
|
};
|
|
};
|
|
|
|
template <>
|
|
struct unpacket_traits<Packet4Xl> {
|
|
typedef numext::int64_t type;
|
|
typedef Packet2Xl half;
|
|
typedef numext::uint8_t mask_t;
|
|
enum {
|
|
size = rvv_packet_size_selector<numext::int64_t, EIGEN_RISCV64_RVV_VL, 4>::size,
|
|
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 4>::alignment,
|
|
vectorizable = true,
|
|
masked_load_available = false,
|
|
masked_store_available = false
|
|
};
|
|
};
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE void prefetch<numext::int64_t>(const numext::int64_t* addr) {
|
|
#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
|
|
__builtin_prefetch(addr);
|
|
#endif
|
|
}
|
|
|
|
/********************************* Packet1Xl ************************************/
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pset1<Packet1Xl>(const numext::int64_t& from) {
|
|
return __riscv_vmv_v_x_i64m1(from, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl plset<Packet1Xl>(const numext::int64_t& a) {
|
|
Packet1Xl idx = __riscv_vreinterpret_v_u64m1_i64m1(__riscv_vid_v_u64m1(unpacket_traits<Packet1Xl>::size));
|
|
return __riscv_vadd_vx_i64m1(idx, a, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pzero<Packet1Xl>(const Packet1Xl& /*a*/) {
|
|
return __riscv_vmv_v_x_i64m1(0, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl padd<Packet1Xl>(const Packet1Xl& a, const Packet1Xl& b) {
|
|
return __riscv_vadd_vv_i64m1(a, b, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl psub<Packet1Xl>(const Packet1Xl& a, const Packet1Xl& b) {
|
|
return __riscv_vsub(a, b, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pnegate(const Packet1Xl& a) {
|
|
return __riscv_vneg(a, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pconj(const Packet1Xl& a) {
|
|
return a;
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pmul<Packet1Xl>(const Packet1Xl& a, const Packet1Xl& b) {
|
|
return __riscv_vmul(a, b, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pdiv<Packet1Xl>(const Packet1Xl& a, const Packet1Xl& b) {
|
|
return __riscv_vdiv(a, b, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pmadd(const Packet1Xl& a, const Packet1Xl& b, const Packet1Xl& c) {
|
|
return __riscv_vmadd(a, b, c, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pmsub(const Packet1Xl& a, const Packet1Xl& b, const Packet1Xl& c) {
|
|
return __riscv_vmadd(a, b, pnegate(c), unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pnmadd(const Packet1Xl& a, const Packet1Xl& b, const Packet1Xl& c) {
|
|
return __riscv_vnmsub_vv_i64m1(a, b, c, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pnmsub(const Packet1Xl& a, const Packet1Xl& b, const Packet1Xl& c) {
|
|
return __riscv_vnmsub_vv_i64m1(a, b, pnegate(c), unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pmin<Packet1Xl>(const Packet1Xl& a, const Packet1Xl& b) {
|
|
return __riscv_vmin(a, b, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pmax<Packet1Xl>(const Packet1Xl& a, const Packet1Xl& b) {
|
|
return __riscv_vmax(a, b, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pcmp_le<Packet1Xl>(const Packet1Xl& a, const Packet1Xl& b) {
|
|
PacketMask64 mask = __riscv_vmsle_vv_i64m1_b64(a, b, unpacket_traits<Packet1Xl>::size);
|
|
return __riscv_vmerge_vxm_i64m1(pzero(a), 0xffffffffffffffff, mask, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pcmp_lt<Packet1Xl>(const Packet1Xl& a, const Packet1Xl& b) {
|
|
PacketMask64 mask = __riscv_vmslt_vv_i64m1_b64(a, b, unpacket_traits<Packet1Xl>::size);
|
|
return __riscv_vmerge_vxm_i64m1(pzero(a), 0xffffffffffffffff, mask, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pcmp_eq<Packet1Xl>(const Packet1Xl& a, const Packet1Xl& b) {
|
|
PacketMask64 mask = __riscv_vmseq_vv_i64m1_b64(a, b, unpacket_traits<Packet1Xl>::size);
|
|
return __riscv_vmerge_vxm_i64m1(pzero(a), 0xffffffffffffffff, mask, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl ptrue<Packet1Xl>(const Packet1Xl& /*a*/) {
|
|
return __riscv_vmv_v_x_i64m1(0xffffffffffffffffu, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pand<Packet1Xl>(const Packet1Xl& a, const Packet1Xl& b) {
|
|
return __riscv_vand_vv_i64m1(a, b, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl por<Packet1Xl>(const Packet1Xl& a, const Packet1Xl& b) {
|
|
return __riscv_vor_vv_i64m1(a, b, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pxor<Packet1Xl>(const Packet1Xl& a, const Packet1Xl& b) {
|
|
return __riscv_vxor_vv_i64m1(a, b, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pandnot<Packet1Xl>(const Packet1Xl& a, const Packet1Xl& b) {
|
|
return __riscv_vand_vv_i64m1(a, __riscv_vnot_v_i64m1(b, unpacket_traits<Packet1Xl>::size),
|
|
unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <int N>
|
|
EIGEN_STRONG_INLINE Packet1Xl parithmetic_shift_right(Packet1Xl a) {
|
|
return __riscv_vsra_vx_i64m1(a, N, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <int N>
|
|
EIGEN_STRONG_INLINE Packet1Xl plogical_shift_right(Packet1Xl a) {
|
|
return __riscv_vreinterpret_i64m1(
|
|
__riscv_vsrl_vx_u64m1(__riscv_vreinterpret_u64m1(a), N, unpacket_traits<Packet1Xl>::size));
|
|
}
|
|
|
|
template <int N>
|
|
EIGEN_STRONG_INLINE Packet1Xl plogical_shift_left(Packet1Xl a) {
|
|
return __riscv_vsll_vx_i64m1(a, N, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pload<Packet1Xl>(const numext::int64_t* from) {
|
|
EIGEN_DEBUG_ALIGNED_LOAD return __riscv_vle64_v_i64m1(from, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl ploadu<Packet1Xl>(const numext::int64_t* from) {
|
|
EIGEN_DEBUG_UNALIGNED_LOAD return __riscv_vle64_v_i64m1(from, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl ploaddup<Packet1Xl>(const numext::int64_t* from) {
|
|
Packet1Xul idx = __riscv_vid_v_u64m1(unpacket_traits<Packet1Xl>::size);
|
|
idx = __riscv_vsll_vx_u64m1(__riscv_vand_vx_u64m1(idx, 0xfffffffffffffffeu, unpacket_traits<Packet1Xl>::size), 2,
|
|
unpacket_traits<Packet1Xl>::size);
|
|
// idx = 0 0 sizeof(int64_t) sizeof(int64_t) 2*sizeof(int64_t) 2*sizeof(int64_t) ...
|
|
return __riscv_vloxei64_v_i64m1(from, idx, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl ploadquad<Packet1Xl>(const numext::int64_t* from) {
|
|
Packet1Xul idx = __riscv_vid_v_u64m1(unpacket_traits<Packet1Xl>::size);
|
|
idx = __riscv_vsll_vx_u64m1(__riscv_vand_vx_u64m1(idx, 0xfffffffffffffffcu, unpacket_traits<Packet1Xl>::size), 1,
|
|
unpacket_traits<Packet1Xl>::size);
|
|
;
|
|
return __riscv_vloxei64_v_i64m1(from, idx, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE void pstore<numext::int64_t>(numext::int64_t* to, const Packet1Xl& from) {
|
|
EIGEN_DEBUG_ALIGNED_STORE __riscv_vse64_v_i64m1(to, from, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE void pstoreu<numext::int64_t>(numext::int64_t* to, const Packet1Xl& from) {
|
|
EIGEN_DEBUG_UNALIGNED_STORE __riscv_vse64_v_i64m1(to, from, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_DEVICE_FUNC inline Packet1Xl pgather<numext::int64_t, Packet1Xl>(const numext::int64_t* from, Index stride) {
|
|
return __riscv_vlse64_v_i64m1(from, stride * sizeof(numext::int64_t), unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_DEVICE_FUNC inline void pscatter<numext::int64_t, Packet1Xl>(numext::int64_t* to, const Packet1Xl& from,
|
|
Index stride) {
|
|
__riscv_vsse64(to, stride * sizeof(numext::int64_t), from, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE numext::int64_t pfirst<Packet1Xl>(const Packet1Xl& a) {
|
|
return __riscv_vmv_x_s_i64m1_i64(a);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl preverse(const Packet1Xl& a) {
|
|
Packet1Xul idx = __riscv_vrsub_vx_u64m1(__riscv_vid_v_u64m1(unpacket_traits<Packet1Xl>::size),
|
|
unpacket_traits<Packet1Xl>::size - 1, unpacket_traits<Packet1Xl>::size);
|
|
return __riscv_vrgather_vv_i64m1(a, idx, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xl pabs(const Packet1Xl& a) {
|
|
Packet1Xl mask = __riscv_vsra_vx_i64m1(a, 63, unpacket_traits<Packet1Xl>::size);
|
|
return __riscv_vsub_vv_i64m1(__riscv_vxor_vv_i64m1(a, mask, unpacket_traits<Packet1Xl>::size), mask,
|
|
unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE numext::int64_t predux<Packet1Xl>(const Packet1Xl& a) {
|
|
return __riscv_vmv_x(__riscv_vredsum_vs_i64m1_i64m1(a, __riscv_vmv_v_x_i64m1(0, unpacket_traits<Packet1Xl>::size),
|
|
unpacket_traits<Packet1Xl>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE numext::int64_t predux_mul<Packet1Xl>(const Packet1Xl& a) {
|
|
// Multiply the vector by its reverse
|
|
Packet1Xl prod = __riscv_vmul_vv_i64m1(preverse(a), a, unpacket_traits<Packet1Xl>::size);
|
|
Packet1Xl half_prod;
|
|
|
|
if (EIGEN_RISCV64_RVV_VL >= 1024) {
|
|
half_prod = __riscv_vslidedown_vx_i64m1(prod, 4, unpacket_traits<Packet1Xl>::size);
|
|
prod = __riscv_vmul_vv_i64m1(prod, half_prod, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
if (EIGEN_RISCV64_RVV_VL >= 512) {
|
|
half_prod = __riscv_vslidedown_vx_i64m1(prod, 2, unpacket_traits<Packet1Xl>::size);
|
|
prod = __riscv_vmul_vv_i64m1(prod, half_prod, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
if (EIGEN_RISCV64_RVV_VL >= 256) {
|
|
half_prod = __riscv_vslidedown_vx_i64m1(prod, 1, unpacket_traits<Packet1Xl>::size);
|
|
prod = __riscv_vmul_vv_i64m1(prod, half_prod, unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
|
|
// The reduction is done to the first element.
|
|
return pfirst(prod);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE numext::int64_t predux_min<Packet1Xl>(const Packet1Xl& a) {
|
|
return __riscv_vmv_x(__riscv_vredmin_vs_i64m1_i64m1(
|
|
a, __riscv_vmv_v_x_i64m1((std::numeric_limits<numext::int64_t>::max)(), unpacket_traits<Packet1Xl>::size),
|
|
unpacket_traits<Packet1Xl>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE numext::int64_t predux_max<Packet1Xl>(const Packet1Xl& a) {
|
|
return __riscv_vmv_x(__riscv_vredmax_vs_i64m1_i64m1(
|
|
a, __riscv_vmv_v_x_i64m1((std::numeric_limits<numext::int64_t>::min)(), unpacket_traits<Packet1Xl>::size),
|
|
unpacket_traits<Packet1Xl>::size));
|
|
}
|
|
|
|
template <int N>
|
|
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet1Xl, N>& kernel) {
|
|
numext::int64_t buffer[unpacket_traits<Packet1Xl>::size * N] = {0};
|
|
int i = 0;
|
|
|
|
for (i = 0; i < N; i++) {
|
|
__riscv_vsse64(&buffer[i], N * sizeof(numext::int64_t), kernel.packet[i], unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
for (i = 0; i < N; i++) {
|
|
kernel.packet[i] =
|
|
__riscv_vle64_v_i64m1(&buffer[i * unpacket_traits<Packet1Xl>::size], unpacket_traits<Packet1Xl>::size);
|
|
}
|
|
}
|
|
|
|
/********************************* double ************************************/
|
|
|
|
typedef eigen_packet_wrapper<vfloat64m1_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL))), 15> Packet1Xd;
|
|
typedef eigen_packet_wrapper<vfloat64m2_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 2))), 16>
|
|
Packet2Xd;
|
|
typedef eigen_packet_wrapper<vfloat64m4_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 4))), 17>
|
|
Packet4Xd;
|
|
|
|
#if EIGEN_RISCV64_DEFAULT_LMUL == 1
|
|
typedef Packet1Xd PacketXd;
|
|
|
|
template <>
|
|
struct packet_traits<double> : default_packet_traits {
|
|
typedef Packet1Xd type;
|
|
typedef Packet1Xd half;
|
|
|
|
enum {
|
|
Vectorizable = 1,
|
|
AlignedOnScalar = 1,
|
|
size = rvv_packet_size_selector<double, EIGEN_RISCV64_RVV_VL, 1>::size,
|
|
|
|
HasAdd = 1,
|
|
HasSub = 1,
|
|
HasShift = 1,
|
|
HasMul = 1,
|
|
HasNegate = 1,
|
|
HasAbs = 1,
|
|
HasArg = 0,
|
|
HasAbs2 = 1,
|
|
HasMin = 1,
|
|
HasMax = 1,
|
|
HasConj = 1,
|
|
HasSetLinear = 0,
|
|
HasBlend = 0,
|
|
HasReduxp = 0,
|
|
|
|
HasCmp = 1,
|
|
HasDiv = 1,
|
|
HasRound = 1,
|
|
|
|
HasLog = 1,
|
|
HasExp = 1,
|
|
HasSqrt = 1
|
|
};
|
|
};
|
|
|
|
#elif EIGEN_RISCV64_DEFAULT_LMUL == 2
|
|
typedef Packet2Xd PacketXd;
|
|
|
|
template <>
|
|
struct packet_traits<double> : default_packet_traits {
|
|
typedef Packet2Xd type;
|
|
typedef Packet1Xd half;
|
|
|
|
enum {
|
|
Vectorizable = 1,
|
|
AlignedOnScalar = 1,
|
|
size = rvv_packet_size_selector<double, EIGEN_RISCV64_RVV_VL, 2>::size,
|
|
|
|
HasAdd = 1,
|
|
HasSub = 1,
|
|
HasShift = 1,
|
|
HasMul = 1,
|
|
HasNegate = 1,
|
|
HasAbs = 1,
|
|
HasArg = 0,
|
|
HasAbs2 = 1,
|
|
HasMin = 1,
|
|
HasMax = 1,
|
|
HasConj = 1,
|
|
HasSetLinear = 0,
|
|
HasBlend = 0,
|
|
HasReduxp = 0,
|
|
|
|
HasCmp = 1,
|
|
HasDiv = 1,
|
|
HasRound = 1,
|
|
|
|
HasLog = 1,
|
|
HasExp = 1,
|
|
HasSqrt = 1
|
|
};
|
|
};
|
|
|
|
#elif EIGEN_RISCV64_DEFAULT_LMUL == 4
|
|
typedef Packet4Xd PacketXd;
|
|
|
|
template <>
|
|
struct packet_traits<double> : default_packet_traits {
|
|
typedef Packet4Xd type;
|
|
typedef Packet2Xd half;
|
|
|
|
enum {
|
|
Vectorizable = 1,
|
|
AlignedOnScalar = 1,
|
|
size = rvv_packet_size_selector<double, EIGEN_RISCV64_RVV_VL, 4>::size,
|
|
|
|
HasAdd = 1,
|
|
HasSub = 1,
|
|
HasShift = 1,
|
|
HasMul = 1,
|
|
HasNegate = 1,
|
|
HasAbs = 1,
|
|
HasArg = 0,
|
|
HasAbs2 = 1,
|
|
HasMin = 1,
|
|
HasMax = 1,
|
|
HasConj = 1,
|
|
HasSetLinear = 0,
|
|
HasBlend = 0,
|
|
HasReduxp = 0,
|
|
|
|
HasCmp = 1,
|
|
HasDiv = 1,
|
|
HasRound = 1,
|
|
|
|
HasLog = 1,
|
|
HasExp = 1,
|
|
HasSqrt = 1
|
|
};
|
|
};
|
|
#endif
|
|
|
|
template <>
|
|
struct unpacket_traits<Packet1Xd> {
|
|
typedef double type;
|
|
typedef Packet1Xd half; // Half not yet implemented
|
|
typedef Packet1Xl integer_packet;
|
|
typedef numext::uint8_t mask_t;
|
|
typedef PacketMask64 packet_mask;
|
|
|
|
enum {
|
|
size = rvv_packet_size_selector<double, EIGEN_RISCV64_RVV_VL, 1>::size,
|
|
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 1>::alignment,
|
|
vectorizable = true,
|
|
masked_load_available = false,
|
|
masked_store_available = false
|
|
};
|
|
};
|
|
|
|
template <>
|
|
struct unpacket_traits<Packet2Xd> {
|
|
typedef double type;
|
|
typedef Packet1Xd half;
|
|
typedef Packet2Xl integer_packet;
|
|
typedef numext::uint8_t mask_t;
|
|
typedef PacketMask32 packet_mask;
|
|
|
|
enum {
|
|
size = rvv_packet_size_selector<double, EIGEN_RISCV64_RVV_VL, 2>::size,
|
|
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 2>::alignment,
|
|
vectorizable = true,
|
|
masked_load_available = false,
|
|
masked_store_available = false
|
|
};
|
|
};
|
|
|
|
template <>
|
|
struct unpacket_traits<Packet4Xd> {
|
|
typedef double type;
|
|
typedef Packet2Xd half;
|
|
typedef Packet4Xl integer_packet;
|
|
typedef numext::uint8_t mask_t;
|
|
typedef PacketMask16 packet_mask;
|
|
|
|
enum {
|
|
size = rvv_packet_size_selector<double, EIGEN_RISCV64_RVV_VL, 4>::size,
|
|
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 4>::alignment,
|
|
vectorizable = true,
|
|
masked_load_available = false,
|
|
masked_store_available = false
|
|
};
|
|
};
|
|
|
|
/********************************* Packet1Xd ************************************/
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd ptrue<Packet1Xd>(const Packet1Xd& /*a*/) {
|
|
return __riscv_vreinterpret_f64m1(__riscv_vmv_v_x_u64m1(0xffffffffffffffffu, unpacket_traits<Packet1Xd>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pzero<Packet1Xd>(const Packet1Xd& /*a*/) {
|
|
return __riscv_vfmv_v_f_f64m1(0.0, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pabs(const Packet1Xd& a) {
|
|
return __riscv_vfabs_v_f64m1(a, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pset1<Packet1Xd>(const double& from) {
|
|
return __riscv_vfmv_v_f_f64m1(from, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pset1frombits<Packet1Xd>(numext::uint64_t from) {
|
|
return __riscv_vreinterpret_f64m1(__riscv_vmv_v_x_u64m1(from, unpacket_traits<Packet1Xd>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd plset<Packet1Xd>(const double& a) {
|
|
Packet1Xd idx = __riscv_vfcvt_f_x_v_f64m1(
|
|
__riscv_vreinterpret_v_u64m1_i64m1(__riscv_vid_v_u64m1(unpacket_traits<Packet1Xl>::size)),
|
|
unpacket_traits<Packet1Xd>::size);
|
|
return __riscv_vfadd_vf_f64m1(idx, a, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd padd<Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
return __riscv_vfadd_vv_f64m1(a, b, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd psub<Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
return __riscv_vfsub_vv_f64m1(a, b, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pnegate(const Packet1Xd& a) {
|
|
return __riscv_vfneg_v_f64m1(a, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pconj(const Packet1Xd& a) {
|
|
return a;
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pmul<Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
return __riscv_vfmul_vv_f64m1(a, b, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pdiv<Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
return __riscv_vfdiv_vv_f64m1(a, b, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pmadd(const Packet1Xd& a, const Packet1Xd& b, const Packet1Xd& c) {
|
|
return __riscv_vfmadd_vv_f64m1(a, b, c, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pmsub(const Packet1Xd& a, const Packet1Xd& b, const Packet1Xd& c) {
|
|
return __riscv_vfmsub_vv_f64m1(a, b, c, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pnmadd(const Packet1Xd& a, const Packet1Xd& b, const Packet1Xd& c) {
|
|
return __riscv_vfnmsub_vv_f64m1(a, b, c, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pnmsub(const Packet1Xd& a, const Packet1Xd& b, const Packet1Xd& c) {
|
|
return __riscv_vfnmadd_vv_f64m1(a, b, c, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pmin<Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
Packet1Xd nans = __riscv_vfmv_v_f_f64m1((std::numeric_limits<double>::quiet_NaN)(), unpacket_traits<Packet1Xd>::size);
|
|
PacketMask64 mask = __riscv_vmfeq_vv_f64m1_b64(a, a, unpacket_traits<Packet1Xd>::size);
|
|
PacketMask64 mask2 = __riscv_vmfeq_vv_f64m1_b64(b, b, unpacket_traits<Packet1Xd>::size);
|
|
mask = __riscv_vmand_mm_b64(mask, mask2, unpacket_traits<Packet1Xd>::size);
|
|
|
|
return __riscv_vfmin_vv_f64m1_tumu(mask, nans, a, b, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pmin<PropagateNaN, Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
return pmin<Packet1Xd>(a, b);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pmin<PropagateNumbers, Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
return __riscv_vfmin_vv_f64m1(a, b, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pmax<Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
Packet1Xd nans = __riscv_vfmv_v_f_f64m1((std::numeric_limits<double>::quiet_NaN)(), unpacket_traits<Packet1Xd>::size);
|
|
PacketMask64 mask = __riscv_vmfeq_vv_f64m1_b64(a, a, unpacket_traits<Packet1Xd>::size);
|
|
PacketMask64 mask2 = __riscv_vmfeq_vv_f64m1_b64(b, b, unpacket_traits<Packet1Xd>::size);
|
|
mask = __riscv_vmand_mm_b64(mask, mask2, unpacket_traits<Packet1Xd>::size);
|
|
|
|
return __riscv_vfmax_vv_f64m1_tumu(mask, nans, a, b, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pmax<PropagateNaN, Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
return pmax<Packet1Xd>(a, b);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pmax<PropagateNumbers, Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
return __riscv_vfmax_vv_f64m1(a, b, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pcmp_le<Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
PacketMask64 mask = __riscv_vmfle_vv_f64m1_b64(a, b, unpacket_traits<Packet1Xd>::size);
|
|
return __riscv_vmerge_vvm_f64m1(pzero<Packet1Xd>(a), ptrue<Packet1Xd>(a), mask, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pcmp_lt<Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
PacketMask64 mask = __riscv_vmflt_vv_f64m1_b64(a, b, unpacket_traits<Packet1Xd>::size);
|
|
return __riscv_vmerge_vvm_f64m1(pzero<Packet1Xd>(a), ptrue<Packet1Xd>(a), mask, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pcmp_eq<Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
PacketMask64 mask = __riscv_vmfeq_vv_f64m1_b64(a, b, unpacket_traits<Packet1Xd>::size);
|
|
return __riscv_vmerge_vvm_f64m1(pzero<Packet1Xd>(a), ptrue<Packet1Xd>(a), mask, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pcmp_lt_or_nan<Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
PacketMask64 mask = __riscv_vmfge_vv_f64m1_b64(a, b, unpacket_traits<Packet1Xd>::size);
|
|
return __riscv_vfmerge_vfm_f64m1(ptrue<Packet1Xd>(a), 0.0, mask, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
// Logical Operations are not supported for double, so reinterpret casts
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pand<Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
return __riscv_vreinterpret_v_u64m1_f64m1(__riscv_vand_vv_u64m1(
|
|
__riscv_vreinterpret_v_f64m1_u64m1(a), __riscv_vreinterpret_v_f64m1_u64m1(b), unpacket_traits<Packet1Xd>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd por<Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
return __riscv_vreinterpret_v_u64m1_f64m1(__riscv_vor_vv_u64m1(
|
|
__riscv_vreinterpret_v_f64m1_u64m1(a), __riscv_vreinterpret_v_f64m1_u64m1(b), unpacket_traits<Packet1Xd>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pxor<Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
return __riscv_vreinterpret_v_u64m1_f64m1(__riscv_vxor_vv_u64m1(
|
|
__riscv_vreinterpret_v_f64m1_u64m1(a), __riscv_vreinterpret_v_f64m1_u64m1(b), unpacket_traits<Packet1Xd>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pandnot<Packet1Xd>(const Packet1Xd& a, const Packet1Xd& b) {
|
|
return __riscv_vreinterpret_v_u64m1_f64m1(__riscv_vand_vv_u64m1(
|
|
__riscv_vreinterpret_v_f64m1_u64m1(a),
|
|
__riscv_vnot_v_u64m1(__riscv_vreinterpret_v_f64m1_u64m1(b), unpacket_traits<Packet1Xd>::size),
|
|
unpacket_traits<Packet1Xd>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pload<Packet1Xd>(const double* from) {
|
|
EIGEN_DEBUG_ALIGNED_LOAD return __riscv_vle64_v_f64m1(from, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd ploadu<Packet1Xd>(const double* from) {
|
|
EIGEN_DEBUG_UNALIGNED_LOAD return __riscv_vle64_v_f64m1(from, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd ploaddup<Packet1Xd>(const double* from) {
|
|
Packet1Xul idx = __riscv_vid_v_u64m1(unpacket_traits<Packet1Xd>::size);
|
|
idx = __riscv_vsll_vx_u64m1(__riscv_vand_vx_u64m1(idx, 0xfffffffffffffffeu, unpacket_traits<Packet1Xd>::size), 2,
|
|
unpacket_traits<Packet1Xd>::size);
|
|
return __riscv_vloxei64_v_f64m1(from, idx, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd ploadquad<Packet1Xd>(const double* from) {
|
|
Packet1Xul idx = __riscv_vid_v_u64m1(unpacket_traits<Packet1Xd>::size);
|
|
idx = __riscv_vsll_vx_u64m1(__riscv_vand_vx_u64m1(idx, 0xfffffffffffffffcu, unpacket_traits<Packet1Xd>::size), 1,
|
|
unpacket_traits<Packet1Xd>::size);
|
|
;
|
|
return __riscv_vloxei64_v_f64m1(from, idx, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet1Xd& from) {
|
|
EIGEN_DEBUG_ALIGNED_STORE __riscv_vse64_v_f64m1(to, from, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet1Xd& from) {
|
|
EIGEN_DEBUG_UNALIGNED_STORE __riscv_vse64_v_f64m1(to, from, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_DEVICE_FUNC inline Packet1Xd pgather<double, Packet1Xd>(const double* from, Index stride) {
|
|
return __riscv_vlse64_v_f64m1(from, stride * sizeof(double), unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_DEVICE_FUNC inline void pscatter<double, Packet1Xd>(double* to, const Packet1Xd& from, Index stride) {
|
|
__riscv_vsse64(to, stride * sizeof(double), from, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE double pfirst<Packet1Xd>(const Packet1Xd& a) {
|
|
return __riscv_vfmv_f_s_f64m1_f64(a);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd psqrt(const Packet1Xd& a) {
|
|
return __riscv_vfsqrt_v_f64m1(a, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd print<Packet1Xd>(const Packet1Xd& a) {
|
|
const Packet1Xd limit = pset1<Packet1Xd>(static_cast<double>(1ull << 52));
|
|
const Packet1Xd abs_a = pabs(a);
|
|
|
|
PacketMask64 mask = __riscv_vmfne_vv_f64m1_b64(a, a, unpacket_traits<Packet1Xd>::size);
|
|
const Packet1Xd x = __riscv_vfadd_vv_f64m1_tumu(mask, a, a, a, unpacket_traits<Packet1Xd>::size);
|
|
const Packet1Xd new_x = __riscv_vfcvt_f_x_v_f64m1(__riscv_vfcvt_x_f_v_i64m1(a, unpacket_traits<Packet1Xd>::size),
|
|
unpacket_traits<Packet1Xd>::size);
|
|
|
|
mask = __riscv_vmflt_vv_f64m1_b64(abs_a, limit, unpacket_traits<Packet1Xd>::size);
|
|
Packet1Xd signed_x = __riscv_vfsgnj_vv_f64m1(new_x, x, unpacket_traits<Packet1Xd>::size);
|
|
return __riscv_vmerge_vvm_f64m1(x, signed_x, mask, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pfloor<Packet1Xd>(const Packet1Xd& a) {
|
|
Packet1Xd tmp = print<Packet1Xd>(a);
|
|
// If greater, subtract one.
|
|
PacketMask64 mask = __riscv_vmflt_vv_f64m1_b64(a, tmp, unpacket_traits<Packet1Xd>::size);
|
|
return __riscv_vfsub_vf_f64m1_tumu(mask, tmp, tmp, 1.0, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd preverse(const Packet1Xd& a) {
|
|
Packet1Xul idx = __riscv_vrsub_vx_u64m1(__riscv_vid_v_u64m1(unpacket_traits<Packet1Xd>::size),
|
|
unpacket_traits<Packet1Xd>::size - 1, unpacket_traits<Packet1Xd>::size);
|
|
return __riscv_vrgather_vv_f64m1(a, idx, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pfrexp<Packet1Xd>(const Packet1Xd& a, Packet1Xd& exponent) {
|
|
return pfrexp_generic(a, exponent);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE double predux<Packet1Xd>(const Packet1Xd& a) {
|
|
return __riscv_vfmv_f(__riscv_vfredusum_vs_f64m1_f64m1(
|
|
a, __riscv_vfmv_v_f_f64m1(0.0, unpacket_traits<Packet1Xd>::size), unpacket_traits<Packet1Xd>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE double predux_mul<Packet1Xd>(const Packet1Xd& a) {
|
|
// Multiply the vector by its reverse
|
|
Packet1Xd prod = __riscv_vfmul_vv_f64m1(preverse(a), a, unpacket_traits<Packet1Xd>::size);
|
|
Packet1Xd half_prod;
|
|
|
|
if (EIGEN_RISCV64_RVV_VL >= 1024) {
|
|
half_prod = __riscv_vslidedown_vx_f64m1(prod, 4, unpacket_traits<Packet1Xd>::size);
|
|
prod = __riscv_vfmul_vv_f64m1(prod, half_prod, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
if (EIGEN_RISCV64_RVV_VL >= 512) {
|
|
half_prod = __riscv_vslidedown_vx_f64m1(prod, 2, unpacket_traits<Packet1Xd>::size);
|
|
prod = __riscv_vfmul_vv_f64m1(prod, half_prod, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
if (EIGEN_RISCV64_RVV_VL >= 256) {
|
|
half_prod = __riscv_vslidedown_vx_f64m1(prod, 1, unpacket_traits<Packet1Xd>::size);
|
|
prod = __riscv_vfmul_vv_f64m1(prod, half_prod, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
// The reduction is done to the first element.
|
|
return pfirst(prod);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE double predux_min<Packet1Xd>(const Packet1Xd& a) {
|
|
return (
|
|
std::min)(__riscv_vfmv_f(__riscv_vfredmin_vs_f64m1_f64m1(
|
|
a,
|
|
__riscv_vfmv_v_f_f64m1((std::numeric_limits<double>::quiet_NaN)(), unpacket_traits<Packet1Xd>::size),
|
|
unpacket_traits<Packet1Xd>::size)),
|
|
(std::numeric_limits<double>::max)());
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE double predux_max<Packet1Xd>(const Packet1Xd& a) {
|
|
return (
|
|
std::max)(__riscv_vfmv_f(__riscv_vfredmax_vs_f64m1_f64m1(
|
|
a,
|
|
__riscv_vfmv_v_f_f64m1((std::numeric_limits<double>::quiet_NaN)(), unpacket_traits<Packet1Xd>::size),
|
|
unpacket_traits<Packet1Xd>::size)),
|
|
-(std::numeric_limits<double>::max)());
|
|
}
|
|
|
|
template <int N>
|
|
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet1Xd, N>& kernel) {
|
|
double buffer[unpacket_traits<Packet1Xd>::size * N];
|
|
int i = 0;
|
|
|
|
for (i = 0; i < N; i++) {
|
|
__riscv_vsse64(&buffer[i], N * sizeof(double), kernel.packet[i], unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
for (i = 0; i < N; i++) {
|
|
kernel.packet[i] =
|
|
__riscv_vle64_v_f64m1(&buffer[i * unpacket_traits<Packet1Xd>::size], unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xd pldexp<Packet1Xd>(const Packet1Xd& a, const Packet1Xd& exponent) {
|
|
return pldexp_generic(a, exponent);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE PacketMask64 por(const PacketMask64& a, const PacketMask64& b) {
|
|
return __riscv_vmor_mm_b64(a, b, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE PacketMask64 pandnot(const PacketMask64& a, const PacketMask64& b) {
|
|
return __riscv_vmor_mm_b64(a, b, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE PacketMask64 pand(const PacketMask64& a, const PacketMask64& b) {
|
|
return __riscv_vmand_mm_b64(a, b, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
EIGEN_STRONG_INLINE PacketMask64 pcmp_eq_mask(const Packet1Xd& a, const Packet1Xd& b) {
|
|
return __riscv_vmfeq_vv_f64m1_b64(a, b, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
EIGEN_STRONG_INLINE PacketMask64 pcmp_lt_mask(const Packet1Xd& a, const Packet1Xd& b) {
|
|
return __riscv_vmflt_vv_f64m1_b64(a, b, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
EIGEN_STRONG_INLINE Packet1Xd pselect(const PacketMask64& mask, const Packet1Xd& a, const Packet1Xd& b) {
|
|
return __riscv_vmerge_vvm_f64m1(b, a, mask, unpacket_traits<Packet1Xd>::size);
|
|
}
|
|
|
|
/********************************* short **************************************/
|
|
|
|
typedef eigen_packet_wrapper<vint16m1_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL))), 18> Packet1Xs;
|
|
typedef eigen_packet_wrapper<vuint16m1_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL))), 19> Packet1Xsu;
|
|
|
|
typedef eigen_packet_wrapper<vint16m2_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 2))), 20>
|
|
Packet2Xs;
|
|
typedef eigen_packet_wrapper<vuint16m2_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 2))), 21>
|
|
Packet2Xsu;
|
|
|
|
typedef eigen_packet_wrapper<vint16m4_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 4))), 22>
|
|
Packet4Xs;
|
|
typedef eigen_packet_wrapper<vuint16m4_t __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 4))), 23>
|
|
Packet4Xsu;
|
|
|
|
#if EIGEN_RISCV64_DEFAULT_LMUL == 1
|
|
typedef Packet1Xs PacketXs;
|
|
typedef Packet1Xsu PacketXsu;
|
|
|
|
template <>
|
|
struct packet_traits<numext::int16_t> : default_packet_traits {
|
|
typedef Packet1Xs type;
|
|
typedef Packet1Xs half; // Half not implemented yet
|
|
enum {
|
|
Vectorizable = 1,
|
|
AlignedOnScalar = 1,
|
|
size = rvv_packet_size_selector<numext::int16_t, EIGEN_RISCV64_RVV_VL, 1>::size,
|
|
|
|
HasAdd = 1,
|
|
HasSub = 1,
|
|
HasShift = 1,
|
|
HasMul = 1,
|
|
HasNegate = 1,
|
|
HasAbs = 1,
|
|
HasArg = 0,
|
|
HasAbs2 = 1,
|
|
HasMin = 1,
|
|
HasMax = 1,
|
|
HasConj = 1,
|
|
HasSetLinear = 0,
|
|
HasBlend = 0,
|
|
HasReduxp = 0
|
|
};
|
|
};
|
|
|
|
#elif EIGEN_RISCV64_DEFAULT_LMUL == 2
|
|
typedef Packet2Xs PacketXs;
|
|
typedef Packet2Xsu PacketXsu;
|
|
|
|
template <>
|
|
struct packet_traits<numext::int16_t> : default_packet_traits {
|
|
typedef Packet2Xs type;
|
|
typedef Packet1Xs half;
|
|
enum {
|
|
Vectorizable = 1,
|
|
AlignedOnScalar = 1,
|
|
size = rvv_packet_size_selector<numext::int16_t, EIGEN_RISCV64_RVV_VL, 2>::size,
|
|
|
|
HasAdd = 1,
|
|
HasSub = 1,
|
|
HasShift = 1,
|
|
HasMul = 1,
|
|
HasNegate = 1,
|
|
HasAbs = 1,
|
|
HasArg = 0,
|
|
HasAbs2 = 1,
|
|
HasMin = 1,
|
|
HasMax = 1,
|
|
HasConj = 1,
|
|
HasSetLinear = 0,
|
|
HasBlend = 0,
|
|
HasReduxp = 0
|
|
};
|
|
};
|
|
|
|
#elif EIGEN_RISCV64_DEFAULT_LMUL == 4
|
|
typedef Packet4Xs PacketXs;
|
|
typedef Packet4Xsu PacketXsu;
|
|
|
|
template <>
|
|
struct packet_traits<numext::int16_t> : default_packet_traits {
|
|
typedef Packet4Xs type;
|
|
typedef Packet2Xs half;
|
|
enum {
|
|
Vectorizable = 1,
|
|
AlignedOnScalar = 1,
|
|
size = rvv_packet_size_selector<numext::int16_t, EIGEN_RISCV64_RVV_VL, 4>::size,
|
|
|
|
HasAdd = 1,
|
|
HasSub = 1,
|
|
HasShift = 1,
|
|
HasMul = 1,
|
|
HasNegate = 1,
|
|
HasAbs = 1,
|
|
HasArg = 0,
|
|
HasAbs2 = 1,
|
|
HasMin = 1,
|
|
HasMax = 1,
|
|
HasConj = 1,
|
|
HasSetLinear = 0,
|
|
HasBlend = 0,
|
|
HasReduxp = 0
|
|
};
|
|
};
|
|
#endif
|
|
|
|
template <>
|
|
struct unpacket_traits<Packet1Xs> {
|
|
typedef numext::int16_t type;
|
|
typedef Packet1Xs half; // Half not yet implemented
|
|
typedef numext::uint8_t mask_t;
|
|
enum {
|
|
size = rvv_packet_size_selector<numext::int16_t, EIGEN_RISCV64_RVV_VL, 1>::size,
|
|
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 1>::alignment,
|
|
vectorizable = true,
|
|
masked_load_available = false,
|
|
masked_store_available = false
|
|
};
|
|
};
|
|
|
|
template <>
|
|
struct unpacket_traits<Packet2Xs> {
|
|
typedef numext::int16_t type;
|
|
typedef Packet1Xs half;
|
|
typedef numext::uint8_t mask_t;
|
|
enum {
|
|
size = rvv_packet_size_selector<numext::int16_t, EIGEN_RISCV64_RVV_VL, 2>::size,
|
|
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 2>::alignment,
|
|
vectorizable = true,
|
|
masked_load_available = false,
|
|
masked_store_available = false
|
|
};
|
|
};
|
|
|
|
template <>
|
|
struct unpacket_traits<Packet4Xs> {
|
|
typedef numext::int16_t type;
|
|
typedef Packet2Xs half;
|
|
typedef numext::uint8_t mask_t;
|
|
enum {
|
|
size = rvv_packet_size_selector<numext::int16_t, EIGEN_RISCV64_RVV_VL, 4>::size,
|
|
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 4>::alignment,
|
|
vectorizable = true,
|
|
masked_load_available = false,
|
|
masked_store_available = false
|
|
};
|
|
};
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE void prefetch<numext::int16_t>(const numext::int16_t* addr) {
|
|
#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
|
|
__builtin_prefetch(addr);
|
|
#endif
|
|
}
|
|
|
|
/********************************* Packet1Xs ************************************/
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pset1<Packet1Xs>(const numext::int16_t& from) {
|
|
return __riscv_vmv_v_x_i16m1(from, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs plset<Packet1Xs>(const numext::int16_t& a) {
|
|
Packet1Xs idx = __riscv_vreinterpret_v_u16m1_i16m1(__riscv_vid_v_u16m1(unpacket_traits<Packet1Xs>::size));
|
|
return __riscv_vadd_vx_i16m1(idx, a, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pzero<Packet1Xs>(const Packet1Xs& /*a*/) {
|
|
return __riscv_vmv_v_x_i16m1(0, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs padd<Packet1Xs>(const Packet1Xs& a, const Packet1Xs& b) {
|
|
return __riscv_vadd_vv_i16m1(a, b, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs psub<Packet1Xs>(const Packet1Xs& a, const Packet1Xs& b) {
|
|
return __riscv_vsub(a, b, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pnegate(const Packet1Xs& a) {
|
|
return __riscv_vneg(a, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pconj(const Packet1Xs& a) {
|
|
return a;
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pmul<Packet1Xs>(const Packet1Xs& a, const Packet1Xs& b) {
|
|
return __riscv_vmul(a, b, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pdiv<Packet1Xs>(const Packet1Xs& a, const Packet1Xs& b) {
|
|
return __riscv_vdiv(a, b, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pmadd(const Packet1Xs& a, const Packet1Xs& b, const Packet1Xs& c) {
|
|
return __riscv_vmadd(a, b, c, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pmsub(const Packet1Xs& a, const Packet1Xs& b, const Packet1Xs& c) {
|
|
return __riscv_vmadd(a, b, pnegate(c), unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pnmadd(const Packet1Xs& a, const Packet1Xs& b, const Packet1Xs& c) {
|
|
return __riscv_vnmsub_vv_i16m1(a, b, c, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pnmsub(const Packet1Xs& a, const Packet1Xs& b, const Packet1Xs& c) {
|
|
return __riscv_vnmsub_vv_i16m1(a, b, pnegate(c), unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pmin<Packet1Xs>(const Packet1Xs& a, const Packet1Xs& b) {
|
|
return __riscv_vmin(a, b, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pmax<Packet1Xs>(const Packet1Xs& a, const Packet1Xs& b) {
|
|
return __riscv_vmax(a, b, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pcmp_le<Packet1Xs>(const Packet1Xs& a, const Packet1Xs& b) {
|
|
PacketMask16 mask = __riscv_vmsle_vv_i16m1_b16(a, b, unpacket_traits<Packet1Xs>::size);
|
|
return __riscv_vmerge_vxm_i16m1(pzero(a), static_cast<short>(0xffff), mask, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pcmp_lt<Packet1Xs>(const Packet1Xs& a, const Packet1Xs& b) {
|
|
PacketMask16 mask = __riscv_vmslt_vv_i16m1_b16(a, b, unpacket_traits<Packet1Xs>::size);
|
|
return __riscv_vmerge_vxm_i16m1(pzero(a), static_cast<short>(0xffff), mask, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pcmp_eq<Packet1Xs>(const Packet1Xs& a, const Packet1Xs& b) {
|
|
PacketMask16 mask = __riscv_vmseq_vv_i16m1_b16(a, b, unpacket_traits<Packet1Xs>::size);
|
|
return __riscv_vmerge_vxm_i16m1(pzero(a), static_cast<short>(0xffff), mask, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs ptrue<Packet1Xs>(const Packet1Xs& /*a*/) {
|
|
return __riscv_vmv_v_x_i16m1(static_cast<unsigned short>(0xffffu), unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pand<Packet1Xs>(const Packet1Xs& a, const Packet1Xs& b) {
|
|
return __riscv_vand_vv_i16m1(a, b, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs por<Packet1Xs>(const Packet1Xs& a, const Packet1Xs& b) {
|
|
return __riscv_vor_vv_i16m1(a, b, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pxor<Packet1Xs>(const Packet1Xs& a, const Packet1Xs& b) {
|
|
return __riscv_vxor_vv_i16m1(a, b, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pandnot<Packet1Xs>(const Packet1Xs& a, const Packet1Xs& b) {
|
|
return __riscv_vand_vv_i16m1(a, __riscv_vnot_v_i16m1(b, unpacket_traits<Packet1Xs>::size),
|
|
unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <int N>
|
|
EIGEN_STRONG_INLINE Packet1Xs parithmetic_shift_right(Packet1Xs a) {
|
|
return __riscv_vsra_vx_i16m1(a, N, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <int N>
|
|
EIGEN_STRONG_INLINE Packet1Xs plogical_shift_right(Packet1Xs a) {
|
|
return __riscv_vreinterpret_i16m1(
|
|
__riscv_vsrl_vx_u16m1(__riscv_vreinterpret_u16m1(a), N, unpacket_traits<Packet1Xs>::size));
|
|
}
|
|
|
|
template <int N>
|
|
EIGEN_STRONG_INLINE Packet1Xs plogical_shift_left(Packet1Xs a) {
|
|
return __riscv_vsll_vx_i16m1(a, N, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pload<Packet1Xs>(const numext::int16_t* from) {
|
|
EIGEN_DEBUG_ALIGNED_LOAD return __riscv_vle16_v_i16m1(from, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs ploadu<Packet1Xs>(const numext::int16_t* from) {
|
|
EIGEN_DEBUG_UNALIGNED_LOAD return __riscv_vle16_v_i16m1(from, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs ploaddup<Packet1Xs>(const numext::int16_t* from) {
|
|
Packet1Xsu idx = __riscv_vid_v_u16m1(unpacket_traits<Packet1Xs>::size);
|
|
idx = __riscv_vand_vx_u16m1(idx, 0xfffeu, unpacket_traits<Packet1Xs>::size);
|
|
// idx = 0 0 sizeof(int16_t) sizeof(int16_t) 2*sizeof(int16_t) 2*sizeof(int16_t) ...
|
|
return __riscv_vloxei16_v_i16m1(from, idx, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs ploadquad<Packet1Xs>(const numext::int16_t* from) {
|
|
Packet1Xsu idx = __riscv_vid_v_u16m1(unpacket_traits<Packet1Xs>::size);
|
|
idx = __riscv_vsrl_vx_u16m1(__riscv_vand_vx_u16m1(idx, 0xfffcu, unpacket_traits<Packet1Xs>::size), 1,
|
|
unpacket_traits<Packet1Xs>::size);
|
|
return __riscv_vloxei16_v_i16m1(from, idx, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE void pstore<numext::int16_t>(numext::int16_t* to, const Packet1Xs& from) {
|
|
EIGEN_DEBUG_ALIGNED_STORE __riscv_vse16_v_i16m1(to, from, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE void pstoreu<numext::int16_t>(numext::int16_t* to, const Packet1Xs& from) {
|
|
EIGEN_DEBUG_UNALIGNED_STORE __riscv_vse16_v_i16m1(to, from, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_DEVICE_FUNC inline Packet1Xs pgather<numext::int16_t, Packet1Xs>(const numext::int16_t* from, Index stride) {
|
|
return __riscv_vlse16_v_i16m1(from, stride * sizeof(numext::int16_t), unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_DEVICE_FUNC inline void pscatter<numext::int16_t, Packet1Xs>(numext::int16_t* to, const Packet1Xs& from,
|
|
Index stride) {
|
|
__riscv_vsse16(to, stride * sizeof(numext::int16_t), from, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE numext::int16_t pfirst<Packet1Xs>(const Packet1Xs& a) {
|
|
return __riscv_vmv_x_s_i16m1_i16(a);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs preverse(const Packet1Xs& a) {
|
|
Packet1Xsu idx = __riscv_vrsub_vx_u16m1(__riscv_vid_v_u16m1(unpacket_traits<Packet1Xs>::size),
|
|
unpacket_traits<Packet1Xs>::size - 1, unpacket_traits<Packet1Xs>::size);
|
|
return __riscv_vrgather_vv_i16m1(a, idx, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE Packet1Xs pabs(const Packet1Xs& a) {
|
|
Packet1Xs mask = __riscv_vsra_vx_i16m1(a, 15, unpacket_traits<Packet1Xs>::size);
|
|
return __riscv_vsub_vv_i16m1(__riscv_vxor_vv_i16m1(a, mask, unpacket_traits<Packet1Xs>::size), mask,
|
|
unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE numext::int16_t predux<Packet1Xs>(const Packet1Xs& a) {
|
|
return __riscv_vmv_x(__riscv_vredsum_vs_i16m1_i16m1(a, __riscv_vmv_v_x_i16m1(0, unpacket_traits<Packet1Xs>::size),
|
|
unpacket_traits<Packet1Xs>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE numext::int16_t predux_mul<Packet1Xs>(const Packet1Xs& a) {
|
|
// Multiply the vector by its reverse
|
|
Packet1Xs prod = __riscv_vmul_vv_i16m1(preverse(a), a, unpacket_traits<Packet1Xs>::size);
|
|
Packet1Xs half_prod;
|
|
|
|
if (EIGEN_RISCV64_RVV_VL >= 1024) {
|
|
half_prod = __riscv_vslidedown_vx_i16m1(prod, 16, unpacket_traits<Packet1Xs>::size);
|
|
prod = __riscv_vmul_vv_i16m1(prod, half_prod, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
if (EIGEN_RISCV64_RVV_VL >= 512) {
|
|
half_prod = __riscv_vslidedown_vx_i16m1(prod, 8, unpacket_traits<Packet1Xs>::size);
|
|
prod = __riscv_vmul_vv_i16m1(prod, half_prod, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
if (EIGEN_RISCV64_RVV_VL >= 256) {
|
|
half_prod = __riscv_vslidedown_vx_i16m1(prod, 4, unpacket_traits<Packet1Xs>::size);
|
|
prod = __riscv_vmul_vv_i16m1(prod, half_prod, unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
// Last reduction
|
|
half_prod = __riscv_vslidedown_vx_i16m1(prod, 2, unpacket_traits<Packet1Xs>::size);
|
|
prod = __riscv_vmul_vv_i16m1(prod, half_prod, unpacket_traits<Packet1Xs>::size);
|
|
|
|
half_prod = __riscv_vslidedown_vx_i16m1(prod, 1, unpacket_traits<Packet1Xs>::size);
|
|
prod = __riscv_vmul_vv_i16m1(prod, half_prod, unpacket_traits<Packet1Xs>::size);
|
|
|
|
// The reduction is done to the first element.
|
|
return pfirst(prod);
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE numext::int16_t predux_min<Packet1Xs>(const Packet1Xs& a) {
|
|
return __riscv_vmv_x(__riscv_vredmin_vs_i16m1_i16m1(
|
|
a, __riscv_vmv_v_x_i16m1((std::numeric_limits<numext::int16_t>::max)(), unpacket_traits<Packet1Xs>::size),
|
|
unpacket_traits<Packet1Xs>::size));
|
|
}
|
|
|
|
template <>
|
|
EIGEN_STRONG_INLINE numext::int16_t predux_max<Packet1Xs>(const Packet1Xs& a) {
|
|
return __riscv_vmv_x(__riscv_vredmax_vs_i16m1_i16m1(
|
|
a, __riscv_vmv_v_x_i16m1((std::numeric_limits<numext::int16_t>::min)(), unpacket_traits<Packet1Xs>::size),
|
|
unpacket_traits<Packet1Xs>::size));
|
|
}
|
|
|
|
template <int N>
|
|
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet1Xs, N>& kernel) {
|
|
numext::int16_t buffer[unpacket_traits<Packet1Xs>::size * N] = {0};
|
|
int i = 0;
|
|
|
|
for (i = 0; i < N; i++) {
|
|
__riscv_vsse16(&buffer[i], N * sizeof(numext::int16_t), kernel.packet[i], unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
for (i = 0; i < N; i++) {
|
|
kernel.packet[i] =
|
|
__riscv_vle16_v_i16m1(&buffer[i * unpacket_traits<Packet1Xs>::size], unpacket_traits<Packet1Xs>::size);
|
|
}
|
|
}
|
|
|
|
} // namespace internal
|
|
} // namespace Eigen
|
|
|
|
#endif // EIGEN_PACKET_MATH_RVV10_H
|