[SYCL Backend]

* Adding Missing operations for vector comparison in SYCL. This caused compiler error for vector comparison when compiling SYCL * Fixing the compiler error for placement new in TensorForcedEval.h This caused compiler error when compiling SYCL backend * Reducing the SYCL warning by removing the abort function inside the kernel * Adding Strong inline to functions inside SYCL interop.
2026-04-10 11:34:33 +08:00 · 2020-01-07 15:13:37 +00:00
parent eedb7eeacf
commit d0ae052da4
3 changed files with 149 additions and 19 deletions
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
@@ -77,6 +77,28 @@ class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOn
    typename XprType::Nested m_xpr;
 };

+namespace internal {
+template <typename Device, typename CoeffReturnType>
+struct non_integral_type_placement_new{
+  template <typename StorageType>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index numValues, StorageType m_buffer) {
+   // Initialize non-trivially constructible types.
+    if (!internal::is_arithmetic<CoeffReturnType>::value) {
+      for (Index i = 0; i < numValues; ++i) new (m_buffer + i) CoeffReturnType();
+    }
+}
+};
+
+// SYCL does not support non-integral types 
+// having new (m_buffer + i) CoeffReturnType() causes the following compiler error for SYCL Devices 
+// no matching function for call to 'operator new'
+template <typename CoeffReturnType>
+struct non_integral_type_placement_new<Eigen::SyclDevice, CoeffReturnType> {
+  template <typename StorageType>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index, StorageType) {
+}
+};
+} // end namespace internal

 template<typename ArgType_, typename Device>
 struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
@@ -127,10 +149,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
    const Index numValues =  internal::array_prod(m_impl.dimensions());
    m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType)));

-    // Initialize non-trivially constructible types.
-    if (!internal::is_arithmetic<CoeffReturnType>::value) {
-      for (Index i = 0; i < numValues; ++i) new (m_buffer + i) CoeffReturnType();
-    }
+   internal::non_integral_type_placement_new<Device, CoeffReturnType>()(numValues, m_buffer);

    typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo;
    EvalTo evalToTmp(m_device.get(m_buffer), m_op);