mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-27 07:29:52 +08:00
[SYCL Backend]
* Adding Missing operations for vector comparison in SYCL. This caused compiler error for vector comparison when compiling SYCL * Fixing the compiler error for placement new in TensorForcedEval.h This caused compiler error when compiling SYCL backend * Reducing the SYCL warning by removing the abort function inside the kernel * Adding Strong inline to functions inside SYCL interop.
This commit is contained in:
parent
eedb7eeacf
commit
d0ae052da4
@ -147,7 +147,7 @@ struct PacketWrapper<PacketReturnType, 4> {
|
||||
typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type
|
||||
Scalar;
|
||||
template <typename Index>
|
||||
EIGEN_DEVICE_FUNC static Scalar scalarize(Index index, PacketReturnType &in) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Scalar scalarize(Index index, PacketReturnType &in) {
|
||||
switch (index) {
|
||||
case 0:
|
||||
return in.x();
|
||||
@ -158,17 +158,18 @@ struct PacketWrapper<PacketReturnType, 4> {
|
||||
case 3:
|
||||
return in.w();
|
||||
default:
|
||||
eigen_assert(false && "INDEX MUST BE BETWEEN 0 and 3");
|
||||
abort();
|
||||
//INDEX MUST BE BETWEEN 0 and 3.There is no abort function in SYCL kernel. so we cannot use abort here.
|
||||
// The code will never reach here
|
||||
__builtin_unreachable();
|
||||
}
|
||||
__builtin_unreachable();
|
||||
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type(
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static PacketReturnType convert_to_packet_type(
|
||||
Scalar in, Scalar other) {
|
||||
return PacketReturnType(in, other, other, other);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType &lhs, Scalar *rhs) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void set_packet(PacketReturnType &lhs, Scalar *rhs) {
|
||||
lhs = PacketReturnType(rhs[0], rhs[1], rhs[2], rhs[3]);
|
||||
}
|
||||
};
|
||||
@ -178,14 +179,14 @@ struct PacketWrapper<PacketReturnType, 1> {
|
||||
typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type
|
||||
Scalar;
|
||||
template <typename Index>
|
||||
EIGEN_DEVICE_FUNC static Scalar scalarize(Index, PacketReturnType &in) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Scalar scalarize(Index, PacketReturnType &in) {
|
||||
return in;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type(Scalar in,
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static PacketReturnType convert_to_packet_type(Scalar in,
|
||||
Scalar) {
|
||||
return PacketReturnType(in);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType &lhs, Scalar *rhs) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void set_packet(PacketReturnType &lhs, Scalar *rhs) {
|
||||
lhs = rhs[0];
|
||||
}
|
||||
};
|
||||
@ -195,24 +196,25 @@ struct PacketWrapper<PacketReturnType, 2> {
|
||||
typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type
|
||||
Scalar;
|
||||
template <typename Index>
|
||||
EIGEN_DEVICE_FUNC static Scalar scalarize(Index index, PacketReturnType &in) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Scalar scalarize(Index index, PacketReturnType &in) {
|
||||
switch (index) {
|
||||
case 0:
|
||||
return in.x();
|
||||
case 1:
|
||||
return in.y();
|
||||
default:
|
||||
eigen_assert(false && "INDEX MUST BE BETWEEN 0 and 1");
|
||||
abort();
|
||||
//INDEX MUST BE BETWEEN 0 and 1.There is no abort function in SYCL kernel. so we cannot use abort here.
|
||||
// The code will never reach here
|
||||
__builtin_unreachable();
|
||||
}
|
||||
__builtin_unreachable();
|
||||
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type(
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static PacketReturnType convert_to_packet_type(
|
||||
Scalar in, Scalar other) {
|
||||
return PacketReturnType(in, other);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType &lhs, Scalar *rhs) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void set_packet(PacketReturnType &lhs, Scalar *rhs) {
|
||||
lhs = PacketReturnType(rhs[0], rhs[1]);
|
||||
}
|
||||
};
|
||||
|
@ -472,6 +472,115 @@ pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
||||
return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet &a,
|
||||
const Packet &b) {
|
||||
return ((a <= b)
|
||||
.template convert<typename unpacket_traits<Packet>::type,
|
||||
cl::sycl::rounding_mode::automatic>());
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet &a,
|
||||
const Packet &b) {
|
||||
return ((a < b)
|
||||
.template convert<typename unpacket_traits<Packet>::type,
|
||||
cl::sycl::rounding_mode::automatic>());
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet &a,
|
||||
const Packet &b) {
|
||||
return ((a == b)
|
||||
.template convert<typename unpacket_traits<Packet>::type,
|
||||
cl::sycl::rounding_mode::automatic>());
|
||||
}
|
||||
|
||||
#define SYCL_PCMP(OP, TYPE) \
|
||||
template <> \
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE &a, \
|
||||
const TYPE &b) { \
|
||||
return sycl_pcmp_##OP<TYPE>(a, b); \
|
||||
}
|
||||
|
||||
SYCL_PCMP(le, cl::sycl::cl_float4)
|
||||
SYCL_PCMP(lt, cl::sycl::cl_float4)
|
||||
SYCL_PCMP(eq, cl::sycl::cl_float4)
|
||||
SYCL_PCMP(le, cl::sycl::cl_double2)
|
||||
SYCL_PCMP(lt, cl::sycl::cl_double2)
|
||||
SYCL_PCMP(eq, cl::sycl::cl_double2)
|
||||
#undef SYCL_PCMP
|
||||
|
||||
template <typename T> struct convert_to_integer;
|
||||
|
||||
template <> struct convert_to_integer<float> {
|
||||
using type = int;
|
||||
using packet_type = cl::sycl::cl_int4;
|
||||
};
|
||||
template <> struct convert_to_integer<double> {
|
||||
using type = long;
|
||||
using packet_type = cl::sycl::cl_long2;
|
||||
};
|
||||
|
||||
template <typename PacketIn>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename convert_to_integer<
|
||||
typename unpacket_traits<PacketIn>::type>::packet_type
|
||||
vector_as_int(const PacketIn &p) {
|
||||
return (
|
||||
p.template convert<typename convert_to_integer<
|
||||
typename unpacket_traits<PacketIn>::type>::type,
|
||||
cl::sycl::rounding_mode::automatic>());
|
||||
}
|
||||
|
||||
template <typename packetOut, typename PacketIn>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packetOut
|
||||
convert_vector(const PacketIn &p) {
|
||||
return (p.template convert<typename unpacket_traits<packetOut>::type,
|
||||
cl::sycl::rounding_mode::automatic>());
|
||||
}
|
||||
|
||||
#define SYCL_PAND(TYPE) \
|
||||
template <> \
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pand<TYPE>(const TYPE &a, \
|
||||
const TYPE &b) { \
|
||||
return convert_vector<TYPE>(vector_as_int(a) & vector_as_int(b)); \
|
||||
}
|
||||
SYCL_PAND(cl::sycl::cl_float4)
|
||||
SYCL_PAND(cl::sycl::cl_double2)
|
||||
#undef SYCL_PAND
|
||||
|
||||
#define SYCL_POR(TYPE) \
|
||||
template <> \
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE por<TYPE>(const TYPE &a, \
|
||||
const TYPE &b) { \
|
||||
return convert_vector<TYPE>(vector_as_int(a) | vector_as_int(b)); \
|
||||
}
|
||||
|
||||
SYCL_POR(cl::sycl::cl_float4)
|
||||
SYCL_POR(cl::sycl::cl_double2)
|
||||
#undef SYCL_POR
|
||||
|
||||
#define SYCL_PXOR(TYPE) \
|
||||
template <> \
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pxor<TYPE>(const TYPE &a, \
|
||||
const TYPE &b) { \
|
||||
return convert_vector<TYPE>(vector_as_int(a) ^ vector_as_int(b)); \
|
||||
}
|
||||
|
||||
SYCL_PXOR(cl::sycl::cl_float4)
|
||||
SYCL_PXOR(cl::sycl::cl_double2)
|
||||
#undef SYCL_PXOR
|
||||
|
||||
#define SYCL_PANDNOT(TYPE) \
|
||||
template <> \
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pandnot<TYPE>(const TYPE &a, \
|
||||
const TYPE &b) { \
|
||||
return convert_vector<TYPE>(vector_as_int(a) & (~vector_as_int(b))); \
|
||||
}
|
||||
SYCL_PANDNOT(cl::sycl::cl_float4)
|
||||
SYCL_PANDNOT(cl::sycl::cl_double2)
|
||||
#undef SYCL_PANDNOT
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
|
||||
PacketBlock<cl::sycl::cl_float4, 4>& kernel) {
|
||||
float tmp = kernel.packet[0].y();
|
||||
|
@ -77,6 +77,28 @@ class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOn
|
||||
typename XprType::Nested m_xpr;
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
template <typename Device, typename CoeffReturnType>
|
||||
struct non_integral_type_placement_new{
|
||||
template <typename StorageType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index numValues, StorageType m_buffer) {
|
||||
// Initialize non-trivially constructible types.
|
||||
if (!internal::is_arithmetic<CoeffReturnType>::value) {
|
||||
for (Index i = 0; i < numValues; ++i) new (m_buffer + i) CoeffReturnType();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// SYCL does not support non-integral types
|
||||
// having new (m_buffer + i) CoeffReturnType() causes the following compiler error for SYCL Devices
|
||||
// no matching function for call to 'operator new'
|
||||
template <typename CoeffReturnType>
|
||||
struct non_integral_type_placement_new<Eigen::SyclDevice, CoeffReturnType> {
|
||||
template <typename StorageType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index, StorageType) {
|
||||
}
|
||||
};
|
||||
} // end namespace internal
|
||||
|
||||
template<typename ArgType_, typename Device>
|
||||
struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
|
||||
@ -127,10 +149,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
|
||||
const Index numValues = internal::array_prod(m_impl.dimensions());
|
||||
m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType)));
|
||||
|
||||
// Initialize non-trivially constructible types.
|
||||
if (!internal::is_arithmetic<CoeffReturnType>::value) {
|
||||
for (Index i = 0; i < numValues; ++i) new (m_buffer + i) CoeffReturnType();
|
||||
}
|
||||
internal::non_integral_type_placement_new<Device, CoeffReturnType>()(numValues, m_buffer);
|
||||
|
||||
typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo;
|
||||
EvalTo evalToTmp(m_device.get(m_buffer), m_op);
|
||||
|
Loading…
Reference in New Issue
Block a user