mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-27 07:29:52 +08:00
Implemented the pgather/pscatter packet primitives for the arm/NEON architecture
This commit is contained in:
parent
d936ddc3d1
commit
6d6df90c9a
@ -111,6 +111,22 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, int stride)
|
||||
{
|
||||
Packet4f res;
|
||||
res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
|
||||
res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
|
||||
res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
|
||||
res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
|
||||
return Packet2cf(res);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, int stride)
|
||||
{
|
||||
to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
|
||||
to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
||||
|
@ -221,6 +221,40 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& f
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, int stride)
|
||||
{
|
||||
Packet4f res;
|
||||
res = vsetq_lane_f32(from[0*stride], res, 0);
|
||||
res = vsetq_lane_f32(from[1*stride], res, 1);
|
||||
res = vsetq_lane_f32(from[2*stride], res, 2);
|
||||
res = vsetq_lane_f32(from[3*stride], res, 3);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, int stride)
|
||||
{
|
||||
Packet4i res;
|
||||
res = vsetq_lane_s32(from[0*stride], res, 0);
|
||||
res = vsetq_lane_s32(from[1*stride], res, 1);
|
||||
res = vsetq_lane_s32(from[2*stride], res, 2);
|
||||
res = vsetq_lane_s32(from[3*stride], res, 3);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, int stride)
|
||||
{
|
||||
to[stride*0] = vgetq_lane_f32(from, 0);
|
||||
to[stride*1] = vgetq_lane_f32(from, 1);
|
||||
to[stride*2] = vgetq_lane_f32(from, 2);
|
||||
to[stride*3] = vgetq_lane_f32(from, 3);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, int stride)
|
||||
{
|
||||
to[stride*0] = vgetq_lane_s32(from, 0);
|
||||
to[stride*1] = vgetq_lane_s32(from, 1);
|
||||
to[stride*2] = vgetq_lane_s32(from, 2);
|
||||
to[stride*3] = vgetq_lane_s32(from, 3);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ARM_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ARM_PREFETCH(addr); }
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user