This commit is contained in:
Gael Guennebaud 2014-04-17 22:50:49 +02:00
commit 2c3c95990d
2 changed files with 50 additions and 0 deletions

View File

@ -111,6 +111,22 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, int stride)
{
Packet4f res;
res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
return Packet2cf(res);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, int stride)
{
to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
}
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)

View File

@ -221,6 +221,40 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& f
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, int stride)
{
Packet4f res;
res = vsetq_lane_f32(from[0*stride], res, 0);
res = vsetq_lane_f32(from[1*stride], res, 1);
res = vsetq_lane_f32(from[2*stride], res, 2);
res = vsetq_lane_f32(from[3*stride], res, 3);
return res;
}
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, int stride)
{
Packet4i res;
res = vsetq_lane_s32(from[0*stride], res, 0);
res = vsetq_lane_s32(from[1*stride], res, 1);
res = vsetq_lane_s32(from[2*stride], res, 2);
res = vsetq_lane_s32(from[3*stride], res, 3);
return res;
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, int stride)
{
to[stride*0] = vgetq_lane_f32(from, 0);
to[stride*1] = vgetq_lane_f32(from, 1);
to[stride*2] = vgetq_lane_f32(from, 2);
to[stride*3] = vgetq_lane_f32(from, 3);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, int stride)
{
to[stride*0] = vgetq_lane_s32(from, 0);
to[stride*1] = vgetq_lane_s32(from, 1);
to[stride*2] = vgetq_lane_s32(from, 2);
to[stride*3] = vgetq_lane_s32(from, 3);
}
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ARM_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ARM_PREFETCH(addr); }