mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-03-07 18:27:40 +08:00
Fixed performance issues for complex VSX and P10 MMA in gebp_kernel (level 3).
This commit is contained in:
parent
e7b8643d70
commit
d59ef212e1
@ -31,6 +31,52 @@ struct Packet2cf
|
||||
{
|
||||
EIGEN_STRONG_INLINE explicit Packet2cf() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
Packet4f v1, v2;
|
||||
|
||||
// Permute and multiply the real parts of a and b
|
||||
v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
|
||||
// Get the imaginary parts of a
|
||||
v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
|
||||
// multiply a_re * b
|
||||
v1 = vec_madd(v1, b.v, p4f_ZERO);
|
||||
// multiply a_im * b and get the conjugate result
|
||||
v2 = vec_madd(v2, b.v, p4f_ZERO);
|
||||
v2 = reinterpret_cast<Packet4f>(pxor(v2, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR)));
|
||||
// permute back to a proper order
|
||||
v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
|
||||
|
||||
return Packet2cf(padd<Packet4f>(v1, v2));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf& operator*=(const Packet2cf& b) {
|
||||
v = pmul(Packet2cf(*this), b).v;
|
||||
return *this;
|
||||
}
|
||||
EIGEN_STRONG_INLINE Packet2cf operator*(const Packet2cf& b) const {
|
||||
return Packet2cf(*this) *= b;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf& operator+=(const Packet2cf& b) {
|
||||
v = padd(v, b.v);
|
||||
return *this;
|
||||
}
|
||||
EIGEN_STRONG_INLINE Packet2cf operator+(const Packet2cf& b) const {
|
||||
return Packet2cf(*this) += b;
|
||||
}
|
||||
EIGEN_STRONG_INLINE Packet2cf& operator-=(const Packet2cf& b) {
|
||||
v = psub(v, b.v);
|
||||
return *this;
|
||||
}
|
||||
EIGEN_STRONG_INLINE Packet2cf operator-(const Packet2cf& b) const {
|
||||
return Packet2cf(*this) -= b;
|
||||
}
|
||||
EIGEN_STRONG_INLINE Packet2cf operator-(void) const {
|
||||
return Packet2cf(vec_neg(v));
|
||||
}
|
||||
|
||||
Packet4f v;
|
||||
};
|
||||
|
||||
@ -81,6 +127,25 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstore((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstoreu((float*)to, from.v); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pload2(const std::complex<float>* from0, const std::complex<float>* from1)
|
||||
{
|
||||
Packet4f res0, res1;
|
||||
#ifdef __VSX__
|
||||
__asm__ ("lxsdx %x0,%y1" : "=wa" (res0) : "Z" (*from0));
|
||||
__asm__ ("lxsdx %x0,%y1" : "=wa" (res1) : "Z" (*from1));
|
||||
#ifdef _BIG_ENDIAN
|
||||
__asm__ ("xxpermdi %x0, %x1, %x2, 0" : "=wa" (res0) : "wa" (res0), "wa" (res1));
|
||||
#else
|
||||
__asm__ ("xxpermdi %x0, %x2, %x1, 0" : "=wa" (res0) : "wa" (res0), "wa" (res1));
|
||||
#endif
|
||||
#else
|
||||
*((std::complex<float> *)&res0[0]) = *from0;
|
||||
*((std::complex<float> *)&res1[0]) = *from1;
|
||||
res0 = vec_perm(res0, res1, p16uc_TRANSPOSE64_HI);
|
||||
#endif
|
||||
return Packet2cf(res0);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
|
||||
{
|
||||
EIGEN_ALIGN16 std::complex<float> af[2];
|
||||
@ -101,25 +166,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, con
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
Packet4f v1, v2;
|
||||
|
||||
// Permute and multiply the real parts of a and b
|
||||
v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
|
||||
// Get the imaginary parts of a
|
||||
v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
|
||||
// multiply a_re * b
|
||||
v1 = vec_madd(v1, b.v, p4f_ZERO);
|
||||
// multiply a_im * b and get the conjugate result
|
||||
v2 = vec_madd(v2, b.v, p4f_ZERO);
|
||||
v2 = reinterpret_cast<Packet4f>(pxor(v2, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR)));
|
||||
// permute back to a proper order
|
||||
v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
|
||||
|
||||
return Packet2cf(padd<Packet4f>(v1, v2));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v, b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v, b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v, b.v)); }
|
||||
@ -239,6 +285,51 @@ struct Packet1cd
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
Packet2d a_re, a_im, v1, v2;
|
||||
|
||||
// Permute and multiply the real parts of a and b
|
||||
a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
|
||||
// Get the imaginary parts of a
|
||||
a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
|
||||
// multiply a_re * b
|
||||
v1 = vec_madd(a_re, b.v, p2d_ZERO);
|
||||
// multiply a_im * b and get the conjugate result
|
||||
v2 = vec_madd(a_im, b.v, p2d_ZERO);
|
||||
v2 = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v2), reinterpret_cast<Packet4ui>(v2), 8));
|
||||
v2 = pxor(v2, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR1));
|
||||
|
||||
return Packet1cd(padd<Packet2d>(v1, v2));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd& operator*=(const Packet1cd& b) {
|
||||
v = pmul(Packet1cd(*this), b).v;
|
||||
return *this;
|
||||
}
|
||||
EIGEN_STRONG_INLINE Packet1cd operator*(const Packet1cd& b) const {
|
||||
return Packet1cd(*this) *= b;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd& operator+=(const Packet1cd& b) {
|
||||
v = padd(v, b.v);
|
||||
return *this;
|
||||
}
|
||||
EIGEN_STRONG_INLINE Packet1cd operator+(const Packet1cd& b) const {
|
||||
return Packet1cd(*this) += b;
|
||||
}
|
||||
EIGEN_STRONG_INLINE Packet1cd& operator-=(const Packet1cd& b) {
|
||||
v = psub(v, b.v);
|
||||
return *this;
|
||||
}
|
||||
EIGEN_STRONG_INLINE Packet1cd operator-(const Packet1cd& b) const {
|
||||
return Packet1cd(*this) -= b;
|
||||
}
|
||||
EIGEN_STRONG_INLINE Packet1cd operator-(void) const {
|
||||
return Packet1cd(vec_neg(v));
|
||||
}
|
||||
|
||||
Packet2d v;
|
||||
};
|
||||
|
||||
@ -290,24 +381,6 @@ template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, con
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR2))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
Packet2d a_re, a_im, v1, v2;
|
||||
|
||||
// Permute and multiply the real parts of a and b
|
||||
a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
|
||||
// Get the imaginary parts of a
|
||||
a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
|
||||
// multiply a_re * b
|
||||
v1 = vec_madd(a_re, b.v, p2d_ZERO);
|
||||
// multiply a_im * b and get the conjugate result
|
||||
v2 = vec_madd(a_im, b.v, p2d_ZERO);
|
||||
v2 = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v2), reinterpret_cast<Packet4ui>(v2), 8));
|
||||
v2 = pxor(v2, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR1));
|
||||
|
||||
return Packet1cd(padd<Packet2d>(v1, v2));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(por(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pxor(a.v,b.v)); }
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,10 @@
|
||||
//#define EIGEN_POWER_USE_PREFETCH // Use prefetching in gemm routines
|
||||
#ifdef EIGEN_POWER_USE_PREFETCH
|
||||
#define EIGEN_POWER_PREFETCH(p) prefetch(p)
|
||||
#else
|
||||
#define EIGEN_POWER_PREFETCH(p)
|
||||
#endif
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
@ -5,8 +12,8 @@ namespace internal {
|
||||
template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows>
|
||||
EIGEN_STRONG_INLINE void gemm_extra_col(
|
||||
const DataMapper& res,
|
||||
const Scalar *lhs_base,
|
||||
const Scalar *rhs_base,
|
||||
const Scalar* lhs_base,
|
||||
const Scalar* rhs_base,
|
||||
Index depth,
|
||||
Index strideA,
|
||||
Index offsetA,
|
||||
@ -16,16 +23,17 @@ EIGEN_STRONG_INLINE void gemm_extra_col(
|
||||
Index remaining_cols,
|
||||
const Packet& pAlpha);
|
||||
|
||||
template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows>
|
||||
template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows, const Index accCols>
|
||||
EIGEN_STRONG_INLINE void gemm_extra_row(
|
||||
const DataMapper& res,
|
||||
const Scalar *lhs_base,
|
||||
const Scalar *rhs_base,
|
||||
const Scalar* lhs_base,
|
||||
const Scalar* rhs_base,
|
||||
Index depth,
|
||||
Index strideA,
|
||||
Index offsetA,
|
||||
Index row,
|
||||
Index col,
|
||||
Index rows,
|
||||
Index cols,
|
||||
Index remaining_rows,
|
||||
const Packet& pAlpha,
|
||||
@ -34,8 +42,8 @@ EIGEN_STRONG_INLINE void gemm_extra_row(
|
||||
template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accCols>
|
||||
EIGEN_STRONG_INLINE void gemm_unrolled_col(
|
||||
const DataMapper& res,
|
||||
const Scalar *lhs_base,
|
||||
const Scalar *rhs_base,
|
||||
const Scalar* lhs_base,
|
||||
const Scalar* rhs_base,
|
||||
Index depth,
|
||||
Index strideA,
|
||||
Index offsetA,
|
||||
@ -48,6 +56,71 @@ EIGEN_STRONG_INLINE void gemm_unrolled_col(
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet bmask(const int remaining_rows);
|
||||
|
||||
template<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
|
||||
EIGEN_STRONG_INLINE void gemm_complex_extra_col(
|
||||
const DataMapper& res,
|
||||
const Scalar* lhs_base,
|
||||
const Scalar* rhs_base,
|
||||
Index depth,
|
||||
Index strideA,
|
||||
Index offsetA,
|
||||
Index strideB,
|
||||
Index row,
|
||||
Index col,
|
||||
Index remaining_rows,
|
||||
Index remaining_cols,
|
||||
const Packet& pAlphaReal,
|
||||
const Packet& pAlphaImag);
|
||||
|
||||
template<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
|
||||
EIGEN_STRONG_INLINE void gemm_complex_extra_row(
|
||||
const DataMapper& res,
|
||||
const Scalar* lhs_base,
|
||||
const Scalar* rhs_base,
|
||||
Index depth,
|
||||
Index strideA,
|
||||
Index offsetA,
|
||||
Index strideB,
|
||||
Index row,
|
||||
Index col,
|
||||
Index rows,
|
||||
Index cols,
|
||||
Index remaining_rows,
|
||||
const Packet& pAlphaReal,
|
||||
const Packet& pAlphaImag,
|
||||
const Packet& pMask);
|
||||
|
||||
template<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
|
||||
EIGEN_STRONG_INLINE void gemm_complex_unrolled_col(
|
||||
const DataMapper& res,
|
||||
const Scalar* lhs_base,
|
||||
const Scalar* rhs_base,
|
||||
Index depth,
|
||||
Index strideA,
|
||||
Index offsetA,
|
||||
Index strideB,
|
||||
Index& row,
|
||||
Index rows,
|
||||
Index col,
|
||||
Index remaining_cols,
|
||||
const Packet& pAlphaReal,
|
||||
const Packet& pAlphaImag);
|
||||
|
||||
template<typename Scalar, typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet ploadLhs(const Scalar* lhs);
|
||||
|
||||
template<typename DataMapper, typename Packet, typename Index, const Index accCols, int N, int StorageOrder>
|
||||
EIGEN_STRONG_INLINE void bload(PacketBlock<Packet,4>& acc, const DataMapper& res, Index row, Index col);
|
||||
|
||||
template<typename DataMapper, typename Packet, typename Index, const Index accCols, int N, int StorageOrder>
|
||||
EIGEN_STRONG_INLINE void bload(PacketBlock<Packet,8>& acc, const DataMapper& res, Index row, Index col);
|
||||
|
||||
template<typename Packet>
|
||||
EIGEN_STRONG_INLINE void bscale(PacketBlock<Packet,4>& acc, PacketBlock<Packet,4>& accZ, const Packet& pAlpha);
|
||||
|
||||
template<typename Packet, int N>
|
||||
EIGEN_STRONG_INLINE void bscalec(PacketBlock<Packet,N>& aReal, PacketBlock<Packet,N>& aImag, const Packet& bReal, const Packet& bImag, PacketBlock<Packet,N>& cReal, PacketBlock<Packet,N>& cImag);
|
||||
|
||||
const static Packet16uc p16uc_SETCOMPLEX32_FIRST = { 0, 1, 2, 3,
|
||||
16, 17, 18, 19,
|
||||
4, 5, 6, 7,
|
||||
@ -68,7 +141,7 @@ const static Packet16uc p16uc_SETCOMPLEX64_SECOND = { 8, 9, 10, 11, 12, 13, 14
|
||||
|
||||
// Grab two decouples real/imaginary PacketBlocks and return two coupled (real/imaginary pairs) PacketBlocks.
|
||||
template<typename Packet, typename Packetc>
|
||||
EIGEN_STRONG_INLINE void bcouple(PacketBlock<Packet,4>& taccReal, PacketBlock<Packet,4>& taccImag, PacketBlock<Packetc,8>& tRes, PacketBlock<Packetc, 4>& acc1, PacketBlock<Packetc, 4>& acc2)
|
||||
EIGEN_STRONG_INLINE void bcouple_common(PacketBlock<Packet,4>& taccReal, PacketBlock<Packet,4>& taccImag, PacketBlock<Packetc, 4>& acc1, PacketBlock<Packetc, 4>& acc2)
|
||||
{
|
||||
acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_FIRST);
|
||||
acc1.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX32_FIRST);
|
||||
@ -79,6 +152,12 @@ EIGEN_STRONG_INLINE void bcouple(PacketBlock<Packet,4>& taccReal, PacketBlock<Pa
|
||||
acc2.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX32_SECOND);
|
||||
acc2.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX32_SECOND);
|
||||
acc2.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX32_SECOND);
|
||||
}
|
||||
|
||||
template<typename Packet, typename Packetc>
|
||||
EIGEN_STRONG_INLINE void bcouple(PacketBlock<Packet,4>& taccReal, PacketBlock<Packet,4>& taccImag, PacketBlock<Packetc,8>& tRes, PacketBlock<Packetc, 4>& acc1, PacketBlock<Packetc, 4>& acc2)
|
||||
{
|
||||
bcouple_common<Packet, Packetc>(taccReal, taccImag, acc1, acc2);
|
||||
|
||||
acc1.packet[0] = padd<Packetc>(tRes.packet[0], acc1.packet[0]);
|
||||
acc1.packet[1] = padd<Packetc>(tRes.packet[1], acc1.packet[1]);
|
||||
@ -91,8 +170,26 @@ EIGEN_STRONG_INLINE void bcouple(PacketBlock<Packet,4>& taccReal, PacketBlock<Pa
|
||||
acc2.packet[3] = padd<Packetc>(tRes.packet[7], acc2.packet[3]);
|
||||
}
|
||||
|
||||
template<typename Packet, typename Packetc>
|
||||
EIGEN_STRONG_INLINE void bcouple_common(PacketBlock<Packet,1>& taccReal, PacketBlock<Packet,1>& taccImag, PacketBlock<Packetc, 1>& acc1, PacketBlock<Packetc, 1>& acc2)
|
||||
{
|
||||
acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_FIRST);
|
||||
|
||||
acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_SECOND);
|
||||
}
|
||||
|
||||
template<typename Packet, typename Packetc>
|
||||
EIGEN_STRONG_INLINE void bcouple(PacketBlock<Packet,1>& taccReal, PacketBlock<Packet,1>& taccImag, PacketBlock<Packetc,2>& tRes, PacketBlock<Packetc, 1>& acc1, PacketBlock<Packetc, 1>& acc2)
|
||||
{
|
||||
bcouple_common<Packet, Packetc>(taccReal, taccImag, acc1, acc2);
|
||||
|
||||
acc1.packet[0] = padd<Packetc>(tRes.packet[0], acc1.packet[0]);
|
||||
|
||||
acc2.packet[0] = padd<Packetc>(tRes.packet[1], acc2.packet[0]);
|
||||
}
|
||||
|
||||
template<>
|
||||
EIGEN_STRONG_INLINE void bcouple<Packet2d, Packet1cd>(PacketBlock<Packet2d,4>& taccReal, PacketBlock<Packet2d,4>& taccImag, PacketBlock<Packet1cd,8>& tRes, PacketBlock<Packet1cd, 4>& acc1, PacketBlock<Packet1cd, 4>& acc2)
|
||||
EIGEN_STRONG_INLINE void bcouple_common<Packet2d, Packet1cd>(PacketBlock<Packet2d,4>& taccReal, PacketBlock<Packet2d,4>& taccImag, PacketBlock<Packet1cd, 4>& acc1, PacketBlock<Packet1cd, 4>& acc2)
|
||||
{
|
||||
acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_FIRST);
|
||||
acc1.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX64_FIRST);
|
||||
@ -103,23 +200,21 @@ EIGEN_STRONG_INLINE void bcouple<Packet2d, Packet1cd>(PacketBlock<Packet2d,4>& t
|
||||
acc2.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX64_SECOND);
|
||||
acc2.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX64_SECOND);
|
||||
acc2.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX64_SECOND);
|
||||
}
|
||||
|
||||
acc1.packet[0] = padd<Packet1cd>(tRes.packet[0], acc1.packet[0]);
|
||||
acc1.packet[1] = padd<Packet1cd>(tRes.packet[1], acc1.packet[1]);
|
||||
acc1.packet[2] = padd<Packet1cd>(tRes.packet[2], acc1.packet[2]);
|
||||
acc1.packet[3] = padd<Packet1cd>(tRes.packet[3], acc1.packet[3]);
|
||||
template<>
|
||||
EIGEN_STRONG_INLINE void bcouple_common<Packet2d, Packet1cd>(PacketBlock<Packet2d,1>& taccReal, PacketBlock<Packet2d,1>& taccImag, PacketBlock<Packet1cd, 1>& acc1, PacketBlock<Packet1cd, 1>& acc2)
|
||||
{
|
||||
acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_FIRST);
|
||||
|
||||
acc2.packet[0] = padd<Packet1cd>(tRes.packet[4], acc2.packet[0]);
|
||||
acc2.packet[1] = padd<Packet1cd>(tRes.packet[5], acc2.packet[1]);
|
||||
acc2.packet[2] = padd<Packet1cd>(tRes.packet[6], acc2.packet[2]);
|
||||
acc2.packet[3] = padd<Packet1cd>(tRes.packet[7], acc2.packet[3]);
|
||||
acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_SECOND);
|
||||
}
|
||||
|
||||
// This is necessary because ploadRhs for double returns a pair of vectors when MMA is enabled.
|
||||
template<typename Scalar, typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet ploadRhs(const Scalar *rhs)
|
||||
EIGEN_STRONG_INLINE Packet ploadRhs(const Scalar* rhs)
|
||||
{
|
||||
return *((Packet *)rhs);
|
||||
return *((Packet *)rhs);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -112,6 +112,11 @@ EIGEN_DECLARE_TEST(product_large)
|
||||
CALL_SUBTEST_1( test_aliasing<float>() );
|
||||
|
||||
CALL_SUBTEST_6( bug_1622<1>() );
|
||||
|
||||
CALL_SUBTEST_7( product(MatrixXcd(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) );
|
||||
CALL_SUBTEST_8( product(Matrix<double,Dynamic,Dynamic,RowMajor>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
||||
CALL_SUBTEST_9( product(Matrix<std::complex<float>,Dynamic,Dynamic,RowMajor>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
||||
CALL_SUBTEST_10( product(Matrix<std::complex<double>,Dynamic,Dynamic,RowMajor>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
||||
}
|
||||
|
||||
CALL_SUBTEST_6( product_large_regressions<0>() );
|
||||
|
Loading…
Reference in New Issue
Block a user