fix lhs packing in the case of real * complex products

This commit is contained in:
Gael Guennebaud 2010-07-19 23:16:03 +02:00
parent 1ed4233fd2
commit 08c841eb87
6 changed files with 20 additions and 20 deletions

View File

@ -329,7 +329,7 @@ public:
}
protected:
ei_conj_helper<LhsPacket,RhsPacket,ConjLhs,false> cj;
ei_conj_helper<ResPacket,ResPacket,ConjLhs,false> cj;
};
template<typename RealScalar, bool _ConjLhs, bool _ConjRhs>
@ -1124,34 +1124,34 @@ EIGEN_ASM_COMMENT("mybegin4");
//
// 32 33 34 35 ...
// 36 36 38 39 ...
template<typename Scalar, typename Index, int mr, int StorageOrder, bool Conjugate, bool PanelMode>
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
struct ei_gemm_pack_lhs
{
void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
Index stride=0, Index offset=0)
{
enum { PacketSize = ei_packet_traits<Scalar>::size };
// enum { PacketSize = ei_packet_traits<Scalar>::size };
ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
ei_const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
Index count = 0;
Index peeled_mc = (rows/mr)*mr;
for(Index i=0; i<peeled_mc; i+=mr)
Index peeled_mc = (rows/Pack1)*Pack1;
for(Index i=0; i<peeled_mc; i+=Pack1)
{
if(PanelMode) count += mr * offset;
if(PanelMode) count += Pack1 * offset;
for(Index k=0; k<depth; k++)
for(Index w=0; w<mr; w++)
for(Index w=0; w<Pack1; w++)
blockA[count++] = cj(lhs(i+w, k));
if(PanelMode) count += mr * (stride-offset-depth);
if(PanelMode) count += Pack1 * (stride-offset-depth);
}
if(rows-peeled_mc>=PacketSize)
if(rows-peeled_mc>=Pack2)
{
if(PanelMode) count += PacketSize*offset;
if(PanelMode) count += Pack2*offset;
for(Index k=0; k<depth; k++)
for(Index w=0; w<PacketSize; w++)
for(Index w=0; w<Pack2; w++)
blockA[count++] = cj(lhs(peeled_mc+w, k));
if(PanelMode) count += PacketSize * (stride-offset-depth);
peeled_mc += PacketSize;
if(PanelMode) count += Pack2 * (stride-offset-depth);
peeled_mc += Pack2;
}
for(Index i=peeled_mc; i<rows; i++)
{

View File

@ -79,7 +79,7 @@ static void run(Index rows, Index cols, Index depth,
Index mc = std::min(rows,blocking.mc()); // cache block size along the M direction
//Index nc = blocking.nc(); // cache block size along the N direction
ei_gemm_pack_lhs<LhsScalar, Index, Traits::mr, LhsStorageOrder> pack_lhs;
ei_gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
ei_gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
ei_gebp_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;

View File

@ -350,7 +350,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
Scalar* blockB = allocatedBlockB + sizeW;
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr,LhsStorageOrder> pack_lhs;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
ei_symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
for(Index k2=0; k2<size; k2+=kc)

View File

@ -92,7 +92,7 @@ struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpL
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs> gebp_kernel;
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,MatStorageOrder==RowMajor ? ColMajor : RowMajor> pack_rhs;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr,MatStorageOrder, false> pack_lhs;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, MatStorageOrder, false> pack_lhs;
ei_sybb_kernel<Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs, UpLo> sybb;
for(Index k2=0; k2<depth; k2+=kc)

View File

@ -128,7 +128,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
triangularBuffer.diagonal().setOnes();
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr,LhsStorageOrder> pack_lhs;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
for(Index k2=IsLower ? depth : 0;
@ -251,7 +251,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
triangularBuffer.diagonal().setOnes();
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr,LhsStorageOrder> pack_lhs;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;

View File

@ -76,7 +76,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora
ei_conj_if<Conjugate> conj;
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr,TriStorageOrder> pack_lhs;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
ei_gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs;
for(Index k2=IsLower ? 0 : size;
@ -217,7 +217,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
ei_gebp_kernel<Scalar,Scalar, Index, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, ColMajor, false, true> pack_lhs_panel;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel;
for(Index k2=IsLower ? size : 0;
IsLower ? k2>0 : k2<size;