fix some internal asserts in CacheFriendlyProduct

Gael Guennebaud 2008-07-27 22:14:08 +00:00
parent 02a7efa910
commit 44d95e0540
4 changed files with 24 additions and 18 deletions

View File

@@ -359,19 +359,6 @@ static void ei_cache_friendly_product(
#endif // EIGEN_EXTERN_INSTANTIATIONS
-template<typename Scalar>
-inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset)
-{
-  typedef typename ei_packet_traits<Scalar>::type Packet;
-  const int PacketSize = ei_packet_traits<Scalar>::size;
-  const int PacketAlignedMask = PacketSize-1;
-  const bool Vectorized = PacketSize>1;
-  return Vectorized
-    ? std::min<int>( (PacketSize - ((size_t(ptr)/sizeof(Scalar)) & PacketAlignedMask))
-                     & PacketAlignedMask, maxOffset)
-    : 0;
-}
/* Optimized col-major matrix * vector product:
 * This algorithm processes 4 columns at once, which allows it to both reduce
 * the number of load/stores of the result by a factor of 4 and to reduce
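As an aside, here is a minimal standalone sketch of the "4 columns at once" idea (a plain scalar version, not Eigen's actual kernel, which additionally vectorizes with packets and handles alignment): accumulating four columns per pass loads and stores each entry of the result once per four columns instead of once per column.

  // Hedged sketch: col-major matrix * vector, accumulating into res,
  // processing 4 columns per pass to cut res load/store traffic by 4x.
  void colmajor_times_vector(const float* mat, int rows, int cols, int stride,
                             const float* rhs, float* res)
  {
    int j = 0;
    for (; j + 4 <= cols; j += 4)          // 4 columns at once
      for (int i = 0; i < rows; ++i)
        res[i] += mat[i + (j+0)*stride] * rhs[j+0]
                + mat[i + (j+1)*stride] * rhs[j+1]
                + mat[i + (j+2)*stride] * rhs[j+2]
                + mat[i + (j+3)*stride] * rhs[j+3];
    for (; j < cols; ++j)                  // remaining columns
      for (int i = 0; i < rows; ++i)
        res[i] += mat[i + j*stride] * rhs[j];
  }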
@@ -420,7 +407,7 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_colmajor_times_vector(
// we cannot assume the first element is aligned because of sub-matrices
const int lhsAlignmentOffset = ei_alignmentOffset(lhs,size);
-ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0);
+ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || size<PacketSize || PacketSize==1);
// find how many columns we have to skip to be aligned with the result (if possible)
int skipColumns=0;
@@ -438,7 +425,7 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_colmajor_times_vector(
// note that the skipped columns are processed later.
}
-ei_internal_assert((alignmentPattern==NoneAligned)
+ei_internal_assert((alignmentPattern==NoneAligned) || PacketSize==1
|| (size_t(lhs+alignedStart+lhsStride*skipColumns)%sizeof(Packet))==0);
int columnBound = ((rhs.size()-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
@@ -585,7 +572,7 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_rowmajor_times_vector(
// we cannot assume the first element is aligned because of sub-matrices
const int lhsAlignmentOffset = ei_alignmentOffset(lhs,size);
-ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0);
+ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || PacketSize==1 || size<PacketSize);
// find how many rows we have to skip to be aligned with rhs (if possible)
int skipRows=0;
for (; skipRows<PacketSize && alignedStart != lhsAlignmentOffset + alignmentStep*skipRows; ++skipRows)
@@ -601,7 +588,7 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_rowmajor_times_vector(
skipRows = std::min(skipRows,res.size());
// note that the skipped rows are processed later.
}
-ei_internal_assert((alignmentPattern==NoneAligned)
+ei_internal_assert((alignmentPattern==NoneAligned) || PacketSize==1
|| (size_t(lhs+alignedStart+lhsStride*skipRows)%sizeof(Packet))==0);
int rowBound = ((res.size()-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
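The relaxed asserts cover two cases the old ones missed: when PacketSize==1 there is no alignment requirement at all, and when the vector is shorter than one packet (size<PacketSize), ei_alignmentOffset clamps its result to maxOffset, so the returned offset need not reach an aligned address. Here is a minimal standalone sketch reproducing the second case (alignment_offset is a hypothetical stand-in for ei_alignmentOffset, specialized to float with 16-byte packets of 4 elements):

  #include <algorithm>
  #include <cassert>
  #include <cstddef>
  #include <cstdio>

  const int PacketSize = 4;   // e.g. 4 floats per 16-byte SSE packet

  // hypothetical stand-in for ei_alignmentOffset<float>
  int alignment_offset(const float* ptr, int maxOffset)
  {
    const int mask = PacketSize - 1;
    return std::min<int>((PacketSize - int((std::size_t(ptr) / sizeof(float)) & mask)) & mask,
                         maxOffset);
  }

  int main()
  {
    alignas(16) float data[8] = {};
    const float* sub = data + 1;  // misaligned start, as with a sub-matrix
    const int size = 2;           // fewer elements than one packet
    const int off = alignment_offset(sub, size);  // clamped to 2
    const bool aligned = std::size_t(sub + off) % (PacketSize * sizeof(float)) == 0;
    std::printf("offset=%d aligned=%d\n", off, int(aligned)); // offset=2 aligned=0
    // the old assert would fire here; the relaxed one passes:
    assert(aligned || size < PacketSize || PacketSize == 1);
  }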

View File

@@ -120,5 +120,19 @@ template <typename Scalar, typename Packet, int LoadMode> inline void ei_pstoret
ei_pstoreu(to, from);
}
+/** \internal \returns the number of elements which have to be skipped such that data are aligned */
+template<typename Scalar>
+inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset)
+{
+  typedef typename ei_packet_traits<Scalar>::type Packet;
+  const int PacketSize = ei_packet_traits<Scalar>::size;
+  const int PacketAlignedMask = PacketSize-1;
+  const bool Vectorized = PacketSize>1;
+  return Vectorized
+    ? std::min<int>( (PacketSize - ((size_t(ptr)/sizeof(Scalar)) & PacketAlignedMask))
+                     & PacketAlignedMask, maxOffset)
+    : 0;
+}
#endif // EIGEN_DUMMY_PACKET_MATH_H
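For reference, the offset formula above reduces, for an element whose index within an aligned block is i (mod PacketSize), to (PacketSize - i) & (PacketSize - 1). A quick standalone check of that arithmetic (a sketch, not Eigen code), assuming float with PacketSize == 4:

  #include <cstdio>

  int main()
  {
    const int PacketSize = 4, mask = PacketSize - 1;
    for (int i = 0; i < PacketSize; ++i)
      std::printf("index mod 4 = %d -> skip %d elements\n",
                  i, (PacketSize - i) & mask);
    // prints: 0 -> 0, 1 -> 3, 2 -> 2, 3 -> 1
  }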

View File

@@ -98,6 +98,8 @@ struct ei_trisolve_selector<Lhs,Rhs,Upper,RowMajor>
};
// forward substitution, col-major
+// FIXME the Lower and Upper specializations could be merged using a small helper class
+// performing reflections on the coordinates...
template<typename Lhs, typename Rhs>
struct ei_trisolve_selector<Lhs,Rhs,Lower,ColMajor>
{
@@ -138,6 +140,8 @@ struct ei_trisolve_selector<Lhs,Rhs,Lower,ColMajor>
* other.col(c).end(size-endBlock) += (lhs.block(endBlock, startBlock, size-endBlock, endBlock-startBlock)
* * other.col(c).block(startBlock,endBlock-startBlock)).lazy();
*/
+// FIXME this is cool, but what about conjugate/adjoint expressions? do we want to evaluate them?
+// this is a more general problem though.
ei_cache_friendly_product_colmajor_times_vector(
size-endBlock, &(lhs.const_cast_derived().coeffRef(endBlock,startBlock)), lhs.stride(),
btmp, &(other.coeffRef(endBlock,c)));
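For context, the kernel call above performs the block update of a column-oriented forward substitution. A hedged sketch of the plain, unblocked algorithm it accelerates (not Eigen's implementation): solving L*x = b in place for lower-triangular L, where once x[j] is known, column j of L below the diagonal is subtracted from the remaining right-hand side. Eigen batches that update over column blocks and routes it through ei_cache_friendly_product_colmajor_times_vector.

  #include <vector>

  // forward substitution, col-major storage: x holds b on entry, x on exit
  void forward_subst_colmajor(const std::vector<double>& L, int n,
                              std::vector<double>& x)
  {
    for (int j = 0; j < n; ++j)
    {
      x[j] /= L[j + j*n];              // divide by the diagonal entry
      for (int i = j + 1; i < n; ++i)  // update the tail of the rhs with
        x[i] -= L[i + j*n] * x[j];     // column j below the diagonal
    }
  }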

View File

@@ -379,6 +379,7 @@ struct ei_product_coeff_vectorized_dyn_selector
};
// NOTE the 3 following specializations exist because taking .col(0) on a vector is a bit slower
+// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
template<typename Lhs, typename Rhs, int RhsCols>
struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
{
@@ -406,7 +407,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
template<typename Lhs, typename Rhs>
struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
{
-inline static void run(int row, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+inline static void run(int /*row*/, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
res = ei_dot_impl<
Lhs,