mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-03-01 18:26:24 +08:00
fix some internal asserts in CacheFriendlyProduct
This commit is contained in:
parent
02a7efa910
commit
44d95e0540
@ -359,19 +359,6 @@ static void ei_cache_friendly_product(
|
||||
|
||||
#endif // EIGEN_EXTERN_INSTANTIATIONS
|
||||
|
||||
template<typename Scalar>
|
||||
inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset)
|
||||
{
|
||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||
const int PacketSize = ei_packet_traits<Scalar>::size;
|
||||
const int PacketAlignedMask = PacketSize-1;
|
||||
const bool Vectorized = PacketSize>1;
|
||||
return Vectorized
|
||||
? std::min<int>( (PacketSize - ((size_t(ptr)/sizeof(Scalar)) & PacketAlignedMask))
|
||||
& PacketAlignedMask, maxOffset)
|
||||
: 0;
|
||||
}
|
||||
|
||||
/* Optimized col-major matrix * vector product:
|
||||
* This algorithm processes 4 columns at once, which allows us to both reduce
|
||||
* the number of load/stores of the result by a factor 4 and to reduce
|
||||
@ -420,7 +407,7 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_colmajor_times_vector(
|
||||
|
||||
// we cannot assume the first element is aligned because of sub-matrices
|
||||
const int lhsAlignmentOffset = ei_alignmentOffset(lhs,size);
|
||||
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0);
|
||||
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || size<PacketSize || PacketSize==1);
|
||||
|
||||
// find how many columns do we have to skip to be aligned with the result (if possible)
|
||||
int skipColumns=0;
|
||||
@ -438,7 +425,7 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_colmajor_times_vector(
|
||||
// note that the skipped columns are processed later.
|
||||
}
|
||||
|
||||
ei_internal_assert((alignmentPattern==NoneAligned)
|
||||
ei_internal_assert((alignmentPattern==NoneAligned) || PacketSize==1
|
||||
|| (size_t(lhs+alignedStart+lhsStride*skipColumns)%sizeof(Packet))==0);
|
||||
|
||||
int columnBound = ((rhs.size()-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
|
||||
@ -585,7 +572,7 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_rowmajor_times_vector(
|
||||
|
||||
// we cannot assume the first element is aligned because of sub-matrices
|
||||
const int lhsAlignmentOffset = ei_alignmentOffset(lhs,size);
|
||||
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0);
|
||||
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || PacketSize==1 || size<PacketSize);
|
||||
// find how many rows do we have to skip to be aligned with rhs (if possible)
|
||||
int skipRows=0;
|
||||
for (; skipRows<PacketSize && alignedStart != lhsAlignmentOffset + alignmentStep*skipRows; ++skipRows)
|
||||
@ -601,7 +588,7 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_rowmajor_times_vector(
|
||||
skipRows = std::min(skipRows,res.size());
|
||||
// note that the skipped rows are processed later.
|
||||
}
|
||||
ei_internal_assert((alignmentPattern==NoneAligned)
|
||||
ei_internal_assert((alignmentPattern==NoneAligned) || PacketSize==1
|
||||
|| (size_t(lhs+alignedStart+lhsStride*skipRows)%sizeof(Packet))==0);
|
||||
|
||||
int rowBound = ((res.size()-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
|
||||
|
@ -120,5 +120,19 @@ template <typename Scalar, typename Packet, int LoadMode> inline void ei_pstoret
|
||||
ei_pstoreu(to, from);
|
||||
}
|
||||
|
||||
/** \internal \returns the number of elements which have to be skipped such that data are aligned */
|
||||
template<typename Scalar>
|
||||
inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset)
|
||||
{
|
||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||
const int PacketSize = ei_packet_traits<Scalar>::size;
|
||||
const int PacketAlignedMask = PacketSize-1;
|
||||
const bool Vectorized = PacketSize>1;
|
||||
return Vectorized
|
||||
? std::min<int>( (PacketSize - ((size_t(ptr)/sizeof(Scalar)) & PacketAlignedMask))
|
||||
& PacketAlignedMask, maxOffset)
|
||||
: 0;
|
||||
}
|
||||
|
||||
#endif // EIGEN_DUMMY_PACKET_MATH_H
|
||||
|
||||
|
@ -98,6 +98,8 @@ struct ei_trisolve_selector<Lhs,Rhs,Upper,RowMajor>
|
||||
};
|
||||
|
||||
// forward substitution, col-major
|
||||
// FIXME the Lower and Upper specialization could be merged using a small helper class
|
||||
// performing reflexions on the coordinates...
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct ei_trisolve_selector<Lhs,Rhs,Lower,ColMajor>
|
||||
{
|
||||
@ -138,6 +140,8 @@ struct ei_trisolve_selector<Lhs,Rhs,Lower,ColMajor>
|
||||
* other.col(c).end(size-endBlock) += (lhs.block(endBlock, startBlock, size-endBlock, endBlock-startBlock)
|
||||
* * other.col(c).block(startBlock,endBlock-startBlock)).lazy();
|
||||
*/
|
||||
// FIXME this is cool but what about conjugate/adjoint expressions ? do we want to evaluate them ?
|
||||
// this is a more general problem though.
|
||||
ei_cache_friendly_product_colmajor_times_vector(
|
||||
size-endBlock, &(lhs.const_cast_derived().coeffRef(endBlock,startBlock)), lhs.stride(),
|
||||
btmp, &(other.coeffRef(endBlock,c)));
|
||||
|
@ -379,6 +379,7 @@ struct ei_product_coeff_vectorized_dyn_selector
|
||||
};
|
||||
|
||||
// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
|
||||
// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
|
||||
template<typename Lhs, typename Rhs, int RhsCols>
|
||||
struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
|
||||
{
|
||||
@ -406,7 +407,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
|
||||
{
|
||||
inline static void run(int row, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
inline static void run(int /*row*/, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = ei_dot_impl<
|
||||
Lhs,
|
||||
|
Loading…
Reference in New Issue
Block a user