mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-03-07 18:27:40 +08:00
bug #1565: help MSVC to generate not too bad ASM in reductions.
This commit is contained in:
parent
6a241bd8ee
commit
0537123953
@ -187,16 +187,17 @@ struct redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>
|
|||||||
{
|
{
|
||||||
typedef typename Evaluator::Scalar Scalar;
|
typedef typename Evaluator::Scalar Scalar;
|
||||||
|
|
||||||
|
template<typename XprType>
|
||||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
|
||||||
Scalar run(const Evaluator &eval, const Func& func)
|
Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
|
||||||
{
|
{
|
||||||
eigen_assert(eval.rows()>0 && eval.cols()>0 && "you are using an empty matrix");
|
eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
|
||||||
Scalar res;
|
Scalar res;
|
||||||
res = eval.coeffByOuterInner(0, 0);
|
res = eval.coeffByOuterInner(0, 0);
|
||||||
for(Index i = 1; i < eval.innerSize(); ++i)
|
for(Index i = 1; i < xpr.innerSize(); ++i)
|
||||||
res = func(res, eval.coeffByOuterInner(0, i));
|
res = func(res, eval.coeffByOuterInner(0, i));
|
||||||
for(Index i = 1; i < eval.outerSize(); ++i)
|
for(Index i = 1; i < xpr.outerSize(); ++i)
|
||||||
for(Index j = 0; j < eval.innerSize(); ++j)
|
for(Index j = 0; j < xpr.innerSize(); ++j)
|
||||||
res = func(res, eval.coeffByOuterInner(i, j));
|
res = func(res, eval.coeffByOuterInner(i, j));
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
@ -205,7 +206,16 @@ struct redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>
|
|||||||
template<typename Func, typename Evaluator>
|
template<typename Func, typename Evaluator>
|
||||||
struct redux_impl<Func,Evaluator, DefaultTraversal, CompleteUnrolling>
|
struct redux_impl<Func,Evaluator, DefaultTraversal, CompleteUnrolling>
|
||||||
: redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime>
|
: redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime>
|
||||||
{};
|
{
|
||||||
|
typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
|
||||||
|
typedef typename Evaluator::Scalar Scalar;
|
||||||
|
template<typename XprType>
|
||||||
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
|
||||||
|
Scalar run(const Evaluator &eval, const Func& func, const XprType& /*xpr*/)
|
||||||
|
{
|
||||||
|
return Base::run(eval,func);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template<typename Func, typename Evaluator>
|
template<typename Func, typename Evaluator>
|
||||||
struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>
|
struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>
|
||||||
@ -213,9 +223,10 @@ struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>
|
|||||||
typedef typename Evaluator::Scalar Scalar;
|
typedef typename Evaluator::Scalar Scalar;
|
||||||
typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;
|
typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;
|
||||||
|
|
||||||
static Scalar run(const Evaluator &eval, const Func& func)
|
template<typename XprType>
|
||||||
|
static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
|
||||||
{
|
{
|
||||||
const Index size = eval.size();
|
const Index size = xpr.size();
|
||||||
|
|
||||||
const Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
|
const Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
|
||||||
const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
|
const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
|
||||||
@ -223,7 +234,7 @@ struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>
|
|||||||
alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
|
alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
|
||||||
alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Evaluator::Alignment)
|
alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Evaluator::Alignment)
|
||||||
};
|
};
|
||||||
const Index alignedStart = internal::first_default_aligned(eval.nestedExpression());
|
const Index alignedStart = internal::first_default_aligned(xpr);
|
||||||
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
|
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
|
||||||
const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
|
const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
|
||||||
const Index alignedEnd2 = alignedStart + alignedSize2;
|
const Index alignedEnd2 = alignedStart + alignedSize2;
|
||||||
@ -272,11 +283,12 @@ struct redux_impl<Func, Evaluator, SliceVectorizedTraversal, Unrolling>
|
|||||||
typedef typename Evaluator::Scalar Scalar;
|
typedef typename Evaluator::Scalar Scalar;
|
||||||
typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
|
typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func)
|
template<typename XprType>
|
||||||
|
EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
|
||||||
{
|
{
|
||||||
eigen_assert(eval.rows()>0 && eval.cols()>0 && "you are using an empty matrix");
|
eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
|
||||||
const Index innerSize = eval.innerSize();
|
const Index innerSize = xpr.innerSize();
|
||||||
const Index outerSize = eval.outerSize();
|
const Index outerSize = xpr.outerSize();
|
||||||
enum {
|
enum {
|
||||||
packetSize = redux_traits<Func, Evaluator>::PacketSize
|
packetSize = redux_traits<Func, Evaluator>::PacketSize
|
||||||
};
|
};
|
||||||
@ -297,7 +309,7 @@ struct redux_impl<Func, Evaluator, SliceVectorizedTraversal, Unrolling>
|
|||||||
else // too small to vectorize anything.
|
else // too small to vectorize anything.
|
||||||
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
|
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
|
||||||
{
|
{
|
||||||
res = redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>::run(eval, func);
|
res = redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>::run(eval, func, xpr);
|
||||||
}
|
}
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
@ -316,10 +328,11 @@ struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling>
|
|||||||
VectorizedSize = (Size / PacketSize) * PacketSize
|
VectorizedSize = (Size / PacketSize) * PacketSize
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<typename XprType>
|
||||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
|
||||||
Scalar run(const Evaluator &eval, const Func& func)
|
Scalar run(const Evaluator &eval, const Func& func, const XprType &xpr)
|
||||||
{
|
{
|
||||||
eigen_assert(eval.rows()>0 && eval.cols()>0 && "you are using an empty matrix");
|
eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
|
||||||
if (VectorizedSize > 0) {
|
if (VectorizedSize > 0) {
|
||||||
Scalar res = func.predux(redux_vec_unroller<Func, Evaluator, 0, Size / PacketSize>::run(eval,func));
|
Scalar res = func.predux(redux_vec_unroller<Func, Evaluator, 0, Size / PacketSize>::run(eval,func));
|
||||||
if (VectorizedSize != Size)
|
if (VectorizedSize != Size)
|
||||||
@ -339,12 +352,11 @@ class redux_evaluator : public internal::evaluator<_XprType>
|
|||||||
typedef internal::evaluator<_XprType> Base;
|
typedef internal::evaluator<_XprType> Base;
|
||||||
public:
|
public:
|
||||||
typedef _XprType XprType;
|
typedef _XprType XprType;
|
||||||
EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : Base(xpr), m_xpr(xpr) {}
|
EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : Base(xpr) {}
|
||||||
|
|
||||||
typedef typename XprType::Scalar Scalar;
|
typedef typename XprType::Scalar Scalar;
|
||||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||||
typedef typename XprType::PacketScalar PacketScalar;
|
typedef typename XprType::PacketScalar PacketScalar;
|
||||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
|
MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
|
||||||
@ -356,12 +368,6 @@ public:
|
|||||||
InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime
|
InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime
|
||||||
};
|
};
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
|
|
||||||
EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
|
|
||||||
EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
|
|
||||||
EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
|
|
||||||
EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
|
CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
|
||||||
{ return Base::coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
{ return Base::coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
||||||
@ -370,10 +376,6 @@ public:
|
|||||||
PacketType packetByOuterInner(Index outer, Index inner) const
|
PacketType packetByOuterInner(Index outer, Index inner) const
|
||||||
{ return Base::template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
{ return Base::template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
||||||
|
|
||||||
const XprType & nestedExpression() const { return m_xpr; }
|
|
||||||
|
|
||||||
protected:
|
|
||||||
const XprType &m_xpr;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
@ -400,7 +402,9 @@ DenseBase<Derived>::redux(const Func& func) const
|
|||||||
typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
|
typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
|
||||||
ThisEvaluator thisEval(derived());
|
ThisEvaluator thisEval(derived());
|
||||||
|
|
||||||
return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
|
// The initial expression is passed to the reducer as an additional argument instead of
|
||||||
|
// passing it as a member of redux_evaluator to help MSVC generate not too bad ASM (bug #1565).
|
||||||
|
return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func, derived());
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \returns the minimum of all coefficients of \c *this.
|
/** \returns the minimum of all coefficients of \c *this.
|
||||||
|
Loading…
Reference in New Issue
Block a user