bug #1195: move NumTraits::Div<>::Cost to internal::scalar_div_cost (with some specializations in arch/SSE and arch/AVX)

This commit is contained in:
Gael Guennebaud 2016-09-08 08:36:27 +02:00
parent d780983f59
commit 471eac5399
8 changed files with 39 additions and 27 deletions

View File

@ -97,23 +97,6 @@ template<typename T> struct GenericNumTraits
MulCost = 1 MulCost = 1
}; };
// Division is messy but important, because it is expensive and throughput
// varies significantly. The following numbers are based on min division
// throughput on Haswell.
//
// Div<Vectorized>::Cost is the estimated cost of one division of T:
//   - integer T, sizeof(T) == 8 : 24 if signed, 21 if unsigned
//   - integer T, other sizes    :  8 if signed,  9 if unsigned
//   - non-integer T, Vectorized : 16 (AVX) or 8 when sizeof(T) == 8,
//                                 14 (AVX) or 7 otherwise
//   - non-integer T, scalar path: 8
// IsInteger and IsSigned are not declared in this struct; presumably they are
// enumerators of the enclosing traits class -- confirm against the full file.
template<bool Vectorized>
struct Div {
enum {
// AVX is true exactly when EIGEN_VECTORIZE_AVX is defined at compile time.
#ifdef EIGEN_VECTORIZE_AVX
AVX = true,
#else
AVX = false,
#endif
// The integer branch ignores Vectorized; only the floating-point branch
// distinguishes the packet path from the scalar path.
Cost = IsInteger ? (sizeof(T) == 8 ? (IsSigned ? 24 : 21) : (IsSigned ? 8 : 9)):
Vectorized ? (sizeof(T) == 8 ? (AVX ? 16 : 8) : (AVX ? 14 : 7)) : 8
};
};
typedef T Real; typedef T Real;
typedef typename internal::conditional< typedef typename internal::conditional<
IsInteger, IsInteger,

View File

@ -94,6 +94,9 @@ template<> struct packet_traits<double> : default_packet_traits
}; };
}; };
// Division cost used when the second template argument is true
// (vectorized path) in this architecture file: 14 for float, 16 for double.
template<>
struct scalar_div_cost<float, true>
{
  enum { value = 14 };
};

template<>
struct scalar_div_cost<double, true>
{
  enum { value = 16 };
};
/* Proper support for integers is only provided by AVX2. In the meantime, we'll /* Proper support for integers is only provided by AVX2. In the meantime, we'll
use SSE instructions and packets to deal with integers. use SSE instructions and packets to deal with integers.
template<> struct packet_traits<int> : default_packet_traits template<> struct packet_traits<int> : default_packet_traits

View File

@ -162,6 +162,11 @@ template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4,
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; }; template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
// Vectorized division cost: 7 for float, 8 for double. These are only
// declared when EIGEN_VECTORIZE_AVX is not defined, so the same full
// specializations are never defined twice in an AVX-enabled build.
#if !defined(EIGEN_VECTORIZE_AVX)
template<>
struct scalar_div_cost<float, true>
{
  enum { value = 7 };
};

template<>
struct scalar_div_cost<double, true>
{
  enum { value = 8 };
};
#endif
#if EIGEN_COMP_MSVC==1500 #if EIGEN_COMP_MSVC==1500
// Workaround MSVC 9 internal compiler error. // Workaround MSVC 9 internal compiler error.
// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode // TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode

View File

@ -287,7 +287,7 @@ struct functor_traits<scalar_hypot_op<Scalar,Scalar> > {
{ {
Cost = 3 * NumTraits<Scalar>::AddCost + Cost = 3 * NumTraits<Scalar>::AddCost +
2 * NumTraits<Scalar>::MulCost + 2 * NumTraits<Scalar>::MulCost +
2 * NumTraits<Scalar>::template Div<false>::Cost, 2 * scalar_div_cost<Scalar,false>::value,
PacketAccess = false PacketAccess = false
}; };
}; };
@ -375,7 +375,7 @@ struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type; typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type;
enum { enum {
PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv, PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv,
Cost = NumTraits<result_type>::template Div<PacketAccess>::Cost Cost = scalar_div_cost<result_type,PacketAccess>::value
}; };
}; };

View File

@ -248,7 +248,7 @@ struct functor_traits<scalar_exp_op<Scalar> > {
// double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other
: (14 * NumTraits<Scalar>::AddCost + : (14 * NumTraits<Scalar>::AddCost +
6 * NumTraits<Scalar>::MulCost + 6 * NumTraits<Scalar>::MulCost +
NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost)) scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
#else #else
Cost = Cost =
(sizeof(Scalar) == 4 (sizeof(Scalar) == 4
@ -257,7 +257,7 @@ struct functor_traits<scalar_exp_op<Scalar> > {
// double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other
: (23 * NumTraits<Scalar>::AddCost + : (23 * NumTraits<Scalar>::AddCost +
12 * NumTraits<Scalar>::MulCost + 12 * NumTraits<Scalar>::MulCost +
NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost)) scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
#endif #endif
}; };
}; };
@ -514,17 +514,16 @@ struct functor_traits<scalar_tanh_op<Scalar> > {
// 9 pmadd, 2 pmul, 1 div, 2 other // 9 pmadd, 2 pmul, 1 div, 2 other
? (2 * NumTraits<Scalar>::AddCost + ? (2 * NumTraits<Scalar>::AddCost +
6 * NumTraits<Scalar>::MulCost + 6 * NumTraits<Scalar>::MulCost +
NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost) scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
#else #else
? (11 * NumTraits<Scalar>::AddCost + ? (11 * NumTraits<Scalar>::AddCost +
11 * NumTraits<Scalar>::MulCost + 11 * NumTraits<Scalar>::MulCost +
NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost) scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
#endif #endif
// This number assumes a naive implementation of tanh // This number assumes a naive implementation of tanh
: (6 * NumTraits<Scalar>::AddCost + : (6 * NumTraits<Scalar>::AddCost +
3 * NumTraits<Scalar>::MulCost + 3 * NumTraits<Scalar>::MulCost +
2 * NumTraits<Scalar>::template Div< 2 * scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value +
packet_traits<Scalar>::HasDiv>::Cost +
functor_traits<scalar_exp_op<Scalar> >::Cost)) functor_traits<scalar_exp_op<Scalar> >::Cost))
}; };
}; };

View File

@ -664,6 +664,20 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_acces
return false; return false;
} }
// Internal helper defining the cost of a scalar division for the type T.
// The default heuristic (8 times the multiplication cost of T) can be
// specialized for each scalar type and architecture.
//
// Template parameters:
//   T          - scalar type whose division cost is queried.
//   Vectorized - true when the cost is requested for the packet (SIMD) path.
//   EnableIf   - hook for conditionally enabled partial specializations;
//                callers leave it at its default (void).
template<typename T,bool Vectorized=false,typename EnableIf = void>
struct scalar_div_cost {
  enum { value = 8*NumTraits<T>::MulCost };
};

// 64-bit long / unsigned long are more expensive to divide.
//
// The third argument is selected with conditional<> rather than enable_if<>:
// sizeof(long)==8 does not depend on any template parameter, so
// enable_if<false>::type would be a hard compile error (not a silently
// discarded specialization) on targets where long is 32 bits. With
// conditional<>, the argument becomes false_type there, which never matches
// the default void, so the generic heuristic above is used instead.
// NOTE(review): false_type is assumed to be the tag type provided alongside
// conditional<> by the internal metaprogramming helpers -- confirm.
template<bool Vectorized>
struct scalar_div_cost<signed long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 24 }; };

template<bool Vectorized>
struct scalar_div_cost<unsigned long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 21 }; };
#ifdef EIGEN_DEBUG_ASSIGN #ifdef EIGEN_DEBUG_ASSIGN
std::string demangle_traversal(int t) std::string demangle_traversal(int t)
{ {

View File

@ -158,4 +158,12 @@ void test_integer_types()
CALL_SUBTEST_8( integer_type_tests(Matrix<unsigned long long, Dynamic, 5>(1, 5)) ); CALL_SUBTEST_8( integer_type_tests(Matrix<unsigned long long, Dynamic, 5>(1, 5)) );
} }
#ifdef EIGEN_TEST_PART_9
VERIFY_IS_EQUAL(internal::scalar_div_cost<int>::value, 8);
VERIFY_IS_EQUAL(internal::scalar_div_cost<unsigned int>::value, 8);
if(sizeof(long)>sizeof(int)) {
VERIFY(internal::scalar_div_cost<long>::value > internal::scalar_div_cost<int>::value);
VERIFY(internal::scalar_div_cost<unsigned long>::value > internal::scalar_div_cost<int>::value);
}
#endif
} }

View File

@ -25,7 +25,7 @@ struct scalar_mod_op {
}; };
template <typename Scalar> template <typename Scalar>
struct functor_traits<scalar_mod_op<Scalar> > struct functor_traits<scalar_mod_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; }; { enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; };
/** \internal /** \internal
@ -38,7 +38,7 @@ struct scalar_mod2_op {
}; };
template <typename Scalar> template <typename Scalar>
struct functor_traits<scalar_mod2_op<Scalar> > struct functor_traits<scalar_mod2_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; }; { enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; };
template <typename Scalar> template <typename Scalar>
struct scalar_fmod_op { struct scalar_fmod_op {