Merged eigen/eigen into default

This commit is contained in:
Abhijit Kundu 2015-02-28 20:15:28 -05:00
commit 3a4b6827b4
14 changed files with 139 additions and 88 deletions

View File

@ -227,7 +227,7 @@ if(NOT MSVC)
option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
if(EIGEN_TEST_NEON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a8")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mfloat-abi=softfp")
message(STATUS "Enabling NEON in tests/examples")
endif()

View File

@ -290,7 +290,8 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet&
template<size_t offset, typename Packet>
struct protate_impl
{
static Packet run(const Packet& a) { return a; }
// Empty so attempts to use this unimplemented path will fail to compile.
// Only specializations of this template should be used.
};
/** \internal \returns a packet with the coefficients rotated to the right in little-endian convention,
@ -299,7 +300,6 @@ struct protate_impl
*/
template<size_t offset, typename Packet> EIGEN_DEVICE_FUNC inline Packet protate(const Packet& a)
{
EIGEN_STATIC_ASSERT(offset < unpacket_traits<Packet>::size, ROTATION_BY_ILLEGAL_OFFSET);
return offset ? protate_impl<offset, Packet>::run(a) : a;
}

View File

@ -76,12 +76,12 @@ typedef uint32x4_t Packet4ui;
template<> struct packet_traits<float> : default_packet_traits
{
typedef Packet4f type;
typedef Packet2f half;
typedef Packet4f half; // Packet2f intrinsics not implemented yet
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
HasHalfPacket=1,
HasHalfPacket=0, // Packet2f intrinsics not implemented yet
HasDiv = 1,
// FIXME check the Has*
@ -95,12 +95,12 @@ template<> struct packet_traits<float> : default_packet_traits
template<> struct packet_traits<int> : default_packet_traits
{
typedef Packet4i type;
typedef Packet2i half;
typedef Packet4i half; // Packet2i intrinsics not implemented yet
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=4,
HasHalfPacket=1
HasHalfPacket=0 // Packet2i intrinsics not implemented yet
// FIXME check the Has*
};
};

View File

@ -155,7 +155,7 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
// In unit tests we do not want to use extra large matrices,
// so we reduce the cache size to check the blocking strategy is not flawed
#ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
l1 = 4*1024;
l1 = 9*1024;
l2 = 32*1024;
l3 = 512*1024;
#endif
@ -164,7 +164,7 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
// Perhaps it would make more sense to consider k*n*m??
// Note that for very tiny problems, this function should be bypassed anyway
// because we use the coefficient-based implementation for them.
if(std::max(k,std::max(m,n))<48)
if((std::max)(k,(std::max)(m,n))<48)
return;
typedef typename Traits::ResScalar ResScalar;
@ -800,6 +800,80 @@ protected:
conj_helper<ResPacket,ResPacket,false,ConjRhs> cj;
};
// helper for the rotating kernel below
template <typename GebpKernel, bool UseRotatingKernel = GebpKernel::UseRotatingKernel>
struct PossiblyRotatingKernelHelper
{
  // Primary template: the plain, non-rotating strategy. It is selected for
  // every value of UseRotatingKernel that has no dedicated specialization.
  typedef typename GebpKernel::Traits Traits;
  typedef typename Traits::RhsScalar  RhsScalar;
  typedef typename Traits::RhsPacket  RhsPacket;
  typedef typename Traits::AccPacket  AccPacket;

  // Borrowed, not owned: the referenced Traits object must outlive the helper.
  const Traits& traits;

  PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {}

  // Fetches the rhs packet for unrolled step `Step`, column `Column`
  // (four columns per step) by delegating to Traits::loadRhs.
  template <size_t Step, size_t Column>
  void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const
  {
    const RhsScalar* const src = from + (Column + 4*Step) * Traits::RhsProgress;
    traits.loadRhs(src, to);
  }

  // Nothing was rotated on this path, so the accumulators are left untouched.
  void unrotateResult(AccPacket&, AccPacket&, AccPacket&, AccPacket&) {}
};
// rotating implementation
// Partial specialization selected when UseRotatingKernel is true.
// Instead of loading a fresh rhs packet for every column, it loads one packet
// per unrolled step and rotates it in place for the following columns; the
// accumulators must then be passed through unrotateResult() before use.
template <typename GebpKernel>
struct PossiblyRotatingKernelHelper<GebpKernel, true>
{
typedef typename GebpKernel::Traits Traits;
typedef typename Traits::RhsScalar RhsScalar;
typedef typename Traits::RhsPacket RhsPacket;
typedef typename Traits::AccPacket AccPacket;
// Held by reference; not used by the member functions of this specialization.
const Traits& traits;
PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {}
// K is the unrolled step, Index the column within that step.
// Index == 0: `to` is loaded with pload from from + 4*K*Traits::RhsProgress.
// Index != 0: `from` is ignored and `to` is rotated by one coefficient, so the
// result depends on the value left in `to` by the previous call.
template <size_t K, size_t Index>
void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const
{
// Index is a template argument, so this test is on a compile-time constant.
if (Index == 0) {
to = pload<RhsPacket>(from + 4*K*Traits::RhsProgress);
} else {
EIGEN_ASM_COMMENT("Do not reorder code, we're very tight on registers");
to = protate<1>(to);
}
}
// Rewrites the four accumulators in place: transpose, rotate packets 3, 2, 1
// by 1, 2, 3 coefficients respectively, then transpose back.
// NOTE(review): presumably this undoes the skew introduced by accumulating
// against the rotated rhs packets in loadOrRotateRhs() -- confirm.
void unrotateResult(AccPacket& res0,
AccPacket& res1,
AccPacket& res2,
AccPacket& res3)
{
// Gather the accumulators into a block so ptranspose can operate on them.
PacketBlock<AccPacket> resblock;
resblock.packet[0] = res0;
resblock.packet[1] = res1;
resblock.packet[2] = res2;
resblock.packet[3] = res3;
ptranspose(resblock);
// packet[0] is left as is; the others get increasing rotation amounts.
resblock.packet[3] = protate<1>(resblock.packet[3]);
resblock.packet[2] = protate<2>(resblock.packet[2]);
resblock.packet[1] = protate<3>(resblock.packet[1]);
ptranspose(resblock);
// Write the corrected values back through the reference parameters.
res0 = resblock.packet[0];
res1 = resblock.packet[1];
res2 = resblock.packet[2];
res3 = resblock.packet[3];
}
};
/* optimized GEneral packed Block * packed Panel product kernel
*
* Mixing type logic: C += A * B
@ -833,6 +907,16 @@ struct gebp_kernel
ResPacketSize = Traits::ResPacketSize
};
static const bool UseRotatingKernel =
EIGEN_ARCH_ARM &&
internal::is_same<LhsScalar, float>::value &&
internal::is_same<RhsScalar, float>::value &&
internal::is_same<ResScalar, float>::value &&
Traits::LhsPacketSize == 4 &&
Traits::RhsPacketSize == 4 &&
Traits::ResPacketSize == 4;
EIGEN_DONT_INLINE
void operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
Index rows, Index depth, Index cols, ResScalar alpha,
@ -866,6 +950,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
// Usually, make sense only with FMA
if(mr>=3*Traits::LhsProgress)
{
PossiblyRotatingKernelHelper<gebp_kernel> possiblyRotatingKernelHelper(traits);
// loops on each largest micro horizontal panel of lhs (3*Traits::LhsProgress x depth)
for(Index i=0; i<peeled_mc3; i+=3*Traits::LhsProgress)
{
@ -901,43 +987,12 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
prefetch(&blB[0]);
LhsPacket A0, A1;
#define EIGEN_ARCH_PREFERS_ROTATING_KERNEL EIGEN_ARCH_ARM
#if EIGEN_ARCH_PREFERS_ROTATING_KERNEL
static const bool UseRotatingKernel =
Traits::LhsPacketSize == 4 &&
Traits::RhsPacketSize == 4 &&
Traits::ResPacketSize == 4;
#endif
for(Index k=0; k<peeled_kc; k+=pk)
{
EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX4");
RhsPacket B_0, T0;
LhsPacket A2;
#define EIGEN_GEBP_ONESTEP_LOADRHS_NONROTATING(K,N) \
traits.loadRhs(&blB[(N+4*K)*RhsProgress], B_0);
#if EIGEN_ARCH_PREFERS_ROTATING_KERNEL
#define EIGEN_GEBP_ONESTEP_LOADRHS(K,N) \
do { \
if (UseRotatingKernel) { \
if (N == 0) { \
B_0 = pload<RhsPacket>(&blB[(0+4*K)*RhsProgress]); \
} else { \
EIGEN_ASM_COMMENT("Do not reorder code, we're very tight on registers"); \
B_0 = protate<1>(B_0); \
} \
} else { \
EIGEN_GEBP_ONESTEP_LOADRHS_NONROTATING(K,N); \
} \
} while (false)
#else
#define EIGEN_GEBP_ONESTEP_LOADRHS(K,N) \
EIGEN_GEBP_ONESTEP_LOADRHS_NONROTATING(K,N)
#endif
#define EIGEN_GEBP_ONESTEP(K) \
do { \
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
@ -947,19 +1002,19 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
EIGEN_GEBP_ONESTEP_LOADRHS(K, 0); \
possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 0>(B_0, blB); \
traits.madd(A0, B_0, C0, T0); \
traits.madd(A1, B_0, C4, T0); \
traits.madd(A2, B_0, C8, B_0); \
EIGEN_GEBP_ONESTEP_LOADRHS(K, 1); \
possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 1>(B_0, blB); \
traits.madd(A0, B_0, C1, T0); \
traits.madd(A1, B_0, C5, T0); \
traits.madd(A2, B_0, C9, B_0); \
EIGEN_GEBP_ONESTEP_LOADRHS(K, 2); \
possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 2>(B_0, blB); \
traits.madd(A0, B_0, C2, T0); \
traits.madd(A1, B_0, C6, T0); \
traits.madd(A2, B_0, C10, B_0); \
EIGEN_GEBP_ONESTEP_LOADRHS(K, 3); \
possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 3>(B_0, blB); \
traits.madd(A0, B_0, C3 , T0); \
traits.madd(A1, B_0, C7, T0); \
traits.madd(A2, B_0, C11, B_0); \
@ -992,34 +1047,10 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
}
#undef EIGEN_GEBP_ONESTEP
#undef EIGEN_GEBP_ONESTEP_LOADRHS
#undef EIGEN_GEBP_ONESTEP_LOADRHS_NONROTATING
#if EIGEN_ARCH_PREFERS_ROTATING_KERNEL
if (UseRotatingKernel) {
#define EIGEN_GEBP_UNROTATE_RESULT(res0, res1, res2, res3) \
do { \
PacketBlock<ResPacket> resblock; \
resblock.packet[0] = res0; \
resblock.packet[1] = res1; \
resblock.packet[2] = res2; \
resblock.packet[3] = res3; \
ptranspose(resblock); \
resblock.packet[3] = protate<1>(resblock.packet[3]); \
resblock.packet[2] = protate<2>(resblock.packet[2]); \
resblock.packet[1] = protate<3>(resblock.packet[1]); \
ptranspose(resblock); \
res0 = resblock.packet[0]; \
res1 = resblock.packet[1]; \
res2 = resblock.packet[2]; \
res3 = resblock.packet[3]; \
} while (false)
EIGEN_GEBP_UNROTATE_RESULT(C0, C1, C2, C3);
EIGEN_GEBP_UNROTATE_RESULT(C4, C5, C6, C7);
EIGEN_GEBP_UNROTATE_RESULT(C8, C9, C10, C11);
}
#endif
possiblyRotatingKernelHelper.unrotateResult(C0, C1, C2, C3);
possiblyRotatingKernelHelper.unrotateResult(C4, C5, C6, C7);
possiblyRotatingKernelHelper.unrotateResult(C8, C9, C10, C11);
ResPacket R0, R1, R2;
ResPacket alphav = pset1<ResPacket>(alpha);

View File

@ -93,8 +93,7 @@
THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG,
IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY,
STORAGE_LAYOUT_DOES_NOT_MATCH,
ROTATION_BY_ILLEGAL_OFFSET
STORAGE_LAYOUT_DOES_NOT_MATCH
};
};

View File

@ -1,7 +1,7 @@
#include <stdio.h>
#if (defined __GNUC__) && (!defined __MINGW32__)
#if (defined __GNUC__) && (!defined __MINGW32__) && (!defined __CYGWIN__)
#define EIGEN_WEAK_LINKING __attribute__ ((weak))
#else
#define EIGEN_WEAK_LINKING

View File

@ -502,6 +502,10 @@ macro(ei_set_build_string)
set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-64bit)
endif()
if(EIGEN_TEST_CXX11)
set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-cxx11)
endif()
if(EIGEN_BUILD_STRING_SUFFIX)
set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-${EIGEN_BUILD_STRING_SUFFIX})
endif()

View File

@ -14,9 +14,9 @@ targets_to_make=`echo "$TESTSLIST" | egrep "$1" | xargs echo`
if [ -n "${EIGEN_MAKE_ARGS:+x}" ]
then
make $targets_to_make ${EIGEN_MAKE_ARGS}
@CMAKE_MAKE_PROGRAM@ $targets_to_make ${EIGEN_MAKE_ARGS}
else
make $targets_to_make
@CMAKE_MAKE_PROGRAM@ $targets_to_make @EIGEN_TEST_BUILD_FLAGS@
fi
exit $?

View File

@ -42,13 +42,19 @@
#include <deque>
#include <queue>
#include <list>
#if __cplusplus >= 201103L
#include <random>
#ifdef EIGEN_USE_THREADS
#include <future>
#endif
#endif
// To test that all calls from Eigen code to std::min() and std::max() are
// protected by parentheses against macro expansion, the min()/max() macros
// are defined here and any unparenthesized min/max call will cause a
// compiler error.
//#define min(A,B) please_protect_your_min_with_parentheses
//#define max(A,B) please_protect_your_max_with_parentheses
#define min(A,B) please_protect_your_min_with_parentheses
#define max(A,B) please_protect_your_max_with_parentheses
#define FORBIDDEN_IDENTIFIER (this_identifier_is_forbidden_to_avoid_clashes) this_identifier_is_forbidden_to_avoid_clashes
// B0 is defined in POSIX header termios.h

View File

@ -97,7 +97,7 @@ struct EvalRange<Evaluator, Index, true> {
Index i = first;
static const int PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size;
if (last - first > PacketSize) {
if (last - first >= PacketSize) {
eigen_assert(first % PacketSize == 0);
Index lastPacket = last - (last % PacketSize);
for (; i < lastPacket; i += PacketSize) {

View File

@ -141,20 +141,32 @@ int main()
public:
typedef mpfr::mpreal ResScalar;
enum {
Vectorizable = false,
LhsPacketSize = 1,
RhsPacketSize = 1,
ResPacketSize = 1,
NumberOfRegisters = 1,
nr = 1,
mr = 1,
LhsProgress = 1,
RhsProgress = 1
};
typedef ResScalar LhsPacket;
typedef ResScalar RhsPacket;
typedef ResScalar ResPacket;
};
template<typename Index, bool ConjugateLhs, bool ConjugateRhs>
struct gebp_kernel<mpfr::mpreal,mpfr::mpreal,Index,1,1,ConjugateLhs,ConjugateRhs>
template<typename Index, typename DataMapper, bool ConjugateLhs, bool ConjugateRhs>
struct gebp_kernel<mpfr::mpreal,mpfr::mpreal,Index,DataMapper,1,1,ConjugateLhs,ConjugateRhs>
{
typedef mpfr::mpreal mpreal;
EIGEN_DONT_INLINE
void operator()(mpreal* res, Index resStride, const mpreal* blockA, const mpreal* blockB, Index rows, Index depth, Index cols, mpreal alpha,
void operator()(const DataMapper& res, const mpreal* blockA, const mpreal* blockB,
Index rows, Index depth, Index cols, const mpreal& alpha,
Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0)
{
if(rows==0 || cols==0 || depth==0)
@ -170,8 +182,6 @@ int main()
{
for(Index j=0; j<cols; ++j)
{
mpreal *C1 = res + j*resStride;
const mpreal *A = blockA + i*strideA + offsetA;
const mpreal *B = blockB + j*strideB + offsetB;
@ -183,7 +193,7 @@ int main()
}
mpfr_mul(acc1.mpfr_ptr(), acc1.mpfr_srcptr(), alpha.mpfr_srcptr(), mpreal::get_default_rnd());
mpfr_add(C1[i].mpfr_ptr(), C1[i].mpfr_srcptr(), acc1.mpfr_srcptr(), mpreal::get_default_rnd());
mpfr_add(res(i,j).mpfr_ptr(), res(i,j).mpfr_srcptr(), acc1.mpfr_srcptr(), mpreal::get_default_rnd());
}
}
}

View File

@ -50,7 +50,7 @@ if(MPFR_FOUND)
include_directories(${MPFR_INCLUDES} ./mpreal)
ei_add_property(EIGEN_TESTED_BACKENDS "MPFR C++, ")
set(EIGEN_MPFR_TEST_LIBRARIES ${MPFR_LIBRARIES} ${GMP_LIBRARIES})
# ei_add_test(mpreal_support "" "${EIGEN_MPFR_TEST_LIBRARIES}" )
ei_add_test(mpreal_support "" "${EIGEN_MPFR_TEST_LIBRARIES}" )
else()
ei_add_property(EIGEN_MISSING_BACKENDS "MPFR C++, ")
endif()

View File

@ -54,7 +54,7 @@ static void test_equality()
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
if (random() < 0.5) {
if (internal::random<bool>()) {
mat2(i,j,k) = mat1(i,j,k);
}
}

View File

@ -57,7 +57,8 @@
#include <limits>
// Options
#define MPREAL_HAVE_INT64_SUPPORT // Enable int64_t support if possible. Available only for MSVC 2010 & GCC.
// FIXME HAVE_INT64_SUPPORT leads to clashes with long int and int64_t on some systems.
//#define MPREAL_HAVE_INT64_SUPPORT // Enable int64_t support if possible. Available only for MSVC 2010 & GCC.
#define MPREAL_HAVE_MSVC_DEBUGVIEW // Enable Debugger Visualizer for "Debug" builds in MSVC.
#define MPREAL_HAVE_DYNAMIC_STD_NUMERIC_LIMITS // Enable extended std::numeric_limits<mpfr::mpreal> specialization.
// Meaning that "digits", "round_style" and similar members are defined as functions, not constants.