Patches to support ARM NEON with Clang 3.0 and LLVM-GCC

This commit is contained in:
Marton Danoczy 2011-11-04 16:37:10 +01:00
parent 1b98b73472
commit f422668d39
3 changed files with 38 additions and 24 deletions

View File

@ -27,8 +27,8 @@
namespace internal { namespace internal {
static uint32x4_t p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000);
static uint32x2_t p2ui_CONJ_XOR = { 0x00000000, 0x80000000 }; static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000);
//---------- float ---------- //---------- float ----------
struct Packet2cf struct Packet2cf

View File

@ -52,6 +52,16 @@ typedef uint32x4_t Packet4ui;
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i p4i_##NAME = pset1<Packet4i>(X) const Packet4i p4i_##NAME = pset1<Packet4i>(X)
// EIGEN_INIT_NEON_PACKET2 / EIGEN_INIT_NEON_PACKET4 expand to the brace-enclosed
// initializer used to build a 2- or 4-lane NEON packet from individual lane
// values, e.g.  Packet4f p = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
// The macros only produce the initializer text; the packet type comes from the
// declaration they are used in.
#if defined(__llvm__) && !defined(__clang__)
// __llvm__ defined without __clang__ is used here to detect Apple's llvm-gcc.
// Its NEON packet types are unions, so the lane values need one extra level of
// braces to reach the wrapped member.
#define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}}
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}}
#else
// Default initializer for packets: every other compiler takes the lane values
// in a single flat brace list.
#define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y}
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
#endif
#ifndef __pld #ifndef __pld
#define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" ); #define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" );
#endif #endif
@ -84,7 +94,7 @@ template<> struct packet_traits<int> : default_packet_traits
}; };
}; };
#if EIGEN_GNUC_AT_MOST(4,4) #if EIGEN_GNUC_AT_MOST(4,4) && !defined(__llvm__)
// workaround gcc 4.2, 4.3 and 4.4 compilation issue // workaround gcc 4.2, 4.3 and 4.4 compilation issue
EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); } EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); } EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
@ -100,12 +110,12 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a)
{ {
Packet4f countdown = { 0, 1, 2, 3 }; Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
return vaddq_f32(pset1<Packet4f>(a), countdown); return vaddq_f32(pset1<Packet4f>(a), countdown);
} }
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a)
{ {
Packet4i countdown = { 0, 1, 2, 3 }; Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
return vaddq_s32(pset1<Packet4i>(a), countdown); return vaddq_s32(pset1<Packet4i>(a), countdown);
} }
@ -395,25 +405,29 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
return s[0]; return s[0];
} }
template<int Offset> // this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
struct palign_impl<Offset,Packet4f> // see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
{ #define PALIGN_NEON(Offset,Type,Command) \
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second) template<>\
{ struct palign_impl<Offset,Type>\
if (Offset!=0) {\
first = vextq_f32(first, second, Offset); EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
} {\
}; if (Offset!=0)\
first = Command(first, second, Offset);\
}\
};\
template<int Offset> PALIGN_NEON(0,Packet4f,vextq_f32)
struct palign_impl<Offset,Packet4i> PALIGN_NEON(1,Packet4f,vextq_f32)
{ PALIGN_NEON(2,Packet4f,vextq_f32)
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second) PALIGN_NEON(3,Packet4f,vextq_f32)
{ PALIGN_NEON(0,Packet4i,vextq_s32)
if (Offset!=0) PALIGN_NEON(1,Packet4i,vextq_s32)
first = vextq_s32(first, second, Offset); PALIGN_NEON(2,Packet4i,vextq_s32)
} PALIGN_NEON(3,Packet4i,vextq_s32)
};
#undef PALIGN_NEON
} // end namespace internal } // end namespace internal

View File

@ -45,7 +45,7 @@
#define EIGEN_GNUC_AT_MOST(x,y) 0 #define EIGEN_GNUC_AT_MOST(x,y) 0
#endif #endif
#if EIGEN_GNUC_AT_MOST(4,3) #if EIGEN_GNUC_AT_MOST(4,3) && !defined(__clang__)
// see bug 89 // see bug 89
#define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0 #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0
#else #else