From 93a089adc8bd7d7cf541341c9a631bcb51f2e62d Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Wed, 4 Feb 2009 16:53:03 +0000 Subject: [PATCH] disable alignment altogether outside of the platforms which potentially have SSE or AltiVec. This should remove most portability issues on other platforms, where handling data alignment (including overloading operator new and new[]) can be tricky, and where data alignment is not needed in the first place. --- Eigen/src/Core/util/Macros.h | 24 +++++++++++- Eigen/src/Core/util/Memory.h | 75 +++++++++++------------------------- test/dynalloc.cpp | 28 ++++++++------ 3 files changed, 62 insertions(+), 65 deletions(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 92c565720..795efb90c 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -36,6 +36,24 @@ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ EIGEN_MINOR_VERSION>=z)))) +// if the compiler is GNUC, disable 16 byte alignment on exotic archs that probably don't need it, and on which +// it may be extra trouble to get aligned memory allocation to work (example: on ARM, overloading new[] is a PITA +// because extra memory must be allocated for bookkeeping). +// if the compiler is not GNUC, just cross fingers that the architecture isn't too exotic, because we don't want +// to keep track of all the different preprocessor symbols for all compilers. +#if !defined(__GNUC__) || defined(__i386__) || defined(__x86_64__) || defined(__ppc__) || defined(__ia64__) + #define EIGEN_ARCH_WANTS_ALIGNMENT 1 +#else + #ifdef EIGEN_VECTORIZE + #error Vectorization enabled, but the architecture is not listed among those for which we require 16 byte alignment. If you added vectorization for another architecture, you also need to edit this list. 
+ #endif + #define EIGEN_ARCH_WANTS_ALIGNMENT 0 + #ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT + #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT + #endif +#endif + + #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR #define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION RowMajor #else @@ -147,12 +165,14 @@ using Eigen::ei_cos; * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link * vectorized and non-vectorized code. */ -#if (defined __GNUC__) +#if !EIGEN_ARCH_WANTS_ALIGNMENT +#define EIGEN_ALIGN_128 +#elif (defined __GNUC__) #define EIGEN_ALIGN_128 __attribute__((aligned(16))) #elif (defined _MSC_VER) #define EIGEN_ALIGN_128 __declspec(align(16)) #else -#define EIGEN_ALIGN_128 +#error Please tell me what is the equivalent of __attribute__((aligned(16))) for your compiler #endif #define EIGEN_RESTRICT __restrict diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 8afd81828..8c58debea 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -74,13 +74,15 @@ inline void* ei_aligned_malloc(size_t size) #endif void *result; - #if EIGEN_HAS_POSIX_MEMALIGN && !EIGEN_MALLOC_ALREADY_ALIGNED + #if EIGEN_HAS_POSIX_MEMALIGN && EIGEN_ARCH_WANTS_ALIGNMENT && !EIGEN_MALLOC_ALREADY_ALIGNED #ifdef EIGEN_EXCEPTIONS const int failed = #endif posix_memalign(&result, 16, size); #else - #if EIGEN_MALLOC_ALREADY_ALIGNED + #if !EIGEN_ARCH_WANTS_ALIGNMENT + result = malloc(size); + #elif EIGEN_MALLOC_ALREADY_ALIGNED result = malloc(size); #elif EIGEN_HAS_MM_MALLOC result = _mm_malloc(size, 16); @@ -141,7 +143,9 @@ template inline T* ei_conditional_aligned_new(size_t siz */ inline void ei_aligned_free(void *ptr) { - #if EIGEN_MALLOC_ALREADY_ALIGNED + #if !EIGEN_ARCH_WANTS_ALIGNMENT + free(ptr); + #elif EIGEN_MALLOC_ALREADY_ALIGNED free(ptr); #elif EIGEN_HAS_POSIX_MEMALIGN free(ptr); @@ -232,60 +236,27 @@ inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset) #define 
ei_aligned_stack_delete(TYPE,PTR,SIZE) do {ei_delete_elements_of_array(PTR, SIZE); \ ei_aligned_stack_free(PTR,sizeof(TYPE)*SIZE);} while(0) - -/** \brief Overloads the operator new and delete of the class Type with operators that are aligned if NeedsToAlign is true - * - * When Eigen's explicit vectorization is enabled, Eigen assumes that some fixed sizes types are aligned - * on a 16 bytes boundary. Those include all Matrix types having a sizeof multiple of 16 bytes, e.g.: - * - Vector2d, Vector4f, Vector4i, Vector4d, - * - Matrix2d, Matrix4f, Matrix4i, Matrix4d, - * - etc. - * When an object is statically allocated, the compiler will automatically and always enforces 16 bytes - * alignment of the data when needed. However some troubles might appear when data are dynamically allocated. - * Let's pick an example: - * \code - * struct Foo { - * char dummy; - * Vector4f some_vector; - * }; - * Foo obj1; // static allocation - * obj1.some_vector = Vector4f(..); // => OK - * - * Foo *pObj2 = new Foo; // dynamic allocation - * pObj2->some_vector = Vector4f(..); // => !! might segfault !! - * \endcode - * Here, the problem is that operator new is not aware of the compile time alignment requirement of the - * type Vector4f (and hence of the type Foo). Therefore "new Foo" does not necessarily returns a 16 bytes - * aligned pointer. The purpose of the class WithAlignedOperatorNew is exactly to overcome this issue by - * overloading the operator new to return aligned data when the vectorization is enabled. - * Here is a similar safe example: - * \code - * struct Foo { - * EIGEN_MAKE_ALIGNED_OPERATOR_NEW - * char dummy; - * Vector4f some_vector; - * }; - * Foo *pObj2 = new Foo; // dynamic allocation - * pObj2->some_vector = Vector4f(..); // => SAFE ! 
- * \endcode - * - * \sa class ei_new_allocator - */ -#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \ - void *operator new(size_t size) throw() { \ - return Eigen::ei_conditional_aligned_malloc(size); \ - } \ - void *operator new[](size_t size) throw() { \ - return Eigen::ei_conditional_aligned_malloc(size); \ - } \ - void operator delete(void * ptr) { Eigen::ei_conditional_aligned_free(ptr); } \ - void operator delete[](void * ptr) { Eigen::ei_conditional_aligned_free(ptr); } \ - void *operator new(size_t, void *ptr) throw() { return ptr; } + +#if EIGEN_ARCH_WANTS_ALIGNMENT + #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \ + void *operator new(size_t size) throw() { \ + return Eigen::ei_conditional_aligned_malloc(size); \ + } \ + void *operator new[](size_t size) throw() { \ + return Eigen::ei_conditional_aligned_malloc(size); \ + } \ + void operator delete(void * ptr) { Eigen::ei_conditional_aligned_free(ptr); } \ + void operator delete[](void * ptr) { Eigen::ei_conditional_aligned_free(ptr); } \ + void *operator new(size_t, void *ptr) throw() { return ptr; } +#else + #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) +#endif #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true) #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%16==0)) + /** \class aligned_allocator * * \brief stl compatible allocator to use with with 16 byte aligned types diff --git a/test/dynalloc.cpp b/test/dynalloc.cpp index 899342d83..c0fcdde3d 100644 --- a/test/dynalloc.cpp +++ b/test/dynalloc.cpp @@ -24,12 +24,18 @@ #include "main.h" +#if EIGEN_ARCH_WANTS_ALIGNMENT +#define ALIGNMENT 16 +#else +#define ALIGNMENT 1 +#endif + void check_handmade_aligned_malloc() { for(int i = 1; i < 1000; i++) { char *p = (char*)ei_handmade_aligned_malloc(i); - VERIFY(size_t(p)%16==0); + VERIFY(size_t(p)%ALIGNMENT==0); // if the buffer 
is wrongly allocated this will give a bad write --> check with valgrind for(int j = 0; j < i; j++) p[j]=0; ei_handmade_aligned_free(p); @@ -41,7 +47,7 @@ void check_aligned_malloc() for(int i = 1; i < 1000; i++) { char *p = (char*)ei_aligned_malloc(i); - VERIFY(size_t(p)%16==0); + VERIFY(size_t(p)%ALIGNMENT==0); // if the buffer is wrongly allocated this will give a bad write --> check with valgrind for(int j = 0; j < i; j++) p[j]=0; ei_aligned_free(p); @@ -53,7 +59,7 @@ void check_aligned_new() for(int i = 1; i < 1000; i++) { float *p = ei_aligned_new(i); - VERIFY(size_t(p)%16==0); + VERIFY(size_t(p)%ALIGNMENT==0); // if the buffer is wrongly allocated this will give a bad write --> check with valgrind for(int j = 0; j < i; j++) p[j]=0; ei_aligned_delete(p,i); @@ -65,7 +71,7 @@ void check_aligned_stack_alloc() for(int i = 1; i < 1000; i++) { float *p = ei_aligned_stack_new(float,i); - VERIFY(size_t(p)%16==0); + VERIFY(size_t(p)%ALIGNMENT==0); // if the buffer is wrongly allocated this will give a bad write --> check with valgrind for(int j = 0; j < i; j++) p[j]=0; ei_aligned_stack_delete(float,p,i); @@ -92,7 +98,7 @@ class MyClassA template void check_dynaligned() { T* obj = new T; - VERIFY(size_t(obj)%16==0); + VERIFY(size_t(obj)%ALIGNMENT==0); delete obj; } @@ -115,15 +121,15 @@ void test_dynalloc() // check static allocation, who knows ? 
{ - MyStruct foo0; VERIFY(size_t(foo0.avec.data())%16==0); - MyClassA fooA; VERIFY(size_t(fooA.avec.data())%16==0); + MyStruct foo0; VERIFY(size_t(foo0.avec.data())%ALIGNMENT==0); + MyClassA fooA; VERIFY(size_t(fooA.avec.data())%ALIGNMENT==0); } // dynamic allocation, single object for (int i=0; iavec.data())%16==0); - MyClassA *fooA = new MyClassA(); VERIFY(size_t(fooA->avec.data())%16==0); + MyStruct *foo0 = new MyStruct(); VERIFY(size_t(foo0->avec.data())%ALIGNMENT==0); + MyClassA *fooA = new MyClassA(); VERIFY(size_t(fooA->avec.data())%ALIGNMENT==0); delete foo0; delete fooA; } @@ -132,8 +138,8 @@ void test_dynalloc() const int N = 10; for (int i=0; iavec.data())%16==0); - MyClassA *fooA = new MyClassA[N]; VERIFY(size_t(fooA->avec.data())%16==0); + MyStruct *foo0 = new MyStruct[N]; VERIFY(size_t(foo0->avec.data())%ALIGNMENT==0); + MyClassA *fooA = new MyClassA[N]; VERIFY(size_t(fooA->avec.data())%ALIGNMENT==0); delete[] foo0; delete[] fooA; }