mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-15 07:10:37 +08:00
* implement handmade aligned malloc, fast but always wastes 16 bytes of memory.
only used as fallback for now, needs benchmarking. also notice that some malloc() impls do waste memory to keep track of alignment and other stuff (check msdn's page on malloc). * expand test_dynalloc to cover low level aligned alloc funcs. Remove the old #ifdef EIGEN_VECTORIZE... * rewrite the logic choosing an aligned alloc, some new stuff: * malloc() already aligned on freebsd and windows x64 (plus apple already) * _mm_malloc() used only if EIGEN_VECTORIZE * posix_memalign: correct detection according to man page (not necessarily linux specific), don't attempt to declare it if the platform didn't declare it (there had to be a reason why it didn't declare it, right?)
This commit is contained in:
parent
f52a9e5315
commit
fd831d5a12
@ -27,11 +27,42 @@
|
||||
#ifndef EIGEN_MEMORY_H
|
||||
#define EIGEN_MEMORY_H
|
||||
|
||||
#ifdef __linux
|
||||
// it seems we cannot assume posix_memalign is defined in the stdlib header
|
||||
extern "C" int posix_memalign (void **, size_t, size_t) throw ();
|
||||
#if defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN64)
|
||||
#define EIGEN_MALLOC_ALREADY_ALIGNED 1
|
||||
#else
|
||||
#define EIGEN_MALLOC_ALREADY_ALIGNED 0
|
||||
#endif
|
||||
|
||||
#if (defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))
|
||||
#define EIGEN_HAS_POSIX_MEMALIGN 1
|
||||
#else
|
||||
#define EIGEN_HAS_POSIX_MEMALIGN 0
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSE
|
||||
#define EIGEN_HAS_MM_MALLOC 1
|
||||
#else
|
||||
#define EIGEN_HAS_MM_MALLOC 0
|
||||
#endif
|
||||
|
||||
/** \internal like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
|
||||
* Fast, but wastes 16 additional bytes of memory.
|
||||
* Does not throw any exception.
|
||||
*/
|
||||
inline void* ei_handmade_aligned_malloc(size_t size)
|
||||
{
|
||||
void *original = malloc(size+16);
|
||||
void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
|
||||
*(reinterpret_cast<void**>(aligned) - 1) = original;
|
||||
return aligned;
|
||||
}
|
||||
|
||||
/** \internal frees memory allocated with ei_handmade_aligned_malloc */
|
||||
inline void ei_handmade_aligned_free(void *ptr)
|
||||
{
|
||||
free(*(reinterpret_cast<void**>(ptr) - 1));
|
||||
}
|
||||
|
||||
/** \internal allocates \a size bytes. The returned pointer is guaranteed to have 16 bytes alignment.
|
||||
* On allocation error, the returned pointer is undefined, but if exceptions are enabled then a std::bad_alloc is thrown.
|
||||
*/
|
||||
@ -42,18 +73,20 @@ inline void* ei_aligned_malloc(size_t size)
|
||||
#endif
|
||||
|
||||
void *result;
|
||||
#ifdef __linux
|
||||
#if EIGEN_HAS_POSIX_MEMALIGN && !EIGEN_MALLOC_ALREADY_ALIGNED
|
||||
#ifdef EIGEN_EXCEPTIONS
|
||||
const int failed =
|
||||
#endif
|
||||
posix_memalign(&result, 16, size);
|
||||
#else
|
||||
#ifdef _MSC_VER
|
||||
result = _aligned_malloc(size, 16);
|
||||
#elif defined(__APPLE__)
|
||||
result = malloc(size); // Apple's malloc() already returns 16-byte-aligned ptrs
|
||||
#else
|
||||
#if EIGEN_MALLOC_ALREADY_ALIGNED
|
||||
result = malloc(size);
|
||||
#elif EIGEN_HAS_MM_MALLOC
|
||||
result = _mm_malloc(size, 16);
|
||||
#elif (defined _MSC_VER)
|
||||
result = _aligned_malloc(size, 16);
|
||||
#else
|
||||
result = ei_handmade_aligned_malloc(size);
|
||||
#endif
|
||||
#ifdef EIGEN_EXCEPTIONS
|
||||
const int failed = (result == 0);
|
||||
@ -103,14 +136,16 @@ template<typename T, bool Align> inline T* ei_conditional_aligned_new(size_t siz
|
||||
*/
|
||||
inline void ei_aligned_free(void *ptr)
|
||||
{
|
||||
#if defined(__linux)
|
||||
#if EIGEN_HAS_POSIX_MEMALIGN
|
||||
free(ptr);
|
||||
#elif defined(__APPLE__)
|
||||
#elif EIGEN_MALLOC_ALREADY_ALIGNED
|
||||
free(ptr);
|
||||
#elif defined(_MSC_VER)
|
||||
_aligned_free(ptr);
|
||||
#else
|
||||
#elif EIGEN_HAS_MM_MALLOC
|
||||
_mm_free(ptr);
|
||||
#else
|
||||
ei_handmade_aligned_free(ptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -24,6 +24,55 @@
|
||||
|
||||
#include "main.h"
|
||||
|
||||
void check_handmade_aligned_malloc()
|
||||
{
|
||||
for(int i = 1; i < 1000; i++)
|
||||
{
|
||||
char *p = (char*)ei_handmade_aligned_malloc(i);
|
||||
VERIFY(size_t(p)%16==0);
|
||||
// if the buffer is wrongly allocated this will give a bad write --> check with valgrind
|
||||
for(int j = 0; j < i; j++) p[j]=0;
|
||||
ei_handmade_aligned_free(p);
|
||||
}
|
||||
}
|
||||
|
||||
void check_aligned_malloc()
|
||||
{
|
||||
for(int i = 1; i < 1000; i++)
|
||||
{
|
||||
char *p = (char*)ei_aligned_malloc(i);
|
||||
VERIFY(size_t(p)%16==0);
|
||||
// if the buffer is wrongly allocated this will give a bad write --> check with valgrind
|
||||
for(int j = 0; j < i; j++) p[j]=0;
|
||||
ei_aligned_free(p);
|
||||
}
|
||||
}
|
||||
|
||||
void check_aligned_new()
|
||||
{
|
||||
for(int i = 1; i < 1000; i++)
|
||||
{
|
||||
float *p = ei_aligned_new<float>(i);
|
||||
VERIFY(size_t(p)%16==0);
|
||||
// if the buffer is wrongly allocated this will give a bad write --> check with valgrind
|
||||
for(int j = 0; j < i; j++) p[j]=0;
|
||||
ei_aligned_delete(p,i);
|
||||
}
|
||||
}
|
||||
|
||||
void check_aligned_stack_alloc()
|
||||
{
|
||||
for(int i = 1; i < 1000; i++)
|
||||
{
|
||||
float *p = ei_aligned_stack_new(float,i);
|
||||
VERIFY(size_t(p)%16==0);
|
||||
// if the buffer is wrongly allocated this will give a bad write --> check with valgrind
|
||||
for(int j = 0; j < i; j++) p[j]=0;
|
||||
ei_aligned_stack_delete(float,p,i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// test compilation with both a struct and a class...
|
||||
struct MyStruct
|
||||
{
|
||||
@ -49,8 +98,12 @@ template<typename T> void check_dynaligned()
|
||||
|
||||
void test_dynalloc()
|
||||
{
|
||||
// low level dynamic memory allocation
|
||||
CALL_SUBTEST(check_handmade_aligned_malloc());
|
||||
CALL_SUBTEST(check_aligned_malloc());
|
||||
CALL_SUBTEST(check_aligned_new());
|
||||
CALL_SUBTEST(check_aligned_stack_alloc());
|
||||
|
||||
#ifdef EIGEN_VECTORIZE
|
||||
for (int i=0; i<g_repeat*100; ++i)
|
||||
{
|
||||
CALL_SUBTEST( check_dynaligned<Vector4f>() );
|
||||
@ -100,6 +153,4 @@ void test_dynalloc()
|
||||
}
|
||||
}
|
||||
|
||||
#endif // EIGEN_VECTORIZE
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user