Make the number of registers easier to configure per architecture

This commit is contained in:
Gael Guennebaud 2010-03-04 18:58:12 +01:00
parent 8ed1ef4469
commit ea8cad5151
3 changed files with 21 additions and 6 deletions

View File

@ -62,4 +62,17 @@
#define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8
#endif
/** Defines the default number of registers available for the target architecture.
* Currently it must be 8 or 16. Other values will fail.
*
* 32-bit x86 targets default to 8; every other target defaults to 16.
* Both the GCC/Clang spelling (__i386__) and the MSVC spelling (_M_IX86) of the
* 32-bit x86 predefined macro are checked, so the choice does not depend on
* which compiler is used.
*
* Define this macro before this point to override the default.
*/
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
#if defined(__i386__) || defined(_M_IX86)
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8
#else
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
#endif
#endif
#endif // EIGEN_DEFAULT_SETTINGS_H

View File

@ -35,6 +35,12 @@
#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE 4*96*96
#endif
// FIXME: NEON provides 16 quad registers, but the current register allocator
// copes with them so poorly that limiting the count to 8 works much better.
// The value is only set here when it has not already been defined.
#if !defined(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS)
#  define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8
#endif
// Packet type aliases for this backend: each name is a plain alias of the
// corresponding vector type (float32x4_t / int32x4_t).
// NOTE(review): presumably these are the NEON intrinsic types from
// <arm_neon.h>; the include is not visible in this hunk — confirm.
typedef float32x4_t Packet4f;  // float packet
typedef int32x4_t Packet4i;    // integer packet

View File

@ -130,14 +130,10 @@ struct ei_product_blocking_traits
typedef typename ei_packet_traits<Scalar>::type PacketType;
enum {
PacketSize = sizeof(PacketType)/sizeof(Scalar),
#if (defined __i386__)
HalfRegisterCount = 4,
#else
HalfRegisterCount = 8,
#endif
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
// register block size along the N direction (must be either 2 or 4)
nr = HalfRegisterCount/2,
nr = NumberOfRegisters/4,
// register block size along the M direction (currently, this one cannot be modified)
mr = 2 * PacketSize,