mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-03-07 18:27:40 +08:00
Allow vectorized padding on GPU. This helps speed things up a little
Before: BM_padding/10 5000000 460 217.03 MFlops/s BM_padding/80 5000000 460 13899.40 MFlops/s BM_padding/640 5000000 461 888421.17 MFlops/s BM_padding/4K 5000000 460 54316322.55 MFlops/s After: BM_padding/10 5000000 454 220.20 MFlops/s BM_padding/80 5000000 455 14039.86 MFlops/s BM_padding/640 5000000 452 904968.83 MFlops/s BM_padding/4K 5000000 411 60750049.21 MFlops/s
This commit is contained in:
parent
86da77cb9b
commit
5fa27574dd
@ -93,7 +93,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||
|
||||
enum {
|
||||
IsAligned = false,
|
||||
IsAligned = true,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = true,
|
||||
|
Loading…
Reference in New Issue
Block a user