mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-24 14:45:14 +08:00
Do not read buffers out of bounds -- load only the 4 bytes we know exist here. We could also have used vld1_lane_f32, but doing so here, without the overhead of initializing the unused lane, would have triggered use-of-uninitialized-value errors in tools such as ASan. Note that this code is sub-optimal both before and after this change: we should be reading either 2 or 4 float32 values per load instruction (2 for ARM in-order cores with an affinity for 8-byte loads; 4 for ARM out-of-order cores able to dual-issue 16-byte load instructions with arithmetic instructions). Both before and after this patch, we are only loading 4 bytes of useful data here (even if, before this patch, we were technically loading 8 bytes, only to use the first 4).
This commit is contained in:
parent
b131a4db24
commit
a4159dba08
@ -859,7 +859,7 @@ template<>
|
||||
struct gebp_traits <float, float, false, false,Architecture::NEON>
|
||||
: gebp_traits<float,float,false,false,Architecture::Generic>
|
||||
{
|
||||
typedef float32x2_t RhsPacket;
|
||||
typedef float RhsPacket;
|
||||
|
||||
EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
|
||||
{
|
||||
@ -871,7 +871,7 @@ struct gebp_traits <float, float, false, false,Architecture::NEON>
|
||||
|
||||
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
|
||||
{
|
||||
dest = vld1_f32(b);
|
||||
dest = *b;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
|
||||
@ -881,7 +881,7 @@ struct gebp_traits <float, float, false, false,Architecture::NEON>
|
||||
|
||||
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/) const
|
||||
{
|
||||
c = vfmaq_lane_f32(c, a, b, 0);
|
||||
c = vfmaq_n_f32(c, a, b);
|
||||
}
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user