mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-03-31 19:00:35 +08:00
Fix out-of-range memory access in GEMV (the memory was not used for the computation, only to assemble unaligned packets from aligned packet loads)
(transplanted from 221f54698c2f6690da8c0f44c1e31e55118dedab )
This commit is contained in:
parent
d0ee31aea6
commit
c5031edb92
@ -88,7 +88,7 @@ EIGEN_DONT_INLINE static void run(
|
||||
// Here we assume data are at least aligned on the base scalar type.
|
||||
Index alignedStart = internal::first_aligned(res,size);
|
||||
Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
|
||||
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
|
||||
const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
|
||||
|
||||
const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
|
||||
Index alignmentPattern = alignmentStep==0 ? AllAligned
|
||||
@ -177,6 +177,8 @@ EIGEN_DONT_INLINE static void run(
|
||||
_EIGEN_ACCUMULATE_PACKETS(d,du,d);
|
||||
break;
|
||||
case FirstAligned:
|
||||
{
|
||||
Index j = alignedStart;
|
||||
if(peels>1)
|
||||
{
|
||||
LhsPacket A00, A01, A02, A03, A10, A11, A12, A13;
|
||||
@ -186,7 +188,7 @@ EIGEN_DONT_INLINE static void run(
|
||||
A02 = pload<LhsPacket>(&lhs2[alignedStart-2]);
|
||||
A03 = pload<LhsPacket>(&lhs3[alignedStart-3]);
|
||||
|
||||
for (Index j = alignedStart; j<peeledSize; j+=peels*ResPacketSize)
|
||||
for (; j<peeledSize; j+=peels*ResPacketSize)
|
||||
{
|
||||
A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11);
|
||||
A12 = pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); palign<2>(A02,A12);
|
||||
@ -210,9 +212,10 @@ EIGEN_DONT_INLINE static void run(
|
||||
pstore(&res[j+ResPacketSize],T1);
|
||||
}
|
||||
}
|
||||
for (Index j = peeledSize; j<alignedSize; j+=ResPacketSize)
|
||||
for (; j<alignedSize; j+=ResPacketSize)
|
||||
_EIGEN_ACCUMULATE_PACKETS(d,du,du);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
|
||||
_EIGEN_ACCUMULATE_PACKETS(du,du,du);
|
||||
@ -340,7 +343,7 @@ EIGEN_DONT_INLINE static void run(
|
||||
// if that's not the case then vectorization is discarded, see below.
|
||||
Index alignedStart = internal::first_aligned(rhs, depth);
|
||||
Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
|
||||
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
|
||||
const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
|
||||
|
||||
const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
|
||||
Index alignmentPattern = alignmentStep==0 ? AllAligned
|
||||
@ -430,10 +433,12 @@ EIGEN_DONT_INLINE static void run(
|
||||
_EIGEN_ACCUMULATE_PACKETS(d,du,d);
|
||||
break;
|
||||
case FirstAligned:
|
||||
{
|
||||
Index j = alignedStart;
|
||||
if (peels>1)
|
||||
{
|
||||
/* Here we process 4 rows with two peeled iterations to hide
|
||||
* tghe overhead of unaligned loads. Moreover unaligned loads are handled
|
||||
* the overhead of unaligned loads. Moreover unaligned loads are handled
|
||||
* using special shift/move operations between the two aligned packets
|
||||
* overlapping the desired unaligned packet. This is *much* more efficient
|
||||
* than basic unaligned loads.
|
||||
@ -443,7 +448,7 @@ EIGEN_DONT_INLINE static void run(
|
||||
A02 = pload<LhsPacket>(&lhs2[alignedStart-2]);
|
||||
A03 = pload<LhsPacket>(&lhs3[alignedStart-3]);
|
||||
|
||||
for (Index j = alignedStart; j<peeledSize; j+=peels*RhsPacketSize)
|
||||
for (; j<peeledSize; j+=peels*RhsPacketSize)
|
||||
{
|
||||
RhsPacket b = pload<RhsPacket>(&rhs[j]);
|
||||
A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11);
|
||||
@ -465,9 +470,10 @@ EIGEN_DONT_INLINE static void run(
|
||||
ptmp3 = pcj.pmadd(A13, b, ptmp3);
|
||||
}
|
||||
}
|
||||
for (Index j = peeledSize; j<alignedSize; j+=RhsPacketSize)
|
||||
for (; j<alignedSize; j+=RhsPacketSize)
|
||||
_EIGEN_ACCUMULATE_PACKETS(d,du,du);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
|
||||
_EIGEN_ACCUMULATE_PACKETS(du,du,du);
|
||||
|
@ -135,6 +135,35 @@ void zero_sized_objects()
|
||||
a*b;
|
||||
}
|
||||
|
||||
void unaligned_objects()
|
||||
{
|
||||
// Regression test for the bug reported here:
|
||||
// http://forum.kde.org/viewtopic.php?f=74&t=107541
|
||||
// Recall that the matrix*vector kernel avoids unaligned loads by loading two aligned packets and then reassembling them.
|
||||
// There was a mistake in the computation of the valid range for fully unaligned objects: in some rare cases,
|
||||
// memory was read outside the allocated matrix memory. Though the values were not used, this might raise a segfault.
|
||||
for(int m=450;m<460;++m) // number of rows of M: 450..459
|
||||
{
|
||||
for(int n=8;n<12;++n) // number of columns of M: 8..11
|
||||
{
|
||||
MatrixXf M(m, n);
|
||||
VectorXf v1(n), r1(500); // r1 is oversized so that segment(o,m) fits for every m and o used below (459+3 <= 500)
|
||||
RowVectorXf v2(m), r2(16); // likewise r2 holds segment(o,n) for every n and o used below (11+3 <= 16)
|
||||
|
||||
M.setRandom();
|
||||
v1.setRandom();
|
||||
v2.setRandom();
|
||||
for(int o=0; o<4; ++o) // o shifts the start of the destination segment by 0..3 coefficients
|
||||
{
|
||||
r1.segment(o,m).noalias() = M * v1; // matrix * column-vector written into the shifted segment
|
||||
VERIFY_IS_APPROX(r1.segment(o,m), M * MatrixXf(v1)); // reference: same product with v1 converted to a MatrixXf first (presumably to avoid the same kernel - confirm)
|
||||
r2.segment(o,n).noalias() = v2 * M; // row-vector * matrix written into the shifted segment
|
||||
VERIFY_IS_APPROX(r2.segment(o,n), MatrixXf(v2) * M); // reference: same product with v2 converted to a MatrixXf first
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void test_product_extra()
|
||||
{
|
||||
for(int i = 0; i < g_repeat; i++) {
|
||||
@ -143,6 +172,7 @@ void test_product_extra()
|
||||
CALL_SUBTEST_2( mat_mat_scalar_scalar_product() );
|
||||
CALL_SUBTEST_3( product_extra(MatrixXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) );
|
||||
CALL_SUBTEST_4( product_extra(MatrixXcd(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) );
|
||||
CALL_SUBTEST_5( zero_sized_objects() );
|
||||
}
|
||||
CALL_SUBTEST_5( zero_sized_objects() );
|
||||
CALL_SUBTEST_6( unaligned_objects() );
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user