Make inverse 3x3 faster and avoid gcc bug.

There seems to be a gcc 4.7 bug that incorrectly flags the current
3x3 inverse as using uninitialized memory.  I'm *pretty* sure it's
a false positive, but it's hard to trigger.  The same warning
does not trigger with clang or later compiler versions.

In trying to find a work-around, this implementation turns out to be
faster anyways for static-sized matrices.

```
name                                            old cpu/op  new cpu/op  delta
BM_Inverse3x3<DynamicMatrix3T<float>>            423ns ± 2%   433ns ± 3%   +2.32%    (p=0.000 n=98+96)
BM_Inverse3x3<DynamicMatrix3T<double>>           425ns ± 2%   427ns ± 3%   +0.48%    (p=0.003 n=99+96)
BM_Inverse3x3<StaticMatrix3T<float>>            7.10ns ± 2%  0.80ns ± 1%  -88.67%  (p=0.000 n=114+112)
BM_Inverse3x3<StaticMatrix3T<double>>           7.45ns ± 2%  1.34ns ± 1%  -82.01%  (p=0.000 n=105+111)
BM_AliasedInverse3x3<DynamicMatrix3T<float>>     409ns ± 3%   419ns ± 3%   +2.40%   (p=0.000 n=100+98)
BM_AliasedInverse3x3<DynamicMatrix3T<double>>    414ns ± 3%   413ns ± 2%     ~       (p=0.322 n=98+98)
BM_AliasedInverse3x3<StaticMatrix3T<float>>     7.57ns ± 1%  0.80ns ± 1%  -89.37%  (p=0.000 n=111+114)
BM_AliasedInverse3x3<StaticMatrix3T<double>>    9.09ns ± 1%  2.58ns ±41%  -71.60%  (p=0.000 n=113+116)
```
This commit is contained in:
Antonio Sanchez 2021-08-04 13:39:09 -07:00 committed by Rasmus Munk Larsen
parent 31f796ebef
commit 5ad8b9bfe2

View File

@ -144,13 +144,18 @@ inline void compute_inverse_size3_helper(
const Matrix<typename ResultType::Scalar,3,1>& cofactors_col0,
ResultType& result)
{
result.row(0) = cofactors_col0 * invdet;
result.coeffRef(1,0) = cofactor_3x3<MatrixType,0,1>(matrix) * invdet;
result.coeffRef(1,1) = cofactor_3x3<MatrixType,1,1>(matrix) * invdet;
// Compute cofactors in a way that avoids aliasing issues.
typedef typename ResultType::Scalar Scalar;
const Scalar c01 = cofactor_3x3<MatrixType,0,1>(matrix) * invdet;
const Scalar c11 = cofactor_3x3<MatrixType,1,1>(matrix) * invdet;
const Scalar c02 = cofactor_3x3<MatrixType,0,2>(matrix) * invdet;
result.coeffRef(1,2) = cofactor_3x3<MatrixType,2,1>(matrix) * invdet;
result.coeffRef(2,0) = cofactor_3x3<MatrixType,0,2>(matrix) * invdet;
result.coeffRef(2,1) = cofactor_3x3<MatrixType,1,2>(matrix) * invdet;
result.coeffRef(2,2) = cofactor_3x3<MatrixType,2,2>(matrix) * invdet;
result.coeffRef(1,0) = c01;
result.coeffRef(1,1) = c11;
result.coeffRef(2,0) = c02;
result.row(0) = cofactors_col0 * invdet;
}
template<typename MatrixType, typename ResultType>
@ -166,12 +171,7 @@ struct compute_inverse<MatrixType, ResultType, 3>
cofactors_col0.coeffRef(2) = cofactor_3x3<MatrixType,2,0>(matrix);
const Scalar det = (cofactors_col0.cwiseProduct(matrix.col(0))).sum();
const Scalar invdet = Scalar(1) / det;
if(extract_data(matrix) != extract_data(result)) {
compute_inverse_size3_helper(matrix, invdet, cofactors_col0, result);
} else {
MatrixType matrix_t = matrix;
compute_inverse_size3_helper(matrix_t, invdet, cofactors_col0, result);
}
compute_inverse_size3_helper(matrix, invdet, cofactors_col0, result);
}
};
@ -187,22 +187,16 @@ struct compute_inverse_and_det_with_check<MatrixType, ResultType, 3>
bool& invertible
)
{
using std::abs;
typedef typename ResultType::Scalar Scalar;
Matrix<Scalar,3,1> cofactors_col0;
cofactors_col0.coeffRef(0) = cofactor_3x3<MatrixType,0,0>(matrix);
cofactors_col0.coeffRef(1) = cofactor_3x3<MatrixType,1,0>(matrix);
cofactors_col0.coeffRef(2) = cofactor_3x3<MatrixType,2,0>(matrix);
determinant = (cofactors_col0.cwiseProduct(matrix.col(0))).sum();
invertible = abs(determinant) > absDeterminantThreshold;
invertible = Eigen::numext::abs(determinant) > absDeterminantThreshold;
if(!invertible) return;
const Scalar invdet = Scalar(1) / determinant;
if(extract_data(matrix) != extract_data(inverse)) {
compute_inverse_size3_helper(matrix, invdet, cofactors_col0, inverse);
} else {
MatrixType matrix_t = matrix;
compute_inverse_size3_helper(matrix_t, invdet, cofactors_col0, inverse);
}
compute_inverse_size3_helper(matrix, invdet, cofactors_col0, inverse);
}
};