Pulled latest changes from trunk

Benoit Steiner 2015-04-14 19:13:34 -07:00
commit 0f82399fe9
7 changed files with 85 additions and 24 deletions

CMakeLists.txt

@@ -169,6 +169,11 @@ if(NOT MSVC)
     ei_add_cxx_compiler_flag("-ansi")
   endif()
 
+  if(ANDROID_NDK)
+    ei_add_cxx_compiler_flag("-pie")
+    ei_add_cxx_compiler_flag("-fPIE")
+  endif()
+
   set(CMAKE_REQUIRED_FLAGS "")
 
   option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
@@ -208,7 +213,7 @@ if(NOT MSVC)
   endif()
 
   option(EIGEN_TEST_FMA "Enable/Disable FMA in tests/examples" OFF)
-  if(EIGEN_TEST_FMA)
+  if(EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma")
     message(STATUS "Enabling FMA in tests/examples")
   endif()
@@ -227,7 +232,12 @@ if(NOT MSVC)
 
   option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
   if(EIGEN_TEST_NEON)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mfloat-abi=softfp")
+    if(EIGEN_TEST_FMA)
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon-vfpv4")
+    else()
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon")
+    endif()
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp")
     message(STATUS "Enabling NEON in tests/examples")
   endif()
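Note: -mfpu=neon-vfpv4 matters here because plain -mfpu=neon (ARMv7) has no fused multiply-add; VFPv4 adds the vfma instructions that EIGEN_TEST_FMA is meant to exercise, which is also why EIGEN_TEST_FMA no longer adds the x86-only -mfma flag once NEON is selected. A minimal sketch of what the flag unlocks, assuming an ARM target with FMA support (the function name is illustrative, not part of the change):

    #include <arm_neon.h>

    // With -mfpu=neon-vfpv4 (or any AArch64 target) this compiles to a single
    // fused multiply-add; vfmaq_f32(acc, x, y) returns acc + x*y per lane.
    float32x4_t fused_madd(float32x4_t acc, float32x4_t x, float32x4_t y) {
      return vfmaq_f32(acc, x, y);
    }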

Eigen/src/Core/products/GeneralBlockPanelKernel.h

@@ -112,14 +112,18 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
     nr = Traits::nr,
     nr_mask = (0xffffffff/nr)*nr
   };
-  Index k_cache = (l1-ksub)/kdiv;
+  // Increasing k gives us more time to prefetch the content of the "C"
+  // registers. However once the latency is hidden there is no point in
+  // increasing the value of k, so we'll cap it at 320 (value determined
+  // experimentally).
+  const Index k_cache = (std::min<Index>)((l1-ksub)/kdiv, 320);
   if (k_cache < k) {
     k = k_cache & k_mask;
     eigen_internal_assert(k > 0);
   }
-  Index n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k);
-  Index n_per_thread = numext::div_ceil(n, num_threads);
+  const Index n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k);
+  const Index n_per_thread = numext::div_ceil(n, num_threads);
   if (n_cache <= n_per_thread) {
     // Don't exceed the capacity of the l2 cache.
     eigen_internal_assert(n_cache >= static_cast<Index>(nr));
@@ -131,8 +135,8 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
   if (l3 > l2) {
     // l3 is shared between all cores, so we'll give each thread its own chunk of l3.
-    Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);
-    Index m_per_thread = numext::div_ceil(m, num_threads);
+    const Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);
+    const Index m_per_thread = numext::div_ceil(m, num_threads);
     if(m_cache < m_per_thread && m_cache >= static_cast<Index>(mr)) {
       m = m_cache & mr_mask;
       eigen_internal_assert(m > 0);
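Note: the heuristic ties each blocking dimension to one level of the cache hierarchy: k is sized from L1 (now capped at 320, since beyond that the prefetch latency on the accumulator registers is already hidden and a larger k only evicts useful data), n from each thread's share of L2, and m from each thread's share of L3. A rough standalone sketch of the capped-k computation, with placeholder parameter values (not Eigen's actual constants):

    #include <algorithm>
    #include <cstddef>

    typedef std::ptrdiff_t Index;

    // l1: L1 data-cache size in bytes; ksub/kdiv: per-k byte costs of the
    // packed blocks. All default values here are illustrative placeholders.
    Index blocked_k(Index k, Index l1 = 32768, Index ksub = 512, Index kdiv = 64) {
      const Index k_cache = (std::min<Index>)((l1 - ksub) / kdiv, 320);
      // The real code also rounds k down to a multiple of the register block.
      return k_cache < k ? k_cache : k;
    }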
@@ -380,11 +384,14 @@ public:
     nr = 4,
 
     // register block size along the M direction (currently, this one cannot be modified)
+    default_mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
 #if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
     // we assume 16 registers
-    mr = 3*LhsPacketSize,
+    // See bug 992, if the scalar type is not vectorizable but that EIGEN_HAS_SINGLE_INSTRUCTION_MADD is defined,
+    // then using 3*LhsPacketSize triggers non-implemented paths in syrk.
+    mr = Vectorizable ? 3*LhsPacketSize : default_mr,
 #else
-    mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
+    mr = default_mr,
 #endif
 
     LhsProgress = LhsPacketSize,
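Note: a quick instance of the arithmetic, with values chosen for illustration (16 vector registers, 4-wide float packets, nr = 4): default_mr = (min(16,16)/2/4)*4 = 8 rows, while the single-instruction-MADD path can afford a taller mr = 3*4 = 12 rows because the fused multiply-accumulate frees up registers. The bug 992 fix simply falls back to default_mr when the scalar type is not vectorizable, since the 12-row kernel has no scalar path in syrk.

    // Worked example with assumed values (not from any particular target).
    enum {
      NumberOfRegisters = 16,
      LhsPacketSize = 4,
      nr = 4,
      default_mr = (16 < NumberOfRegisters ? 16 : NumberOfRegisters) / 2 / nr * LhsPacketSize,
      mr_madd    = 3 * LhsPacketSize
    };
    static_assert(default_mr == 8 && mr_madd == 12, "worked example");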

Eigen/src/SparseCore/TriangularSolver.h

@@ -75,7 +75,7 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,RowMajor>
       for(Index i=lhs.rows()-1 ; i>=0 ; --i)
       {
         Scalar tmp = other.coeff(i,col);
-        Scalar l_ii = 0;
+        Scalar l_ii(0);
        LhsIterator it(lhsEval, i);
        while(it && it.index()<i)
          ++it;
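Note: switching from Scalar l_ii = 0; to Scalar l_ii(0); replaces copy-initialization with direct-initialization, which matters when Scalar is a user-defined type whose constructor from an integer is explicit: the copy-initialized form then fails to compile. A minimal illustration with a made-up scalar type:

    struct MyScalar {                // hypothetical custom scalar type
      explicit MyScalar(int v) : value(v) {}
      int value;
    };

    int main() {
      MyScalar a(0);                 // OK: direct-initialization may use an explicit ctor
      // MyScalar b = 0;             // error: copy-initialization needs an implicit conversion
      return a.value;
    }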

unsupported/Eigen/CXX11/Tensor

@@ -49,8 +49,8 @@
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h"
-#include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h"

unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h

@@ -157,6 +157,8 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
     eigen_assert(NumInputDims > m_dim.actualDim());
 
     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
+    eigen_assert(op.offset() < input_dims[m_dim.actualDim()]);
+
     int j = 0;
     for (int i = 0; i < NumInputDims; ++i) {
       if (i != m_dim.actualDim()) {
@@ -246,7 +248,9 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
     Scalar* result = m_impl.data();
-    if (m_dim.actualDim() == NumDims && result) {
+    if (((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumDims) ||
+         (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) &&
+        result) {
       return result + m_inputOffset;
     } else {
       return NULL;
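Note: data() may only expose a raw pointer when the chipped slice is one contiguous block. In a col-major tensor the last dimension has the largest stride, so chipping it fixes a single contiguous range; in a row-major tensor the same holds for the first dimension. The old test checked actualDim() == NumDims regardless of Layout, so for row-major tensors it could hand out a pointer to a non-contiguous slice. A small standalone check of the col-major case, using the same extents as the tests below:

    #include <cassert>

    int main() {
      const int d0 = 2, d1 = 3, d2 = 5;  // a col-major 2x3x5 tensor
      const int k0 = 4;                  // chip the last dimension at k = 4
      assert(k0 < d2);
      int run = 0;
      for (int j = 0; j < d1; ++j)
        for (int i = 0; i < d0; ++i) {
          int offset = i + d0 * (j + d1 * k0);     // col-major offset of (i,j,k0)
          assert(offset == d0 * d1 * k0 + run++);  // one contiguous run from base 24
        }
      return 0;
    }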

unsupported/test/cxx11_tensor_chipping.cpp

@@ -340,11 +340,9 @@ static void test_chip_as_lvalue()
   }
 }
 
-template<int DataLayout>
-static void test_chip_raw_data()
+static void test_chip_raw_data_col_major()
 {
-  Tensor<float, 5, DataLayout> tensor(2,3,5,7,11);
+  Tensor<float, 5, ColMajor> tensor(2,3,5,7,11);
   tensor.setRandom();
 
   typedef TensorEvaluator<decltype(tensor.template chip<4>(3)), DefaultDevice> Evaluator4;
@@ -353,12 +351,7 @@ static void test_chip_raw_data()
     for (int j = 0; j < 3; ++j) {
       for (int k = 0; k < 5; ++k) {
         for (int l = 0; l < 7; ++l) {
-          int chip_index;
-          if (DataLayout == ColMajor) {
-            chip_index = i + 2 * (j + 3 * (k + 5 * l));
-          } else {
-            chip_index = 11 * (l + 7 * (k + 5 * (j + 3 * i)));
-          }
+          int chip_index = i + 2 * (j + 3 * (k + 5 * l));
           VERIFY_IS_EQUAL(chip.data()[chip_index], tensor(i,j,k,l,3));
         }
       }
@@ -382,6 +375,41 @@ static void test_chip_raw_data()
   VERIFY_IS_EQUAL(chip3.data(), static_cast<float*>(0));
 }
 
+static void test_chip_raw_data_row_major()
+{
+  Tensor<float, 5, RowMajor> tensor(11,7,5,3,2);
+  tensor.setRandom();
+
+  typedef TensorEvaluator<decltype(tensor.template chip<0>(3)), DefaultDevice> Evaluator0;
+  auto chip = Evaluator0(tensor.template chip<0>(3), DefaultDevice());
+  for (int i = 0; i < 7; ++i) {
+    for (int j = 0; j < 5; ++j) {
+      for (int k = 0; k < 3; ++k) {
+        for (int l = 0; l < 2; ++l) {
+          int chip_index = l + 2 * (k + 3 * (j + 5 * i));
+          VERIFY_IS_EQUAL(chip.data()[chip_index], tensor(3,i,j,k,l));
+        }
+      }
+    }
+  }
+
+  typedef TensorEvaluator<decltype(tensor.template chip<1>(0)), DefaultDevice> Evaluator1;
+  auto chip1 = Evaluator1(tensor.template chip<1>(0), DefaultDevice());
+  VERIFY_IS_EQUAL(chip1.data(), static_cast<float*>(0));
+
+  typedef TensorEvaluator<decltype(tensor.template chip<2>(0)), DefaultDevice> Evaluator2;
+  auto chip2 = Evaluator2(tensor.template chip<2>(0), DefaultDevice());
+  VERIFY_IS_EQUAL(chip2.data(), static_cast<float*>(0));
+
+  typedef TensorEvaluator<decltype(tensor.template chip<3>(0)), DefaultDevice> Evaluator3;
+  auto chip3 = Evaluator3(tensor.template chip<3>(0), DefaultDevice());
+  VERIFY_IS_EQUAL(chip3.data(), static_cast<float*>(0));
+
+  typedef TensorEvaluator<decltype(tensor.template chip<4>(0)), DefaultDevice> Evaluator4;
+  auto chip4 = Evaluator4(tensor.template chip<4>(0), DefaultDevice());
+  VERIFY_IS_EQUAL(chip4.data(), static_cast<float*>(0));
+}
+
 void test_cxx11_tensor_chipping()
 {
   CALL_SUBTEST(test_simple_chip<ColMajor>());
@@ -392,6 +420,6 @@ void test_cxx11_tensor_chipping()
   CALL_SUBTEST(test_chip_in_expr<RowMajor>());
   CALL_SUBTEST(test_chip_as_lvalue<ColMajor>());
   CALL_SUBTEST(test_chip_as_lvalue<RowMajor>());
-  CALL_SUBTEST(test_chip_raw_data<ColMajor>());
-  CALL_SUBTEST(test_chip_raw_data<RowMajor>());
+  CALL_SUBTEST(test_chip_raw_data_col_major());
+  CALL_SUBTEST(test_chip_raw_data_row_major());
 }
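Note: the two replacement tests are mirror images of one another: the col-major tensor (2,3,5,7,11) chips its last dimension and indexes the remaining block first-index-fastest, while the row-major tensor (11,7,5,3,2) chips its first dimension and indexes it last-index-fastest. A quick sanity check that the row-major formula enumerates the slice contiguously (same 7x5x3x2 extents as the test):

    #include <cassert>

    int main() {
      int run = 0;
      for (int i = 0; i < 7; ++i)
        for (int j = 0; j < 5; ++j)
          for (int k = 0; k < 3; ++k)
            for (int l = 0; l < 2; ++l)
              // row-major offset of (i,j,k,l) in a 7x5x3x2 block
              assert(l + 2 * (k + 3 * (j + 5 * i)) == run++);
      return 0;
    }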

unsupported/test/cxx11_tensor_index_list.cpp

@@ -255,6 +255,17 @@ static void test_mixed_index_list()
   VERIFY_IS_APPROX(result3(0), expected);
 }
 
+static void test_dim_check()
+{
+  Eigen::IndexList<Eigen::type2index<1>, int> dim1;
+  dim1.set(1, 2);
+
+  Eigen::IndexList<Eigen::type2index<1>, int> dim2;
+  dim2.set(1, 2);
+
+  VERIFY(dimensions_match(dim1, dim2));
+}
+
 #endif
 
 void test_cxx11_tensor_index_list()
@@ -264,5 +275,6 @@ void test_cxx11_tensor_index_list()
   CALL_SUBTEST(test_type2index_list());
   CALL_SUBTEST(test_dynamic_index_list());
   CALL_SUBTEST(test_mixed_index_list());
+  CALL_SUBTEST(test_dim_check());
 #endif
 }
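Note: test_dim_check exercises dimensions_match on an Eigen::IndexList that mixes a compile-time dimension (type2index<1>) with a runtime one; set(1, 2) only assigns the runtime slot, while the first entry stays pinned at 1 by its type. A usage sketch of the same pattern, taken directly from the test above:

    // First dimension fixed to 1 at compile time, second set at run time.
    Eigen::IndexList<Eigen::type2index<1>, int> dims;
    dims.set(1, 2);   // dims now represents the dimension pair (1, 2)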