Added support for 32-bit indices on a per-tensor and per-tensor-expression basis. This lets us use 32-bit indices to evaluate expressions faster on GPU while retaining the ability to use 64-bit indices to manipulate large tensors on CPU in the same binary.

2025-01-30 17:40:05 +08:00 · 2015-02-27 12:57:13 -08:00 · 2015-02-27 12:57:13 -08:00 · 2386fc8528
commit 2386fc8528
parent 05089aba75
6 changed files with 102 additions and 23 deletions
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@ -296,7 +296,11 @@ enum {
  /** Align the matrix itself if it is vectorizable fixed-size */
  AutoAlign = 0,
  /** Don't require alignment for the matrix itself (the array of coefficients, if dynamically allocated, may still be requested to be aligned) */ // FIXME --- clarify the situation
-  DontAlign = 0x2
+  DontAlign = 0x2,
+  /** Use the DenseIndex type to index the matrix/array/tensor. Unless otherwise specified by defining EIGEN_DEFAULT_DENSE_INDEX_TYPE, DenseIndex is a ptrdiff_t. */
+  IndexDefault = 0,
+  /** Use 32bit signed integers to index the matrix/array/tensor. */
+  Index32Bit = 0x4
 };

 /** \ingroup enums
--- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
@ -92,7 +92,7 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_> >
    // Metadata
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                         rank()                   const { return NumIndices; }
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                         dimension(std::size_t n) const { return m_storage.dimensions()[n]; }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const DSizes<DenseIndex, NumIndices_>& dimensions()    const { return m_storage.dimensions(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions&             dimensions()             const { return m_storage.dimensions(); }
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                         size()                   const { return m_storage.size(); }
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar                        *data()                        { return m_storage.data(); }
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar                  *data()                  const { return m_storage.data(); }
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
@ -66,14 +66,16 @@ template<typename T, DenseIndex NumIndices_, int Options_>
 class TensorStorage<T, NumIndices_, Dynamic, Options_, void>
  : public TensorStorage<T, NumIndices_, Dynamic, Options_, typename internal::gen_numeric_list_repeated<DenseIndex, NumIndices_, Dynamic>::type>
 {
+  typedef typename internal::compute_index_type<(Options_ & Index32Bit) != 0>::type Index;  // "!= 0" yields a real bool; a raw int mask is a narrowing (ill-formed) bool template argument
+  typedef DSizes<Index, NumIndices_> Dimensions;
  typedef TensorStorage<T, NumIndices_, Dynamic, Options_, typename internal::gen_numeric_list_repeated<DenseIndex, NumIndices_, Dynamic>::type> Base_;

  public:
-    TensorStorage() { }
-    TensorStorage(const TensorStorage<T, NumIndices_, Dynamic, Options_, void>& other) : Base_(other) { }
+    EIGEN_DEVICE_FUNC TensorStorage() { }
+    EIGEN_DEVICE_FUNC TensorStorage(const TensorStorage<T, NumIndices_, Dynamic, Options_, void>& other) : Base_(other) { }

-    TensorStorage(internal::constructor_without_unaligned_array_assert) : Base_(internal::constructor_without_unaligned_array_assert()) {}
-    TensorStorage(DenseIndex size, const array<DenseIndex, NumIndices_>& dimensions) : Base_(size, dimensions) {}
+    EIGEN_DEVICE_FUNC TensorStorage(internal::constructor_without_unaligned_array_assert) : Base_(internal::constructor_without_unaligned_array_assert()) {}
+    EIGEN_DEVICE_FUNC TensorStorage(Index size, const array<Index, NumIndices_>& dimensions) : Base_(size, dimensions) {}  // size typed as Index, like the dimensions array it is forwarded with

  //      TensorStorage<T, NumIndices_, Dynamic, Options_, void>& operator=(const TensorStorage<T, NumIndices_, Dynamic, Options_, void>&) = default;
 };
@ -82,24 +84,26 @@ class TensorStorage<T, NumIndices_, Dynamic, Options_, void>
 template<typename T, DenseIndex NumIndices_, int Options_>
 class TensorStorage<T, NumIndices_, Dynamic, Options_, typename internal::gen_numeric_list_repeated<DenseIndex, NumIndices_, Dynamic>::type>
 {
-    T *m_data;
-    DSizes<DenseIndex, NumIndices_> m_dimensions;
+  public:
+  typedef typename internal::compute_index_type<(Options_ & Index32Bit) != 0>::type Index;  // "!= 0" yields a real bool; a raw int mask is a narrowing (ill-formed) bool template argument
+  typedef DSizes<Index, NumIndices_> Dimensions;

    typedef TensorStorage<T, NumIndices_, Dynamic, Options_, typename internal::gen_numeric_list_repeated<DenseIndex, NumIndices_, Dynamic>::type> Self_;
-  public:
-    TensorStorage() : m_data(0), m_dimensions() {}
-    TensorStorage(internal::constructor_without_unaligned_array_assert)
-      : m_data(0), m_dimensions(internal::template repeat<NumIndices_, DenseIndex>(0)) {}
-    TensorStorage(DenseIndex size, const array<DenseIndex, NumIndices_>& dimensions)
+
+    EIGEN_DEVICE_FUNC TensorStorage() : m_data(0), m_dimensions() {}
+    EIGEN_DEVICE_FUNC TensorStorage(internal::constructor_without_unaligned_array_assert)
+      : m_data(0), m_dimensions(internal::template repeat<NumIndices_, Index>(0)) {}
+    EIGEN_DEVICE_FUNC TensorStorage(Index size, const array<Index, NumIndices_>& dimensions)
        : m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size)), m_dimensions(dimensions)
      { EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN }
-      TensorStorage(const Self_& other)
+
+    EIGEN_DEVICE_FUNC TensorStorage(const Self_& other)
      : m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(internal::array_prod(other.m_dimensions)))
      , m_dimensions(other.m_dimensions)
    {
      internal::smart_copy(other.m_data, other.m_data+internal::array_prod(other.m_dimensions), m_data);
    }
-    Self_& operator=(const Self_& other)
+    EIGEN_DEVICE_FUNC Self_& operator=(const Self_& other)
    {
      if (this != &other) {
        Self_ tmp(other);
@ -108,15 +112,15 @@ class TensorStorage<T, NumIndices_, Dynamic, Options_, typename internal::gen_nu
      return *this;
    }

-    ~TensorStorage() { internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, internal::array_prod(m_dimensions)); }
-    void swap(Self_& other)
+    EIGEN_DEVICE_FUNC  ~TensorStorage() { internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, internal::array_prod(m_dimensions)); }
+    EIGEN_DEVICE_FUNC  void swap(Self_& other)
    { std::swap(m_data,other.m_data); std::swap(m_dimensions,other.m_dimensions); }

-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const DSizes<DenseIndex, NumIndices_>& dimensions() const {return m_dimensions;}
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {return m_dimensions;}

-    EIGEN_DEVICE_FUNC void resize(DenseIndex size, const array<DenseIndex, NumIndices_>& nbDimensions)
+    EIGEN_DEVICE_FUNC void resize(Index size, const array<Index, NumIndices_>& nbDimensions)
    {
-      const DenseIndex currentSz = internal::array_prod(m_dimensions);
+      const Index currentSz = internal::array_prod(m_dimensions);
      if(size != currentSz)
      {
        internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, currentSz);
@ -132,7 +136,11 @@ class TensorStorage<T, NumIndices_, Dynamic, Options_, typename internal::gen_nu
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T *data() { return m_data; }
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T *data() const { return m_data; }

-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex size() const { return m_dimensions.TotalSize(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); }
+
+ private:
+  T *m_data;
+  Dimensions m_dimensions;
 };

 } // end namespace Eigen
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
@ -43,13 +43,24 @@ class compute_tensor_flags
    enum { ret = packet_access_bit | aligned_bit};
 };

+template<bool force32bit>  // selects an index type from a compile-time flag
+struct compute_index_type {  // primary template: used when the <true> specialization does not match
+  typedef DenseIndex type;  // index type is DenseIndex
+};
+
+template<>
+struct compute_index_type<true> {  // specialization for force32bit == true
+  typedef int type;  // index type is int
+};
+
+

 template<typename Scalar_, std::size_t NumIndices_, int Options_>
 struct traits<Tensor<Scalar_, NumIndices_, Options_> >
 {
  typedef Scalar_ Scalar;
  typedef Dense StorageKind;
-  typedef DenseIndex Index;
+  typedef typename compute_index_type<(Options_ & Index32Bit) != 0>::type Index;  // "!= 0" yields a real bool; a raw int mask is a narrowing (ill-formed) bool template argument
  static const int NumDimensions = NumIndices_;
  static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
  enum {
@ -64,7 +75,7 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_> >
 {
  typedef Scalar_ Scalar;
  typedef Dense StorageKind;
-  typedef DenseIndex Index;
+  typedef typename compute_index_type<(Options_ & Index32Bit) != 0>::type Index;  // "!= 0" yields a real bool; a raw int mask is a narrowing (ill-formed) bool template argument
  static const int NumDimensions = array_size<Dimensions>::value;
  static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
  enum {
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@ -104,6 +104,7 @@ if(EIGEN_TEST_CXX11)
  ei_add_test(cxx11_tensor_assign "-std=c++0x")
  ei_add_test(cxx11_tensor_dimension "-std=c++0x")
  ei_add_test(cxx11_tensor_index_list "-std=c++0x")
+  ei_add_test(cxx11_tensor_mixed_indices "-std=c++0x")
  ei_add_test(cxx11_tensor_comparisons "-std=c++0x")
  ei_add_test(cxx11_tensor_contraction "-std=c++0x")
  ei_add_test(cxx11_tensor_convolution "-std=c++0x")
--- a/unsupported/test/cxx11_tensor_mixed_indices.cpp
+++ b/unsupported/test/cxx11_tensor_mixed_indices.cpp
@ -0,0 +1,55 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
+static void test_simple()  // evaluates one expression on a default-option tensor and one on an Index32Bit tensor
+{
+  Tensor<float, 1> vec1({6});  // no Options_ argument given
+  Tensor<float, 1, Index32Bit> vec2({6});  // Options_ = Index32Bit
+
+  vec1(0) = 4.0;  vec2(0) = 0.0;
+  vec1(1) = 8.0;  vec2(1) = 1.0;
+  vec1(2) = 15.0; vec2(2) = 2.0;
+  vec1(3) = 16.0; vec2(3) = 3.0;
+  vec1(4) = 23.0; vec2(4) = 4.0;
+  vec1(5) = 42.0; vec2(5) = 5.0;
+
+  float data3[6];
+  TensorMap<Tensor<float, 1>> vec3(data3, 6);  // map constructed over data3, no Options_ argument
+  vec3 = vec1.sqrt();
+  float data4[6];
+  TensorMap<Tensor<float, 1, Index32Bit>> vec4(data4, 6);  // map constructed over data4, Options_ = Index32Bit
+  vec4 = vec2.square();
+
+  VERIFY_IS_APPROX(vec3(0), sqrtf(4.0));  // vec3 entries are compared with sqrtf of the values stored in vec1
+  VERIFY_IS_APPROX(vec3(1), sqrtf(8.0));
+  VERIFY_IS_APPROX(vec3(2), sqrtf(15.0));
+  VERIFY_IS_APPROX(vec3(3), sqrtf(16.0));
+  VERIFY_IS_APPROX(vec3(4), sqrtf(23.0));
+  VERIFY_IS_APPROX(vec3(5), sqrtf(42.0));
+
+  VERIFY_IS_APPROX(vec4(0), 0.0f);  // vec4 entries are compared with the squares of the values stored in vec2
+  VERIFY_IS_APPROX(vec4(1), 1.0f);
+  VERIFY_IS_APPROX(vec4(2), 2.0f * 2.0f);
+  VERIFY_IS_APPROX(vec4(3), 3.0f * 3.0f);
+  VERIFY_IS_APPROX(vec4(4), 4.0f * 4.0f);
+  VERIFY_IS_APPROX(vec4(5), 5.0f * 5.0f);
+}
+
+
+void test_cxx11_tensor_mixed_indices()  // test entry point: runs test_simple as its only subtest
+{
+  CALL_SUBTEST(test_simple());
+}