From 2386fc8528fa8f923b0300af6ddc4cd46a178afd Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Fri, 27 Feb 2015 12:57:13 -0800
Subject: [PATCH] Added support for 32-bit indices on a per-tensor /
 per-tensor-expression basis. This enables us to use 32-bit indices to
 evaluate expressions on GPU faster while keeping the ability to use 64-bit
 indices to manipulate large tensors on CPU in the same binary.

---
 Eigen/src/Core/util/Constants.h               |  6 +-
 unsupported/Eigen/CXX11/src/Tensor/Tensor.h   |  2 +-
 .../Eigen/CXX11/src/Tensor/TensorStorage.h    | 46 +++++++++-------
 .../Eigen/CXX11/src/Tensor/TensorTraits.h     | 15 ++++-
 unsupported/test/CMakeLists.txt               |  1 +
 .../test/cxx11_tensor_mixed_indices.cpp       | 55 +++++++++++++++++++
 6 files changed, 102 insertions(+), 23 deletions(-)
 create mode 100644 unsupported/test/cxx11_tensor_mixed_indices.cpp
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index d1855b50b..fea75a004 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -296,7 +296,11 @@ enum {
   /** Align the matrix itself if it is vectorizable fixed-size */
   AutoAlign = 0,
   /** Don't require alignment for the matrix itself (the array of coefficients, if dynamically allocated, may still be requested to be aligned) */ // FIXME --- clarify the situation
-  DontAlign = 0x2
+  DontAlign = 0x2,
+  /** Use the DenseIndex type to index the matrix/array/tensor. Unless overridden by defining EIGEN_DEFAULT_DENSE_INDEX_TYPE, DenseIndex is std::ptrdiff_t. */
+  IndexDefault = 0,
+  /** Use 32bit signed integers to index the matrix/array/tensor. */
+  Index32Bit = 0x4
 };
 
 /** \ingroup enums
diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
index 037219f23..87ced2cce 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
@@ -92,7 +92,7 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_> >
     // Metadata
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                         rank()                   const { return NumIndices; }
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                         dimension(std::size_t n) const { return m_storage.dimensions()[n]; }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const DSizes<DenseIndex, NumIndices_>& dimensions()    const { return m_storage.dimensions(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions&             dimensions()             const { return m_storage.dimensions(); }
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                         size()                   const { return m_storage.size(); }
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar                        *data()                        { return m_storage.data(); }
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar                  *data()                  const { return m_storage.data(); }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
index 1b227e8c2..91aae091c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
@@ -66,14 +66,16 @@ template<typename T, DenseIndex NumIndices_, int Options_>
 class TensorStorage<T, NumIndices_, Dynamic, Options_, void>
   : public TensorStorage<T, NumIndices_, Dynamic, Options_, typename internal::gen_numeric_list_repeated<DenseIndex, NumIndices_, Dynamic>::type>
 {
+  typedef typename internal::compute_index_type<static_cast<bool>(Options_ & Index32Bit)>::type Index;  // int when Index32Bit is set, DenseIndex otherwise; explicit cast avoids int->bool narrowing
+  typedef DSizes<Index, NumIndices_> Dimensions;
   typedef TensorStorage<T, NumIndices_, Dynamic, Options_, typename internal::gen_numeric_list_repeated<DenseIndex, NumIndices_, Dynamic>::type> Base_;
 
   public:
-    TensorStorage() { }
-    TensorStorage(const TensorStorage<T, NumIndices_, Dynamic, Options_, void>& other) : Base_(other) { }
+    EIGEN_DEVICE_FUNC TensorStorage() { }
+    EIGEN_DEVICE_FUNC TensorStorage(const TensorStorage<T, NumIndices_, Dynamic, Options_, void>& other) : Base_(other) { }
 
-    TensorStorage(internal::constructor_without_unaligned_array_assert) : Base_(internal::constructor_without_unaligned_array_assert()) {}
-    TensorStorage(DenseIndex size, const array<DenseIndex, NumIndices_>& dimensions) : Base_(size, dimensions) {}
+    EIGEN_DEVICE_FUNC TensorStorage(internal::constructor_without_unaligned_array_assert) : Base_(internal::constructor_without_unaligned_array_assert()) {}
+    EIGEN_DEVICE_FUNC TensorStorage(Index size, const array<Index, NumIndices_>& dimensions) : Base_(size, dimensions) {}
 
   //      TensorStorage<T, NumIndices_, Dynamic, Options_, void>& operator=(const TensorStorage<T, NumIndices_, Dynamic, Options_, void>&) = default;
 };
@@ -82,24 +84,26 @@ class TensorStorage<T, NumIndices_, Dynamic, Options_, void>
 template<typename T, DenseIndex NumIndices_, int Options_>
 class TensorStorage<T, NumIndices_, Dynamic, Options_, typename internal::gen_numeric_list_repeated<DenseIndex, NumIndices_, Dynamic>::type>
 {
-    T *m_data;
-    DSizes<DenseIndex, NumIndices_> m_dimensions;
+  public:
+  typedef typename internal::compute_index_type<static_cast<bool>(Options_&Index32Bit)>::type Index;  // int when Index32Bit is set, DenseIndex otherwise; explicit cast avoids int->bool narrowing
+  typedef DSizes<Index, NumIndices_> Dimensions;
 
     typedef TensorStorage<T, NumIndices_, Dynamic, Options_, typename internal::gen_numeric_list_repeated<DenseIndex, NumIndices_, Dynamic>::type> Self_;
-  public:
-    TensorStorage() : m_data(0), m_dimensions() {}
-    TensorStorage(internal::constructor_without_unaligned_array_assert)
-      : m_data(0), m_dimensions(internal::template repeat<NumIndices_, DenseIndex>(0)) {}
-    TensorStorage(DenseIndex size, const array<DenseIndex, NumIndices_>& dimensions)
+    // The first two constructors leave m_data null; the sized one allocates `size` elements and stores `dimensions`.
+    EIGEN_DEVICE_FUNC TensorStorage() : m_data(0), m_dimensions() {}
+    EIGEN_DEVICE_FUNC TensorStorage(internal::constructor_without_unaligned_array_assert)
+      : m_data(0), m_dimensions(internal::template repeat<NumIndices_, Index>(0)) {}
+    EIGEN_DEVICE_FUNC TensorStorage(Index size, const array<Index, NumIndices_>& dimensions)
         : m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size)), m_dimensions(dimensions)
       { EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN }
-      TensorStorage(const Self_& other)
+    // Copy constructor: allocates room for array_prod(other.m_dimensions) elements, then smart_copy's other's data into it (a deep copy, not a shared buffer).
+    EIGEN_DEVICE_FUNC TensorStorage(const Self_& other)
       : m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(internal::array_prod(other.m_dimensions)))
       , m_dimensions(other.m_dimensions)
     {
       internal::smart_copy(other.m_data, other.m_data+internal::array_prod(other.m_dimensions), m_data);
     }
-    Self_& operator=(const Self_& other)
+    EIGEN_DEVICE_FUNC Self_& operator=(const Self_& other)
     {
       if (this != &other) {
         Self_ tmp(other);
@@ -108,15 +112,15 @@ class TensorStorage<T, NumIndices_, Dynamic, Options_, typename internal::gen_nu
       return *this;
     }
 
-    ~TensorStorage() { internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, internal::array_prod(m_dimensions)); }
-    void swap(Self_& other)
+    EIGEN_DEVICE_FUNC  ~TensorStorage() { internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, internal::array_prod(m_dimensions)); }
+    EIGEN_DEVICE_FUNC  void swap(Self_& other)
     { std::swap(m_data,other.m_data); std::swap(m_dimensions,other.m_dimensions); }
 
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const DSizes<DenseIndex, NumIndices_>& dimensions() const {return m_dimensions;}
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {return m_dimensions;}
 
-    EIGEN_DEVICE_FUNC void resize(DenseIndex size, const array<DenseIndex, NumIndices_>& nbDimensions)
+    EIGEN_DEVICE_FUNC void resize(Index size, const array<Index, NumIndices_>& nbDimensions)
     {
-      const DenseIndex currentSz = internal::array_prod(m_dimensions);
+      const Index currentSz = internal::array_prod(m_dimensions);
       if(size != currentSz)
       {
         internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, currentSz);
@@ -132,7 +136,11 @@ class TensorStorage<T, NumIndices_, Dynamic, Options_, typename internal::gen_nu
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T *data() { return m_data; }
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T *data() const { return m_data; }
 
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex size() const { return m_dimensions.TotalSize(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); }
+
+ private:
+  T *m_data;
+  Dimensions m_dimensions;
 };
 
 } // end namespace Eigen
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
index a844a4d68..424bb24eb 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
@@ -43,13 +43,24 @@ class compute_tensor_flags
     enum { ret = packet_access_bit | aligned_bit};
 };
 
+// Selects the index type of a tensor: DenseIndex by default, int when
+// force32bit is true. NOTE(review): assumes int is 32 bits wide -- confirm.
+template<bool force32bit>
+struct compute_index_type {
+  typedef DenseIndex type;
+};
+
+template<>
+struct compute_index_type<true> {
+  typedef int type;
+};
 
 template<typename Scalar_, std::size_t NumIndices_, int Options_>
 struct traits<Tensor<Scalar_, NumIndices_, Options_> >
 {
   typedef Scalar_ Scalar;
   typedef Dense StorageKind;
-  typedef DenseIndex Index;
+  typedef typename compute_index_type<static_cast<bool>(Options_&Index32Bit)>::type Index;  // int when Index32Bit is set, DenseIndex otherwise; explicit cast avoids int->bool narrowing
   static const int NumDimensions = NumIndices_;
   static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
   enum {
@@ -64,7 +75,7 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_> >
 {
   typedef Scalar_ Scalar;
   typedef Dense StorageKind;
-  typedef DenseIndex Index;
+  typedef typename compute_index_type<static_cast<bool>(Options_&Index32Bit)>::type Index;  // int when Index32Bit is set, DenseIndex otherwise; explicit cast avoids int->bool narrowing
   static const int NumDimensions = array_size<Dimensions>::value;
   static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
   enum {
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 8542dd853..806ea77b5 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -104,6 +104,7 @@ if(EIGEN_TEST_CXX11)
   ei_add_test(cxx11_tensor_assign "-std=c++0x")
   ei_add_test(cxx11_tensor_dimension "-std=c++0x")
   ei_add_test(cxx11_tensor_index_list "-std=c++0x")
+  ei_add_test(cxx11_tensor_mixed_indices "-std=c++0x")
   ei_add_test(cxx11_tensor_comparisons "-std=c++0x")
   ei_add_test(cxx11_tensor_contraction "-std=c++0x")
   ei_add_test(cxx11_tensor_convolution "-std=c++0x")
diff --git a/unsupported/test/cxx11_tensor_mixed_indices.cpp b/unsupported/test/cxx11_tensor_mixed_indices.cpp
new file mode 100644
index 000000000..8a12f9207
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_mixed_indices.cpp
@@ -0,0 +1,55 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
+static void test_simple()
+{
+  // Run the same kind of element-wise expression on a tensor with the
+  // default index type (vec1) and on one with 32-bit indices (vec2).
+  const int kSize = 6;
+  const float sqrt_inputs[kSize] = {4.0f, 8.0f, 15.0f, 16.0f, 23.0f, 42.0f};
+
+  Tensor<float, 1> vec1({6});
+  Tensor<float, 1, Index32Bit> vec2({6});
+  for (int i = 0; i < kSize; ++i) {
+    vec1(i) = sqrt_inputs[i];
+    vec2(i) = static_cast<float>(i);
+  }
+
+  // Default indices: element-wise square root written through a TensorMap.
+  float data3[6];
+  TensorMap<Tensor<float, 1>> vec3(data3, 6);
+  vec3 = vec1.sqrt();
+
+  // 32-bit indices: element-wise square written through a TensorMap.
+  float data4[6];
+  TensorMap<Tensor<float, 1, Index32Bit>> vec4(data4, 6);
+  vec4 = vec2.square();
+
+  // Check every coefficient of both results, vec3 first and then vec4.
+  for (int i = 0; i < kSize; ++i) {
+    VERIFY_IS_APPROX(vec3(i), sqrtf(sqrt_inputs[i]));
+  }
+  for (int i = 0; i < kSize; ++i) {
+    const float x = static_cast<float>(i);
+    VERIFY_IS_APPROX(vec4(i), x * x);
+  }
+}
+
+// Runs test_simple as the only subtest; presumably the entry point the test harness looks up for cxx11_tensor_mixed_indices -- confirm.
+void test_cxx11_tensor_mixed_indices()
+{
+  CALL_SUBTEST(test_simple());
+}