eigen/test/assignment_threaded.cpp

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2023 Charlie Schlosser <cs.schlosser@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_USE_THREADS 1

#include "main.h"
#include <Eigen/ThreadPool>

namespace Eigen {
namespace internal {
// Identity functor used by the tests in this file. The `Vectorize` flag has no
// effect on the computed values; it is only consumed by the functor_traits
// specialization below to decide whether packet access is advertised, which
// makes it easy to steer an expression onto the scalar or the packet path.
template <typename Scalar, bool Vectorize>
struct scalar_dummy_op {
  // Packet overload: hands the packet back unchanged.
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& packet) const {
    return packet;
  }
  // Scalar overload: hands the coefficient back unchanged.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const Scalar& value) const { return value; }
};

// Traits describing scalar_dummy_op to the expression evaluators.
template <typename Scalar, bool Vectorize>
struct functor_traits<scalar_dummy_op<Scalar, Vectorize> > {
  enum {
    // Very large nominal per-coefficient cost.
    // NOTE(review): presumably chosen so that cost-based heuristics treat the
    // expression as expensive enough to be split across threads -- confirm.
    Cost = 1'000'000,
    // Packet access is offered only when requested via `Vectorize` and when
    // packet_traits reports Scalar as vectorizable.
    PacketAccess = Vectorize && packet_traits<Scalar>::Vectorizable
  };
};
}  // namespace internal
}  // namespace Eigen

template <typename PlainObject>
void test_threaded_assignment(const PlainObject&, Index rows = PlainObject::RowsAtCompileTime,
                              Index cols = PlainObject::ColsAtCompileTime) {
  using Scalar = typename PlainObject::Scalar;
  using VectorizationOff = internal::scalar_dummy_op<Scalar, false>;
  using VectorizationOn = internal::scalar_dummy_op<Scalar, true>;

  int threads = 4;
  ThreadPool pool(threads);
  CoreThreadPoolDevice threadPoolDevice(pool);

  PlainObject dst(rows, cols), ref(rows, cols), rhs(rows, cols);
  rhs.setRandom();
  const auto rhs_xpr = rhs.cwiseAbs2();

  // linear access
  dst.setRandom();
  ref.setRandom();
  ref = rhs_xpr.unaryExpr(VectorizationOff());
  dst.device(threadPoolDevice) = rhs_xpr.unaryExpr(VectorizationOff());
  VERIFY_IS_CWISE_EQUAL(ref, dst);

  ref = rhs_xpr.unaryExpr(VectorizationOn());
  dst.device(threadPoolDevice) = rhs_xpr.unaryExpr(VectorizationOn());
  VERIFY_IS_CWISE_EQUAL(ref, dst);

  // outer-inner access
  Index blockRows = numext::maxi(Index(1), rows - 1);
  Index blockCols = numext::maxi(Index(1), cols - 1);
  dst.setRandom();
  ref.setRandom();
  ref.bottomRightCorner(blockRows, blockCols) =
      rhs_xpr.bottomRightCorner(blockRows, blockCols).unaryExpr(VectorizationOff());
  dst.bottomRightCorner(blockRows, blockCols).device(threadPoolDevice) =
      rhs_xpr.bottomRightCorner(blockRows, blockCols).unaryExpr(VectorizationOff());
  VERIFY_IS_CWISE_EQUAL(ref.bottomRightCorner(blockRows, blockCols), dst.bottomRightCorner(blockRows, blockCols));

  ref.setZero();
  dst.setZero();
  ref.bottomRightCorner(blockRows, blockCols) =
      rhs_xpr.bottomRightCorner(blockRows, blockCols).unaryExpr(VectorizationOn());
  dst.bottomRightCorner(blockRows, blockCols).device(threadPoolDevice) =
      rhs_xpr.bottomRightCorner(blockRows, blockCols).unaryExpr(VectorizationOn());
  VERIFY_IS_CWISE_EQUAL(ref.bottomRightCorner(blockRows, blockCols), dst.bottomRightCorner(blockRows, blockCols));
}

// Test entry point: repeats the threaded-assignment checks g_repeat times.
EIGEN_DECLARE_TEST(test) {
  for (int iteration = 0; iteration < g_repeat; ++iteration) {
    // Dynamically sized double matrix; dimensions are passed at run time.
    CALL_SUBTEST(test_threaded_assignment(MatrixXd(), 123, 123));
    // Fixed-size float matrix; dimensions come from the compile-time defaults.
    CALL_SUBTEST(test_threaded_assignment(Matrix<float, 16, 16>()));
  }
}