Added benchmarks for full reduction

This commit is contained in:
Benoit Steiner 2016-02-29 14:57:52 -08:00
parent b2075cb7a2
commit 56a3ada670
2 changed files with 25 additions and 2 deletions

View File

@ -297,7 +297,8 @@ template <typename Device, typename T> class BenchmarkSuite {
input_size[0] = k_; input_size[0] = k_;
input_size[1] = n_; input_size[1] = n_;
const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size); const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size);
const Eigen::array<TensorIndex, 1> output_size = {{n_}}; Eigen::array<TensorIndex, 1> output_size;
output_size[0] = n_;
TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size); TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size);
#ifndef EIGEN_HAS_INDEX_LIST #ifndef EIGEN_HAS_INDEX_LIST
@ -325,7 +326,8 @@ template <typename Device, typename T> class BenchmarkSuite {
input_size[1] = n_; input_size[1] = n_;
const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B( const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
b_, input_size); b_, input_size);
const Eigen::array<TensorIndex, 1> output_size = {{k_}}; Eigen::array<TensorIndex, 1> output_size;
output_size[0] = k_;
TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C( TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(
c_, output_size); c_, output_size);
@ -347,6 +349,26 @@ template <typename Device, typename T> class BenchmarkSuite {
finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters); finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
} }
// Full reduction
void fullReduction(int num_iters) {
Eigen::array<TensorIndex, 2> input_size;
input_size[0] = k_;
input_size[1] = n_;
const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
b_, input_size);
const Eigen::array<TensorIndex, 0> output_size;
TensorMap<Tensor<float, 0, 0, TensorIndex>, Eigen::Aligned> C(
c_, output_size);
StartBenchmarkTiming();
for (int iter = 0; iter < num_iters; ++iter) {
C.device(device_) = B.sum();
}
// Record the number of FLOP executed per second (assuming one operation
// per value)
finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
}
// do a contraction which is equivalent to a matrix multiplication // do a contraction which is equivalent to a matrix multiplication
void contraction(int num_iters) { void contraction(int num_iters) {
Eigen::array<TensorIndex, 2> sizeA; Eigen::array<TensorIndex, 2> sizeA;

View File

@ -33,6 +33,7 @@ BM_FuncGPU(algebraicFunc);
BM_FuncGPU(transcendentalFunc); BM_FuncGPU(transcendentalFunc);
BM_FuncGPU(rowReduction); BM_FuncGPU(rowReduction);
BM_FuncGPU(colReduction); BM_FuncGPU(colReduction);
BM_FuncGPU(fullReduction);
// Contractions // Contractions