Added a benchmark to measure the performance of full reductions of 16-bit floats

This commit is contained in:
Benoit Steiner 2016-05-05 14:15:11 -07:00
parent 28d5572658
commit f81e413180
2 changed files with 2 additions and 1 deletion

View File

@@ -368,7 +368,7 @@ template <typename Device, typename T> class BenchmarkSuite {
const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
b_, input_size);
Eigen::array<TensorIndex, 0> output_size;
-TensorMap<Tensor<float, 0, 0, TensorIndex>, Eigen::Aligned> C(
+TensorMap<Tensor<T, 0, 0, TensorIndex>, Eigen::Aligned> C(
c_, output_size);
StartBenchmarkTiming();

View File

@@ -33,6 +33,7 @@ BM_FuncGPU(algebraicFunc);
BM_FuncGPU(transcendentalFunc);
BM_FuncGPU(rowReduction);
BM_FuncGPU(colReduction);
+BM_FuncGPU(fullReduction);
// Contractions