mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-21 07:19:46 +08:00
Added regression tests for tensor convolutions
This commit is contained in:
parent
68d4afe985
commit
678207e02a
@ -14,15 +14,16 @@
|
||||
using Eigen::Tensor;
|
||||
using Eigen::DefaultDevice;
|
||||
|
||||
template <int DataLayout>
|
||||
static void test_evals()
|
||||
{
|
||||
Tensor<float, 2> input(3, 3);
|
||||
Tensor<float, 1> kernel(2);
|
||||
Tensor<float, 2, DataLayout> input(3, 3);
|
||||
Tensor<float, 1, DataLayout> kernel(2);
|
||||
|
||||
input.setRandom();
|
||||
kernel.setRandom();
|
||||
|
||||
Tensor<float, 2> result(2,3);
|
||||
Tensor<float, 2, DataLayout> result(2,3);
|
||||
result.setZero();
|
||||
Eigen::array<Tensor<float, 2>::Index, 1> dims3({0});
|
||||
|
||||
@ -41,15 +42,15 @@ static void test_evals()
|
||||
VERIFY_IS_APPROX(result(1,2), input(1,2)*kernel(0) + input(2,2)*kernel(1)); // index 5
|
||||
}
|
||||
|
||||
|
||||
template <int DataLayout>
|
||||
static void test_expr()
|
||||
{
|
||||
Tensor<float, 2> input(3, 3);
|
||||
Tensor<float, 2> kernel(2, 2);
|
||||
Tensor<float, 2, DataLayout> input(3, 3);
|
||||
Tensor<float, 2, DataLayout> kernel(2, 2);
|
||||
input.setRandom();
|
||||
kernel.setRandom();
|
||||
|
||||
Tensor<float, 2> result(2,2);
|
||||
Tensor<float, 2, DataLayout> result(2,2);
|
||||
Eigen::array<ptrdiff_t, 2> dims({0, 1});
|
||||
result = input.convolve(kernel, dims);
|
||||
|
||||
@ -63,10 +64,10 @@ static void test_expr()
|
||||
input(2,1)*kernel(1,0) + input(2,2)*kernel(1,1));
|
||||
}
|
||||
|
||||
|
||||
template <int DataLayout>
|
||||
static void test_modes() {
|
||||
Tensor<float, 1> input(3);
|
||||
Tensor<float, 1> kernel(3);
|
||||
Tensor<float, 1, DataLayout> input(3);
|
||||
Tensor<float, 1, DataLayout> kernel(3);
|
||||
input(0) = 1.0f;
|
||||
input(1) = 2.0f;
|
||||
input(2) = 3.0f;
|
||||
@ -74,13 +75,13 @@ static void test_modes() {
|
||||
kernel(1) = 1.0f;
|
||||
kernel(2) = 0.0f;
|
||||
|
||||
const Eigen::array<ptrdiff_t, 1> dims{{0}};
|
||||
const Eigen::array<ptrdiff_t, 1> dims({0});
|
||||
Eigen::array<std::pair<ptrdiff_t, ptrdiff_t>, 1> padding;
|
||||
|
||||
// Emulate VALID mode (as defined in
|
||||
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
|
||||
padding[0] = std::make_pair(0, 0);
|
||||
Tensor<float, 1> valid(1);
|
||||
Tensor<float, 1, DataLayout> valid(1);
|
||||
valid = input.pad(padding).convolve(kernel, dims);
|
||||
VERIFY_IS_EQUAL(valid.dimension(0), 1);
|
||||
VERIFY_IS_APPROX(valid(0), 2.5f);
|
||||
@ -88,7 +89,7 @@ static void test_modes() {
|
||||
// Emulate SAME mode (as defined in
|
||||
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
|
||||
padding[0] = std::make_pair(1, 1);
|
||||
Tensor<float, 1> same(3);
|
||||
Tensor<float, 1, DataLayout> same(3);
|
||||
same = input.pad(padding).convolve(kernel, dims);
|
||||
VERIFY_IS_EQUAL(same.dimension(0), 3);
|
||||
VERIFY_IS_APPROX(same(0), 1.0f);
|
||||
@ -98,7 +99,7 @@ static void test_modes() {
|
||||
// Emulate FULL mode (as defined in
|
||||
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
|
||||
padding[0] = std::make_pair(2, 2);
|
||||
Tensor<float, 1> full(5);
|
||||
Tensor<float, 1, DataLayout> full(5);
|
||||
full = input.pad(padding).convolve(kernel, dims);
|
||||
VERIFY_IS_EQUAL(full.dimension(0), 5);
|
||||
VERIFY_IS_APPROX(full(0), 0.0f);
|
||||
@ -108,18 +109,18 @@ static void test_modes() {
|
||||
VERIFY_IS_APPROX(full(4), 1.5f);
|
||||
}
|
||||
|
||||
|
||||
template <int DataLayout>
|
||||
static void test_strides() {
|
||||
Tensor<float, 1> input(13);
|
||||
Tensor<float, 1> kernel(3);
|
||||
Tensor<float, 1, DataLayout> input(13);
|
||||
Tensor<float, 1, DataLayout> kernel(3);
|
||||
input.setRandom();
|
||||
kernel.setRandom();
|
||||
|
||||
const Eigen::array<ptrdiff_t, 1> dims{{0}};
|
||||
const Eigen::array<ptrdiff_t, 1> stride_of_3{{3}};
|
||||
const Eigen::array<ptrdiff_t, 1> stride_of_2{{2}};
|
||||
const Eigen::array<ptrdiff_t, 1> dims({0});
|
||||
const Eigen::array<ptrdiff_t, 1> stride_of_3({3});
|
||||
const Eigen::array<ptrdiff_t, 1> stride_of_2({2});
|
||||
|
||||
Tensor<float, 1> result;
|
||||
Tensor<float, 1, DataLayout> result;
|
||||
result = input.stride(stride_of_3).convolve(kernel, dims).stride(stride_of_2);
|
||||
|
||||
VERIFY_IS_EQUAL(result.dimension(0), 2);
|
||||
@ -129,13 +130,14 @@ static void test_strides() {
|
||||
input(12)*kernel(2)));
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void test_cxx11_tensor_convolution()
|
||||
{
|
||||
CALL_SUBTEST(test_evals());
|
||||
CALL_SUBTEST(test_expr());
|
||||
CALL_SUBTEST(test_modes());
|
||||
CALL_SUBTEST(test_strides());
|
||||
CALL_SUBTEST(test_evals<ColMajor>());
|
||||
CALL_SUBTEST(test_evals<RowMajor>());
|
||||
CALL_SUBTEST(test_expr<ColMajor>());
|
||||
CALL_SUBTEST(test_expr<RowMajor>());
|
||||
CALL_SUBTEST(test_modes<ColMajor>());
|
||||
CALL_SUBTEST(test_modes<RowMajor>());
|
||||
CALL_SUBTEST(test_strides<ColMajor>());
|
||||
CALL_SUBTEST(test_strides<RowMajor>());
|
||||
}
|
||||
|
@ -117,11 +117,10 @@ void test_cuda_elementwise()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void test_cuda_reduction()
|
||||
{
|
||||
Tensor<float, 4> in1(Eigen::array<int, 4>(72,53,97,113));
|
||||
Tensor<float, 2> out(Eigen::array<int, 2>(72,97));
|
||||
Tensor<float, 4> in1(72,53,97,113);
|
||||
Tensor<float, 2> out(72,97);
|
||||
in1.setRandom();
|
||||
|
||||
std::size_t in1_bytes = in1.size() * sizeof(float);
|
||||
@ -138,8 +137,8 @@ void test_cuda_reduction()
|
||||
assert(cudaStreamCreate(&stream) == cudaSuccess);
|
||||
Eigen::GpuDevice gpu_device(&stream);
|
||||
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_in1(d_in1, Eigen::array<int, 4>(72,53,97,113));
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, Eigen::array<int, 2>(72,97));
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_in1(d_in1, 72,53,97,113);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97);
|
||||
|
||||
array<int, 2> reduction_axis;
|
||||
reduction_axis[0] = 1;
|
||||
@ -156,10 +155,10 @@ void test_cuda_reduction()
|
||||
for (int k = 0; k < 53; ++k) {
|
||||
for (int l = 0; l < 113; ++l) {
|
||||
expected =
|
||||
std::max<float>(expected, in1(Eigen::array<int, 4>(i, k, j, l)));
|
||||
std::max<float>(expected, in1(i, k, j, l));
|
||||
}
|
||||
}
|
||||
VERIFY_IS_APPROX(out(Eigen::array<int, 2>(i,j)), expected);
|
||||
VERIFY_IS_APPROX(out(i,j), expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -170,7 +169,7 @@ static void test_cuda_contraction()
|
||||
// with these dimensions, the output has 300 * 140 elements, which is
|
||||
// more than 30 * 1024, which is the number of threads in blocks on
|
||||
// a 15 SM GK110 GPU
|
||||
Tensor<float, 4, DataLayout> t_left(Eigen::array<int, 4>(6, 50, 3, 31));
|
||||
Tensor<float, 4, DataLayout> t_left(6, 50, 3, 31);
|
||||
Tensor<float, 5, DataLayout> t_right(Eigen::array<int, 5>(3, 31, 7, 20, 1));
|
||||
Tensor<float, 5, DataLayout> t_result(Eigen::array<int, 5>(6, 50, 7, 20, 1));
|
||||
|
||||
@ -196,12 +195,9 @@ static void test_cuda_contraction()
|
||||
assert(cudaStreamCreate(&stream) == cudaSuccess);
|
||||
Eigen::GpuDevice gpu_device(&stream);
|
||||
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> >
|
||||
gpu_t_left(d_t_left, Eigen::array<int, 4>(6, 50, 3, 31));
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> >
|
||||
gpu_t_right(d_t_right, Eigen::array<int, 5>(3, 31, 7, 20, 1));
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> >
|
||||
gpu_t_result(d_t_result, Eigen::array<int, 5>(6, 50, 7, 20, 1));
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_t_left(d_t_left, 6, 50, 3, 31);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_t_right(d_t_right, 3, 31, 7, 20, 1);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_t_result(d_t_result, 6, 50, 7, 20, 1);
|
||||
|
||||
typedef Eigen::Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> > MapXf;
|
||||
MapXf m_left(t_left.data(), 300, 93);
|
||||
@ -226,11 +222,12 @@ static void test_cuda_contraction()
|
||||
}
|
||||
}
|
||||
|
||||
template<int DataLayout>
|
||||
static void test_cuda_convolution_1d()
|
||||
{
|
||||
Tensor<float, 4> input(Eigen::array<int, 4>(74,37,11,137));
|
||||
Tensor<float, 1> kernel(Eigen::array<int, 1>(4));
|
||||
Tensor<float, 4> out(Eigen::array<int, 4>(74,34,11,137));
|
||||
Tensor<float, 4, DataLayout> input(74,37,11,137);
|
||||
Tensor<float, 1, DataLayout> kernel(4);
|
||||
Tensor<float, 4, DataLayout> out(74,34,11,137);
|
||||
input = input.constant(10.0f) + input.random();
|
||||
kernel = kernel.constant(7.0f) + kernel.random();
|
||||
|
||||
@ -252,9 +249,9 @@ static void test_cuda_convolution_1d()
|
||||
assert(cudaStreamCreate(&stream) == cudaSuccess);
|
||||
Eigen::GpuDevice gpu_device(&stream);
|
||||
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_input(d_input, Eigen::array<int, 4>(74,37,11,137));
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1> > gpu_kernel(d_kernel, Eigen::array<int, 1>(4));
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_out(d_out, Eigen::array<int, 4>(74,34,11,137));
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_input(d_input, 74,37,11,137);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1, DataLayout> > gpu_kernel(d_kernel, 4);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out, 74,34,11,137);
|
||||
|
||||
Eigen::array<int, 1> dims(1);
|
||||
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
|
||||
@ -266,11 +263,9 @@ static void test_cuda_convolution_1d()
|
||||
for (int j = 0; j < 34; ++j) {
|
||||
for (int k = 0; k < 11; ++k) {
|
||||
for (int l = 0; l < 137; ++l) {
|
||||
const float result = out(Eigen::array<int, 4>(i,j,k,l));
|
||||
const float expected = input(Eigen::array<int, 4>(i,j+0,k,l)) * kernel(Eigen::array<int, 1>(0)) +
|
||||
input(Eigen::array<int, 4>(i,j+1,k,l)) * kernel(Eigen::array<int, 1>(1)) +
|
||||
input(Eigen::array<int, 4>(i,j+2,k,l)) * kernel(Eigen::array<int, 1>(2)) +
|
||||
input(Eigen::array<int, 4>(i,j+3,k,l)) * kernel(Eigen::array<int, 1>(3));
|
||||
const float result = out(i,j,k,l);
|
||||
const float expected = input(i,j+0,k,l) * kernel(0) + input(i,j+1,k,l) * kernel(1) +
|
||||
input(i,j+2,k,l) * kernel(2) + input(i,j+3,k,l) * kernel(3);
|
||||
VERIFY_IS_APPROX(result, expected);
|
||||
}
|
||||
}
|
||||
@ -278,12 +273,11 @@ static void test_cuda_convolution_1d()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void test_cuda_convolution_2d()
|
||||
static void test_cuda_convolution_inner_dim_col_major_1d()
|
||||
{
|
||||
Tensor<float, 4> input(Eigen::array<int, 4>(74,37,11,137));
|
||||
Tensor<float, 2> kernel(Eigen::array<int, 2>(3,4));
|
||||
Tensor<float, 4> out(Eigen::array<int, 4>(74,35,8,137));
|
||||
Tensor<float, 4, ColMajor> input(74,9,11,7);
|
||||
Tensor<float, 1, ColMajor> kernel(4);
|
||||
Tensor<float, 4, ColMajor> out(71,9,11,7);
|
||||
input = input.constant(10.0f) + input.random();
|
||||
kernel = kernel.constant(7.0f) + kernel.random();
|
||||
|
||||
@ -305,9 +299,110 @@ static void test_cuda_convolution_2d()
|
||||
assert(cudaStreamCreate(&stream) == cudaSuccess);
|
||||
Eigen::GpuDevice gpu_device(&stream);
|
||||
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_input(d_input, Eigen::array<int, 4>(74,37,11,137));
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_kernel(d_kernel, Eigen::array<int, 2>(3,4));
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_out(d_out, Eigen::array<int, 4>(74,35,8,137));
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 4, ColMajor> > gpu_input(d_input,74,9,11,7);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1, ColMajor> > gpu_kernel(d_kernel,4);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 4, ColMajor> > gpu_out(d_out,71,9,11,7);
|
||||
|
||||
Eigen::array<int, 1> dims(0);
|
||||
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
|
||||
|
||||
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
|
||||
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
|
||||
|
||||
for (int i = 0; i < 71; ++i) {
|
||||
for (int j = 0; j < 9; ++j) {
|
||||
for (int k = 0; k < 11; ++k) {
|
||||
for (int l = 0; l < 7; ++l) {
|
||||
const float result = out(i,j,k,l);
|
||||
const float expected = input(i+0,j,k,l) * kernel(0) + input(i+1,j,k,l) * kernel(1) +
|
||||
input(i+2,j,k,l) * kernel(2) + input(i+3,j,k,l) * kernel(3);
|
||||
VERIFY_IS_APPROX(result, expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void test_cuda_convolution_inner_dim_row_major_1d()
|
||||
{
|
||||
Tensor<float, 4, RowMajor> input(7,9,11,74);
|
||||
Tensor<float, 1, RowMajor> kernel(4);
|
||||
Tensor<float, 4, RowMajor> out(7,9,11,71);
|
||||
input = input.constant(10.0f) + input.random();
|
||||
kernel = kernel.constant(7.0f) + kernel.random();
|
||||
|
||||
std::size_t input_bytes = input.size() * sizeof(float);
|
||||
std::size_t kernel_bytes = kernel.size() * sizeof(float);
|
||||
std::size_t out_bytes = out.size() * sizeof(float);
|
||||
|
||||
float* d_input;
|
||||
float* d_kernel;
|
||||
float* d_out;
|
||||
cudaMalloc((void**)(&d_input), input_bytes);
|
||||
cudaMalloc((void**)(&d_kernel), kernel_bytes);
|
||||
cudaMalloc((void**)(&d_out), out_bytes);
|
||||
|
||||
cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice);
|
||||
|
||||
cudaStream_t stream;
|
||||
assert(cudaStreamCreate(&stream) == cudaSuccess);
|
||||
Eigen::GpuDevice gpu_device(&stream);
|
||||
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 4, RowMajor> > gpu_input(d_input, 7,9,11,74);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1, RowMajor> > gpu_kernel(d_kernel, 4);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 4, RowMajor> > gpu_out(d_out, 7,9,11,71);
|
||||
|
||||
Eigen::array<int, 1> dims(3);
|
||||
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
|
||||
|
||||
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
|
||||
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
|
||||
|
||||
for (int i = 0; i < 7; ++i) {
|
||||
for (int j = 0; j < 9; ++j) {
|
||||
for (int k = 0; k < 11; ++k) {
|
||||
for (int l = 0; l < 71; ++l) {
|
||||
const float result = out(i,j,k,l);
|
||||
const float expected = input(i,j,k,l+0) * kernel(0) + input(i,j,k,l+1) * kernel(1) +
|
||||
input(i,j,k,l+2) * kernel(2) + input(i,j,k,l+3) * kernel(3);
|
||||
VERIFY_IS_APPROX(result, expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int DataLayout>
|
||||
static void test_cuda_convolution_2d()
|
||||
{
|
||||
Tensor<float, 4, DataLayout> input(74,37,11,137);
|
||||
Tensor<float, 2, DataLayout> kernel(3,4);
|
||||
Tensor<float, 4, DataLayout> out(74,35,8,137);
|
||||
input = input.constant(10.0f) + input.random();
|
||||
kernel = kernel.constant(7.0f) + kernel.random();
|
||||
|
||||
std::size_t input_bytes = input.size() * sizeof(float);
|
||||
std::size_t kernel_bytes = kernel.size() * sizeof(float);
|
||||
std::size_t out_bytes = out.size() * sizeof(float);
|
||||
|
||||
float* d_input;
|
||||
float* d_kernel;
|
||||
float* d_out;
|
||||
cudaMalloc((void**)(&d_input), input_bytes);
|
||||
cudaMalloc((void**)(&d_kernel), kernel_bytes);
|
||||
cudaMalloc((void**)(&d_out), out_bytes);
|
||||
|
||||
cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice);
|
||||
|
||||
cudaStream_t stream;
|
||||
assert(cudaStreamCreate(&stream) == cudaSuccess);
|
||||
Eigen::GpuDevice gpu_device(&stream);
|
||||
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_input(d_input,74,37,11,137);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > gpu_kernel(d_kernel,3,4);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out,74,35,8,137);
|
||||
|
||||
Eigen::array<int, 2> dims(1,2);
|
||||
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
|
||||
@ -319,32 +414,32 @@ static void test_cuda_convolution_2d()
|
||||
for (int j = 0; j < 35; ++j) {
|
||||
for (int k = 0; k < 8; ++k) {
|
||||
for (int l = 0; l < 137; ++l) {
|
||||
const float result = out(Eigen::array<int, 4>(i,j,k,l));
|
||||
const float expected = input(Eigen::array<int, 4>(i,j+0,k+0,l)) * kernel(Eigen::array<int, 2>(0,0)) +
|
||||
input(Eigen::array<int, 4>(i,j+1,k+0,l)) * kernel(Eigen::array<int, 2>(1,0)) +
|
||||
input(Eigen::array<int, 4>(i,j+2,k+0,l)) * kernel(Eigen::array<int, 2>(2,0)) +
|
||||
input(Eigen::array<int, 4>(i,j+0,k+1,l)) * kernel(Eigen::array<int, 2>(0,1)) +
|
||||
input(Eigen::array<int, 4>(i,j+1,k+1,l)) * kernel(Eigen::array<int, 2>(1,1)) +
|
||||
input(Eigen::array<int, 4>(i,j+2,k+1,l)) * kernel(Eigen::array<int, 2>(2,1)) +
|
||||
input(Eigen::array<int, 4>(i,j+0,k+2,l)) * kernel(Eigen::array<int, 2>(0,2)) +
|
||||
input(Eigen::array<int, 4>(i,j+1,k+2,l)) * kernel(Eigen::array<int, 2>(1,2)) +
|
||||
input(Eigen::array<int, 4>(i,j+2,k+2,l)) * kernel(Eigen::array<int, 2>(2,2)) +
|
||||
input(Eigen::array<int, 4>(i,j+0,k+3,l)) * kernel(Eigen::array<int, 2>(0,3)) +
|
||||
input(Eigen::array<int, 4>(i,j+1,k+3,l)) * kernel(Eigen::array<int, 2>(1,3)) +
|
||||
input(Eigen::array<int, 4>(i,j+2,k+3,l)) * kernel(Eigen::array<int, 2>(2,3));
|
||||
VERIFY_IS_APPROX(result, expected);
|
||||
const float result = out(i,j,k,l);
|
||||
const float expected = input(i,j+0,k+0,l) * kernel(0,0) +
|
||||
input(i,j+1,k+0,l) * kernel(1,0) +
|
||||
input(i,j+2,k+0,l) * kernel(2,0) +
|
||||
input(i,j+0,k+1,l) * kernel(0,1) +
|
||||
input(i,j+1,k+1,l) * kernel(1,1) +
|
||||
input(i,j+2,k+1,l) * kernel(2,1) +
|
||||
input(i,j+0,k+2,l) * kernel(0,2) +
|
||||
input(i,j+1,k+2,l) * kernel(1,2) +
|
||||
input(i,j+2,k+2,l) * kernel(2,2) +
|
||||
input(i,j+0,k+3,l) * kernel(0,3) +
|
||||
input(i,j+1,k+3,l) * kernel(1,3) +
|
||||
input(i,j+2,k+3,l) * kernel(2,3);
|
||||
VERIFY_IS_APPROX(result, expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<int DataLayout>
|
||||
static void test_cuda_convolution_3d()
|
||||
{
|
||||
Tensor<float, 5> input(Eigen::array<int, 5>(74,37,11,137,17));
|
||||
Tensor<float, 3> kernel(Eigen::array<int, 3>(3,4,2));
|
||||
Tensor<float, 5> out(Eigen::array<int, 5>(74,35,8,136,17));
|
||||
Tensor<float, 5, DataLayout> input(Eigen::array<int, 5>(74,37,11,137,17));
|
||||
Tensor<float, 3, DataLayout> kernel(3,4,2);
|
||||
Tensor<float, 5, DataLayout> out(Eigen::array<int, 5>(74,35,8,136,17));
|
||||
input = input.constant(10.0f) + input.random();
|
||||
kernel = kernel.constant(7.0f) + kernel.random();
|
||||
|
||||
@ -366,9 +461,9 @@ static void test_cuda_convolution_3d()
|
||||
assert(cudaStreamCreate(&stream) == cudaSuccess);
|
||||
Eigen::GpuDevice gpu_device(&stream);
|
||||
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 5> > gpu_input(d_input, Eigen::array<int, 5>(74,37,11,137,17));
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_kernel(d_kernel, Eigen::array<int, 3>(3,4,2));
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 5> > gpu_out(d_out, Eigen::array<int, 5>(74,35,8,136,17));
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_input(d_input,74,37,11,137,17);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> > gpu_kernel(d_kernel,3,4,2);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_out(d_out,74,35,8,136,17);
|
||||
|
||||
Eigen::array<int, 3> dims(1,2,3);
|
||||
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
|
||||
@ -381,31 +476,31 @@ static void test_cuda_convolution_3d()
|
||||
for (int k = 0; k < 8; ++k) {
|
||||
for (int l = 0; l < 136; ++l) {
|
||||
for (int m = 0; m < 17; ++m) {
|
||||
const float result = out(Eigen::array<int, 5>(i,j,k,l,m));
|
||||
const float expected = input(Eigen::array<int, 5>(i,j+0,k+0,l+0,m)) * kernel(Eigen::array<int, 3>(0,0,0)) +
|
||||
input(Eigen::array<int, 5>(i,j+1,k+0,l+0,m)) * kernel(Eigen::array<int, 3>(1,0,0)) +
|
||||
input(Eigen::array<int, 5>(i,j+2,k+0,l+0,m)) * kernel(Eigen::array<int, 3>(2,0,0)) +
|
||||
input(Eigen::array<int, 5>(i,j+0,k+1,l+0,m)) * kernel(Eigen::array<int, 3>(0,1,0)) +
|
||||
input(Eigen::array<int, 5>(i,j+1,k+1,l+0,m)) * kernel(Eigen::array<int, 3>(1,1,0)) +
|
||||
input(Eigen::array<int, 5>(i,j+2,k+1,l+0,m)) * kernel(Eigen::array<int, 3>(2,1,0)) +
|
||||
input(Eigen::array<int, 5>(i,j+0,k+2,l+0,m)) * kernel(Eigen::array<int, 3>(0,2,0)) +
|
||||
input(Eigen::array<int, 5>(i,j+1,k+2,l+0,m)) * kernel(Eigen::array<int, 3>(1,2,0)) +
|
||||
input(Eigen::array<int, 5>(i,j+2,k+2,l+0,m)) * kernel(Eigen::array<int, 3>(2,2,0)) +
|
||||
input(Eigen::array<int, 5>(i,j+0,k+3,l+0,m)) * kernel(Eigen::array<int, 3>(0,3,0)) +
|
||||
input(Eigen::array<int, 5>(i,j+1,k+3,l+0,m)) * kernel(Eigen::array<int, 3>(1,3,0)) +
|
||||
input(Eigen::array<int, 5>(i,j+2,k+3,l+0,m)) * kernel(Eigen::array<int, 3>(2,3,0)) +
|
||||
input(Eigen::array<int, 5>(i,j+0,k+0,l+1,m)) * kernel(Eigen::array<int, 3>(0,0,1)) +
|
||||
input(Eigen::array<int, 5>(i,j+1,k+0,l+1,m)) * kernel(Eigen::array<int, 3>(1,0,1)) +
|
||||
input(Eigen::array<int, 5>(i,j+2,k+0,l+1,m)) * kernel(Eigen::array<int, 3>(2,0,1)) +
|
||||
input(Eigen::array<int, 5>(i,j+0,k+1,l+1,m)) * kernel(Eigen::array<int, 3>(0,1,1)) +
|
||||
input(Eigen::array<int, 5>(i,j+1,k+1,l+1,m)) * kernel(Eigen::array<int, 3>(1,1,1)) +
|
||||
input(Eigen::array<int, 5>(i,j+2,k+1,l+1,m)) * kernel(Eigen::array<int, 3>(2,1,1)) +
|
||||
input(Eigen::array<int, 5>(i,j+0,k+2,l+1,m)) * kernel(Eigen::array<int, 3>(0,2,1)) +
|
||||
input(Eigen::array<int, 5>(i,j+1,k+2,l+1,m)) * kernel(Eigen::array<int, 3>(1,2,1)) +
|
||||
input(Eigen::array<int, 5>(i,j+2,k+2,l+1,m)) * kernel(Eigen::array<int, 3>(2,2,1)) +
|
||||
input(Eigen::array<int, 5>(i,j+0,k+3,l+1,m)) * kernel(Eigen::array<int, 3>(0,3,1)) +
|
||||
input(Eigen::array<int, 5>(i,j+1,k+3,l+1,m)) * kernel(Eigen::array<int, 3>(1,3,1)) +
|
||||
input(Eigen::array<int, 5>(i,j+2,k+3,l+1,m)) * kernel(Eigen::array<int, 3>(2,3,1));
|
||||
const float result = out(i,j,k,l,m);
|
||||
const float expected = input(i,j+0,k+0,l+0,m) * kernel(0,0,0) +
|
||||
input(i,j+1,k+0,l+0,m) * kernel(1,0,0) +
|
||||
input(i,j+2,k+0,l+0,m) * kernel(2,0,0) +
|
||||
input(i,j+0,k+1,l+0,m) * kernel(0,1,0) +
|
||||
input(i,j+1,k+1,l+0,m) * kernel(1,1,0) +
|
||||
input(i,j+2,k+1,l+0,m) * kernel(2,1,0) +
|
||||
input(i,j+0,k+2,l+0,m) * kernel(0,2,0) +
|
||||
input(i,j+1,k+2,l+0,m) * kernel(1,2,0) +
|
||||
input(i,j+2,k+2,l+0,m) * kernel(2,2,0) +
|
||||
input(i,j+0,k+3,l+0,m) * kernel(0,3,0) +
|
||||
input(i,j+1,k+3,l+0,m) * kernel(1,3,0) +
|
||||
input(i,j+2,k+3,l+0,m) * kernel(2,3,0) +
|
||||
input(i,j+0,k+0,l+1,m) * kernel(0,0,1) +
|
||||
input(i,j+1,k+0,l+1,m) * kernel(1,0,1) +
|
||||
input(i,j+2,k+0,l+1,m) * kernel(2,0,1) +
|
||||
input(i,j+0,k+1,l+1,m) * kernel(0,1,1) +
|
||||
input(i,j+1,k+1,l+1,m) * kernel(1,1,1) +
|
||||
input(i,j+2,k+1,l+1,m) * kernel(2,1,1) +
|
||||
input(i,j+0,k+2,l+1,m) * kernel(0,2,1) +
|
||||
input(i,j+1,k+2,l+1,m) * kernel(1,2,1) +
|
||||
input(i,j+2,k+2,l+1,m) * kernel(2,2,1) +
|
||||
input(i,j+0,k+3,l+1,m) * kernel(0,3,1) +
|
||||
input(i,j+1,k+3,l+1,m) * kernel(1,3,1) +
|
||||
input(i,j+2,k+3,l+1,m) * kernel(2,3,1);
|
||||
VERIFY_IS_APPROX(result, expected);
|
||||
}
|
||||
}
|
||||
@ -414,91 +509,6 @@ static void test_cuda_convolution_3d()
|
||||
}
|
||||
}
|
||||
|
||||
static float* CudaCopyFloat(float* data, int size) {
|
||||
const int nbytes = size * sizeof(float);
|
||||
float* result = NULL;
|
||||
if (cudaMalloc((void**)(&result), nbytes) != cudaSuccess) {
|
||||
return NULL;
|
||||
} else {
|
||||
if (data != NULL) {
|
||||
cudaMemcpy(result, data, nbytes, cudaMemcpyHostToDevice);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
static void test_cuda_constant_broadcast()
|
||||
{
|
||||
cudaStream_t stream;
|
||||
assert(cudaStreamCreate(&stream) == cudaSuccess);
|
||||
Eigen::GpuDevice gpu_device(&stream);
|
||||
|
||||
Tensor<float, 1> t1(10);
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
t1(i) = 10.0f * i;
|
||||
}
|
||||
float* t1_cuda = CudaCopyFloat(t1.data(), t1.size());
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1> > t1_gpu(t1_cuda, 10);
|
||||
|
||||
Tensor<float, 1> t2(1);
|
||||
t2 = t2.constant(20.0f);
|
||||
float* t2_cuda = CudaCopyFloat(t2.data(), t2.size());
|
||||
Eigen::TensorMap<Eigen::TensorFixedSize<float, Sizes<1> > > t2_gpu(t2_cuda, 1);
|
||||
|
||||
float* t3_cuda = CudaCopyFloat(NULL, 10);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1> > t3_gpu(t3_cuda, 10);
|
||||
|
||||
t3_gpu.device(gpu_device) =
|
||||
t1_gpu + t2_gpu.broadcast(Eigen::array<int, 1>(10));
|
||||
|
||||
Eigen::Tensor<float, 1> t3(10);
|
||||
cudaMemcpy(t3.data(), t3_gpu.data(), 10 * sizeof(float),
|
||||
cudaMemcpyDeviceToHost);
|
||||
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
VERIFY_IS_APPROX(t3(i), t1(i) + t2(0));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void test_cuda_cast()
|
||||
{
|
||||
Tensor<double, 3> in(Eigen::array<int, 3>(72,53,97));
|
||||
Tensor<float, 3> out(Eigen::array<int, 3>(72,53,97));
|
||||
in.setRandom();
|
||||
|
||||
std::size_t in_bytes = in.size() * sizeof(double);
|
||||
std::size_t out_bytes = out.size() * sizeof(float);
|
||||
|
||||
double* d_in;
|
||||
float* d_out;
|
||||
cudaMalloc((void**)(&d_in), in_bytes);
|
||||
cudaMalloc((void**)(&d_out), out_bytes);
|
||||
|
||||
cudaMemcpy(d_in, in.data(), in_bytes, cudaMemcpyHostToDevice);
|
||||
|
||||
cudaStream_t stream;
|
||||
assert(cudaStreamCreate(&stream) == cudaSuccess);
|
||||
Eigen::GpuDevice gpu_device(&stream);
|
||||
|
||||
Eigen::TensorMap<Eigen::Tensor<double, 3> > gpu_in(d_in, Eigen::array<int, 3>(72,53,97));
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<int, 3>(72,53,97));
|
||||
|
||||
gpu_out.device(gpu_device) = gpu_in.template cast<float>();
|
||||
|
||||
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
|
||||
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
|
||||
|
||||
for (int i = 0; i < 72; ++i) {
|
||||
for (int j = 0; j < 53; ++j) {
|
||||
for (int k = 0; k < 97; ++k) {
|
||||
VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), static_cast<float>(in(Eigen::array<int, 3>(i,j,k))));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void test_cxx11_tensor_cuda()
|
||||
{
|
||||
CALL_SUBTEST(test_cuda_elementwise_small());
|
||||
@ -506,9 +516,12 @@ void test_cxx11_tensor_cuda()
|
||||
CALL_SUBTEST(test_cuda_reduction());
|
||||
CALL_SUBTEST(test_cuda_contraction<ColMajor>());
|
||||
CALL_SUBTEST(test_cuda_contraction<RowMajor>());
|
||||
CALL_SUBTEST(test_cuda_convolution_1d());
|
||||
CALL_SUBTEST(test_cuda_convolution_2d());
|
||||
CALL_SUBTEST(test_cuda_convolution_3d());
|
||||
CALL_SUBTEST(test_cuda_constant_broadcast());
|
||||
CALL_SUBTEST(test_cuda_cast());
|
||||
CALL_SUBTEST(test_cuda_convolution_1d<ColMajor>());
|
||||
CALL_SUBTEST(test_cuda_convolution_1d<RowMajor>());
|
||||
CALL_SUBTEST(test_cuda_convolution_inner_dim_col_major_1d());
|
||||
CALL_SUBTEST(test_cuda_convolution_inner_dim_row_major_1d());
|
||||
CALL_SUBTEST(test_cuda_convolution_2d<ColMajor>());
|
||||
CALL_SUBTEST(test_cuda_convolution_2d<RowMajor>());
|
||||
CALL_SUBTEST(test_cuda_convolution_3d<ColMajor>());
|
||||
CALL_SUBTEST(test_cuda_convolution_3d<RowMajor>());
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user